Add ruff to the pre-commit config (replacing black), and apply/resolve the lint errors reported by `ruff check . --fix`

drickett · drickett · commit 7b29d10b97b1 · 2025-03-05T11:21:43.000-08:00
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,7 +9,12 @@ repos:
       - id: end-of-file-fixer
       - id: name-tests-test
       - id: trailing-whitespace
-  - repo: https://github.com/psf/black
-    rev: 23.1.0
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.9.9
     hooks:
-      - id: black
+      # Run the linter.
+      - id: ruff
+        args: [ --fix ]
+      # Run the formatter.
+      - id: ruff-format
diff --git a/pyproject.toml b/pyproject.toml
@@ -44,6 +44,7 @@ dev = [
   "pytest-mock>=3.14.0",
   "pytest-xdist>=3.6.1",
   "ruff>=0.9.9",
+  "types-pytz>=2025.1.0.20250204",
 ]
 
 [project.urls]
diff --git a/src/easyhla/__init__.py b/src/easyhla/__init__.py
@@ -1,2 +1,3 @@
-from .easyhla import EasyHLA, HLAType
-from .entrypoint import run
+from .easyhla import EasyHLA as EasyHLA
+from .easyhla import HLAType as HLAType
+from .entrypoint import run as run
diff --git a/src/easyhla/easyhla.py b/src/easyhla/easyhla.py
@@ -1,14 +1,15 @@
+# ruff: noqa: C901
+# TODO: Remove this noqa line and refactor/reduce code-complexity
+
 import logging
 import os
 import re
 from datetime import datetime
 from enum import Enum
-from pathlib import Path
 from typing import Any, Dict, Final, List, Literal, Optional, Tuple
 
 import Bio.SeqIO
 import numpy as np
-import typer
 
 from .models import (
     Alleles,
@@ -342,7 +343,7 @@ def combine_stds(
         seq: List[int],
         max_mismatch_threshold: Optional[int],
     ) -> Dict[int, List[HLACombinedStandardResult]]:
-        length = len(matching_stds[0].sequence)
+        # length = len(matching_stds[0].sequence)
 
         default_min = 9999
         if max_mismatch_threshold is None:
@@ -376,9 +377,9 @@ def combine_stds(
                         computed_minimum_mismatches = max(
                             mismatches, tmp_max_mismatch_threshold
                         )
-                    if not mismatches in combos:
+                    if mismatches not in combos:
                         combos[mismatches] = {}
-                    if not combined_std_name in combos[mismatches]:
+                    if combined_std_name not in combos[mismatches]:
                         combos[mismatches][combined_std_name] = []
                     stds = [std_a.allele, std_b.allele]
                     stds.sort()
@@ -423,11 +424,11 @@ def load_hla_frequencies(self, letter: HLA_TYPES) -> Dict[str, int]:
         with open(filepath, "r", encoding="utf-8") as f:
             for line in f.readlines():
                 column_id = EasyHLA.COLUMN_IDS[letter]
-                l = line.strip().split(",")[column_id : column_id + 2]
-                _l = ",".join([f"{a[:2]}|{a[-2:]}" for a in l])
-                if hla_freqs.get(_l, None) is None:
-                    hla_freqs[_l] = 0
-                hla_freqs[_l] += 1
+                line_array = line.strip().split(",")[column_id : column_id + 2]
+                elements = ",".join([f"{a[:2]}|{a[-2:]}" for a in line_array])
+                if hla_freqs.get(elements, None) is None:
+                    hla_freqs[elements] = 0
+                hla_freqs[elements] += 1
         return hla_freqs
 
     # TODO: Convert this to a dictionary instead of a object that looks like:
@@ -449,9 +450,9 @@ def load_hla_stds(self, letter: HLA_TYPES) -> List[HLAStandard]:
 
         with open(filepath, "r", encoding="utf-8") as f:
             for line in f.readlines():
-                l = line.strip().split(",")
-                seq = self.nuc2bin((l[1] + l[2]))
-                hla_stds.append(HLAStandard(allele=l[0], sequence=seq))
+                line_array = line.strip().split(",")
+                seq = self.nuc2bin((line_array[1] + line_array[2]))
+                hla_stds.append(HLAStandard(allele=line_array[0], sequence=seq))
         return hla_stds
 
     def load_allele_definitions_last_modified_time(self) -> datetime:
@@ -497,7 +498,7 @@ def interpret(
                 return None
             if not self.check_bases(str(entry.seq), samp):
                 return None
-        except ValueError as e:
+        except ValueError:
             return None
 
         is_exon = False
@@ -691,7 +692,7 @@ def run(
             f"{npats} patients, {nseqs} sequences processed.", to_stdout=to_stdout
         )
 
-        self.log.info(f"% patients, % sequences processed.", npats, nseqs)
+        self.log.info("% patients, % sequences processed.", npats, nseqs)
 
         with open(output_filename, "w", encoding="utf-8") as f:
             f.write(
@@ -814,9 +815,9 @@ def get_mismatches(
             _seq = np.array([int(nuc) for nuc in hla_csr.standard.split("-")])
             # TODO: replace with https://stackoverflow.com/questions/16094563/numpy-get-index-where-value-is-true
             for idx in np.flatnonzero(_seq ^ seq):
-                if not idx in correct_bases_at_pos:
+                if idx not in correct_bases_at_pos:
                     correct_bases_at_pos[idx] = []
-                if not _seq[idx] in correct_bases_at_pos[idx]:
+                if _seq[idx] not in correct_bases_at_pos[idx]:
                     correct_bases_at_pos[idx].append(_seq[idx])
 
         mislist: List[str] = []
diff --git a/src/easyhla/entrypoint.py b/src/easyhla/entrypoint.py
@@ -28,8 +28,6 @@ def main(
         "--print",
         "-p",
         help="Print to stdout as sequences are interpretted",
-        flag_value=True,
-        is_flag=True,
     ),
     sequence_file: Path = typer.Argument(
         ...,
diff --git a/src/easyhla/models.py b/src/easyhla/models.py
@@ -1,5 +1,5 @@
 import re
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Dict, List, Set, Tuple
 
 import numpy as np
 import pydantic_numpy.typing as pnd
@@ -28,7 +28,7 @@ def is_homozygous(self) -> bool:
         :return: ...
         :rtype: bool
         """
-        return any([_a[0] == _a[1] for _a in self.alleles])
+        return any(_a[0] == _a[1] for _a in self.alleles)
 
     def is_ambiguous(self) -> bool:
         """
@@ -108,7 +108,7 @@ def stringify_clean(self) -> str:
         clean_allele: List[str] = []
         for n in [0, 1]:
             for i in [4, 3, 2, 1]:
-                if len(set([":".join(a[n][0:i]) for a in self.get_collection()])) == 1:
+                if len({":".join(a[n][0:i]) for a in self.get_collection()}) == 1:
                     clean_allele.append(
                         re.sub(
                             r"[A-Z]$", "", ":".join(self.get_collection()[0][n][0:i])
@@ -151,7 +151,7 @@ class HLAStandard(NumpyModel):
     sequence: pnd.NpNDArray
 
     def __eq__(self, other):
-        if type(self) != type(other):
+        if not isinstance(other, self.__class__):
             raise TypeError(f"Cannot compare against {type(other)}")
         return all(
             [self.allele == other.allele, np.array_equal(self.sequence, other.sequence)]
@@ -162,7 +162,7 @@ class HLAStandardMatch(HLAStandard):
     mismatch: int
 
     def __eq__(self, other):
-        if type(other) != type(self):
+        if not isinstance(other, self.__class__):
             raise TypeError(f"Cannot compare against {type(other)}")
         return all(
             [
@@ -205,7 +205,7 @@ def get_result(self) -> List[str]:
         ]
 
     def get_result_as_str(self) -> str:
-        return ",".join([el for el in self.get_result()])
+        return ",".join(el for el in self.get_result())
 
 
 class HLAResult(BaseModel):
diff --git a/tests/easyhla_test.py b/tests/easyhla_test.py
@@ -1,9 +1,7 @@
-import json
 import os
-from contextlib import nullcontext as does_not_raise
 from datetime import datetime
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import List, Tuple
 
 import numpy as np
 import pytest
@@ -17,7 +15,7 @@
     HLAStandardMatch,
 )
 
-from .conftest import compare_ref_vs_test, make_comparison
+from .conftest import compare_ref_vs_test
 
 
 @pytest.fixture(scope="module")
@@ -71,14 +69,14 @@ def test_unknown_hla_type(self):
         Assert we raise a value error if we put in an unknown HLA type.
         """
         with pytest.raises(ValueError):
-            easyhla = EasyHLA("D")
+            _ = EasyHLA("D")
 
     def test_known_hla_type_lowercase(self):
         """
         Assert we raise a value error if we put in an HLA type with wrong case.
         """
         with pytest.raises(ValueError):
-            easyhla = EasyHLA("a")
+            _ = EasyHLA("a")
 
     @pytest.mark.parametrize("easyhla", ["A"], indirect=True)
     def test_load_allele_definitions_last_modified_time(
@@ -1167,10 +1165,10 @@ def test_run(self, easyhla: EasyHLA):
 
         print(f"Test ended at {end_compare_time.isoformat()}")
 
-        print(f"Time elapsed: {(end_compare_time-start_time).total_seconds()}")
+        print(f"Time elapsed: {(end_compare_time - start_time).total_seconds()}")
         print(
-            f"Time elapsed for interpretation: {(end_time-start_time).total_seconds()}"
+            f"Time elapsed for interpretation: {(end_time - start_time).total_seconds()}"
         )
         print(
-            f"Time elapsed for output comparison: {(end_compare_time-end_time).total_seconds()}"
+            f"Time elapsed for output comparison: {(end_compare_time - end_time).total_seconds()}"
         )
diff --git a/tests/models_test.py b/tests/models_test.py
@@ -1,13 +1,9 @@
-from typing import Dict, List, Set, Tuple
+from typing import List, Set, Tuple
 
 import pytest
 
-from easyhla.easyhla import DATE_FORMAT, EasyHLA
 from easyhla.models import (
     Alleles,
-    HLACombinedStandardResult,
-    HLAStandard,
-    HLAStandardMatch,
 )
 
 
diff --git a/tools/check_date_modified.py b/tools/check_date_modified.py
@@ -17,10 +17,8 @@
 file.
 """
 
-
 import os
 from datetime import datetime
-from math import ceil
 from typing import List, Tuple
 
 import typer
@@ -133,8 +131,8 @@ def _check_dates() -> bool:
 def check_dates():
     if not _check_dates():
         warning_msg = "WARNING: The last modified file date has changed!"
-        preamble_height = 4
-        len_msg = ceil(len(warning_msg) / preamble_height)
+        # preamble_height = 4
+        # len_msg = ceil(len(warning_msg) / preamble_height)
         print("#" * len(warning_msg))
         print("#" * len(warning_msg))
         # if you want a silly message on an angle, uncomment this.
diff --git a/tools/data_parse.py b/tools/data_parse.py
@@ -47,21 +47,21 @@
     chance = Random().random()
     if chance > perc_pure_exon and row[1]["INTRON"] != "":
         if HLA_TYPE == "A":
-            sample_input_seqs[
-                row[1][ENUM_IDENTIFIER[HLA_TYPE]]
-            ] = f"{row[1]['EXON2']}{row[1]['INTRON']}{row[1]['EXON3']}"
+            sample_input_seqs[row[1][ENUM_IDENTIFIER[HLA_TYPE]]] = (
+                f"{row[1]['EXON2']}{row[1]['INTRON']}{row[1]['EXON3']}"
+            )
         else:
-            sample_input_seqs[
-                row[1][ENUM_IDENTIFIER[HLA_TYPE]]
-            ] = f"{row[1]['EXON2']}{row[1]['EXON3']}"
+            sample_input_seqs[row[1][ENUM_IDENTIFIER[HLA_TYPE]]] = (
+                f"{row[1]['EXON2']}{row[1]['EXON3']}"
+            )
     else:
         pure_exon_samples.append(row[1][ENUM_IDENTIFIER[HLA_TYPE]])
-        sample_input_seqs[
-            row[1][ENUM_IDENTIFIER[HLA_TYPE]] + "_exon2"
-        ] = f"{row[1]['EXON2']}"
-        sample_input_seqs[
-            row[1][ENUM_IDENTIFIER[HLA_TYPE]] + "_exon3"
-        ] = f"{row[1]['EXON3']}"
+        sample_input_seqs[row[1][ENUM_IDENTIFIER[HLA_TYPE]] + "_exon2"] = (
+            f"{row[1]['EXON2']}"
+        )
+        sample_input_seqs[row[1][ENUM_IDENTIFIER[HLA_TYPE]] + "_exon3"] = (
+            f"{row[1]['EXON3']}"
+        )
 
 with open(
     os.path.join(
@@ -98,7 +98,7 @@
                 row[1][ENUM_IDENTIFIER[HLA_TYPE]] in pure_exon_samples
                 and col == "INTRON"
             ):
-                f.write(f",")
+                f.write(",")
             elif col == output_columns[-1]:
                 f.write(f"{row[1][col]}")
             else:
diff --git a/tools/manual_comparison.py b/tools/manual_comparison.py
@@ -1,15 +1,17 @@
+# ruff: noqa: C901
+
 """
 Perform a manual comparison between two output files.
 
 For help use `python tests/manual_comparison.py --help`.
 """
 
-import os
+from itertools import zip_longest
 
 import typer
 
 from src.easyhla.easyhla import EasyHLA
-from tests.conftest import compare_ref_vs_test, make_comparison
+from tests.conftest import make_comparison
 
 
 def main(
@@ -30,22 +32,16 @@ def main(
         False,
         "--skip-preamble",
         "-s",
-        is_flag=True,
-        flag_value=True,
         help="If both files begin with a timestamp, skip that line",
     ),
     skip_preamble_ref: bool = typer.Option(
         False,
         "--skip-preamble-ref",
-        is_flag=True,
-        flag_value=True,
         help="If reference file begins with a timestamp, skip that line",
     ),
     skip_preamble_out: bool = typer.Option(
         False,
         "--skip-preamble-output",
-        is_flag=True,
-        flag_value=True,
         help="If output file begins with a timestamp, skip that line",
     ),
 ) -> None:
@@ -68,11 +64,13 @@ def main(
     if len(column_names) <= 1:
         raise RuntimeError("No column names detected, you may need to specify -s!")
 
-    assert len(reference_file) == len(
-        test_output_file
-    ), "Size of test output does not match reference file!"
+    assert len(reference_file) == len(test_output_file), (
+        "Size of test output does not match reference file!"
+    )
 
-    for row_num, (ref, test) in enumerate(zip(reference_file, test_output_file)):
+    for row_num, (ref, test) in enumerate(
+        zip_longest(reference_file, test_output_file)
+    ):
         for col_num, (_ref, _test) in enumerate(
             zip(ref.strip().split(","), test.strip().split(","))
         ):
diff --git a/uv.lock b/uv.lock

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -44,6 +44,7 @@ dev = [` |
| `44` | `44` | `"pytest-mock>=3.14.0",` |
| `45` | `45` | `"pytest-xdist>=3.6.1",` |
| `46` | `46` | `"ruff>=0.9.9",` |
| | `47` | `+ "types-pytz>=2025.1.0.20250204",` |
| `47` | `48` | `]` |
| `48` | `49` | |
| `49` | `50` | `[project.urls]` |