cfe-lab
diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile
Lines changed: 0 additions & 23 deletions
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
Lines changed: 12 additions & 12 deletions
diff --git a/.yamllint.yml b/.yamllint.yml
Lines changed: 1 addition & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 1 addition & 0 deletions
diff --git a/src/easyhla/bblab.py b/src/easyhla/bblab.py
Lines changed: 17 additions & 13 deletions
diff --git a/src/easyhla/clinical_hla.py b/src/easyhla/clinical_hla.py
Lines changed: 39 additions & 52 deletions
diff --git a/src/easyhla/clinical_hla_lib.py b/src/easyhla/clinical_hla_lib.py
Lines changed: 4 additions & 4 deletions
diff --git a/src/easyhla/interpret_from_json.py b/src/easyhla/interpret_from_json.py
Lines changed: 1 addition & 4 deletions
diff --git a/src/easyhla/models.py b/src/easyhla/models.py
Lines changed: 5 additions & 3 deletions
@@ -47,15 +47,15 @@ jobs:
       - name: Run tests
         run: uv run pytest --junitxml=pytest.xml
 
-      # TODO: Look into github actions, these are out of date
-      # - name: Upload coverage data
-      #   uses: actions/upload-artifact@v3
-      #   with:
-      #     name: coverage-data
-      #     path: coverage.xml
-
-      # - name: Publish Test Report
-      #   uses: mikepenz/action-junit-report@v3
-      #   if: success() || failure()
-      #   with:
-      #     report_paths: unit_test.xml
+# TODO: Look into github actions, these are out of date
+# - name: Upload coverage data
+#   uses: actions/upload-artifact@v3
+#   with:
+#     name: coverage-data
+#     path: coverage.xml
+
+# - name: Publish Test Report
+#   uses: mikepenz/action-junit-report@v3
+#   if: success() || failure()
+#   with:
+#     report_paths: unit_test.xml
@@ -1,6 +1,7 @@
 ignore:
   - .git/*
   - .venv/*
+  - src/easyhla/default_data/hla_standards.yaml
 
 extends: default
 
 
@@ -156,3 +156,4 @@ match = "src/**/*.py"
 [tool.mypy]
 plugins = ["numpy.typing.mypy_plugin"]
 ignore_missing_imports = true
+exclude = ["scripts/"]
@@ -5,17 +5,19 @@
 from pathlib import Path
 from typing import Any, Optional
 
-import Bio
 import typer
+from Bio.Seq import Seq
+from Bio.SeqIO import parse
 
 from .bblab_lib import (
     EXON_AND_OTHER_EXON,
     HLAInterpretationRow,
     HLAMismatchRow,
     pair_exons,
 )
-from .easyhla import DATE_FORMAT, EXON_NAME, EasyHLA
+from .easyhla import DATE_FORMAT, EasyHLA
 from .models import HLAInterpretation, HLASequence
+from .utils import EXON_NAME
 
 logger = logging.Logger(__name__, logging.ERROR)
 
@@ -49,21 +51,21 @@ def log_and_print(
 
 
 def report_unmatched_sequences(
-    unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]],
+    unmatched: dict[EXON_NAME, dict[str, Seq]],
     to_stdout: bool = False,
 ) -> None:
     """
     Report exon sequences that did not have a matching exon.
 
     :param unmatched: unmatched exon sequences, grouped by which exon they represent
-    :type unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
+    :type unmatched: dict[EXON_NAME, dict[str, Seq]]
     :param to_stdout: forwarded to log_and_print for each message, defaults to False
     :type to_stdout: bool, optional
     """
     for exon, other_exon in EXON_AND_OTHER_EXON:
-        for entry in unmatched[exon]:
+        for sequence_id in unmatched[exon].keys():
             log_and_print(
-                f"No matching {other_exon} for {entry.description}",
+                f"No matching {other_exon} for {sequence_id}",
                 to_stdout=to_stdout,
             )
 
@@ -79,6 +81,8 @@ def process_from_file_to_files(
 ):
     if threshold and threshold < 0:
         raise RuntimeError("Threshold must be >=0 or None!")
+    elif threshold is None:
+        threshold = 0
 
     rows: list[HLAInterpretationRow] = []
     mismatch_rows: list[HLAMismatchRow] = []
@@ -93,13 +97,13 @@ def process_from_file_to_files(
     )
 
     matched_sequences: list[HLASequence]
-    unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
+    unmatched: dict[EXON_NAME, dict[str, Seq]]
 
     with open(filename, "r", encoding="utf-8") as f:
         matched_sequences, unmatched = pair_exons(
-            Bio.SeqIO.parse(f, "fasta"),
+            parse(f, "fasta"),
             locus.value,
-            list(hla_alg.standards.values())[0],
+            list(hla_alg.hla_standards[locus.value].values())[0],
         )
 
     for hla_sequence in matched_sequences:
@@ -133,10 +137,10 @@ def process_from_file_to_files(
         row: HLAInterpretationRow = HLAInterpretationRow.summary_row(result)
         rows.append(row)
 
-        mismatch_rows.extend(result.mismatch_rows())
+        mismatch_rows.extend(HLAMismatchRow.mismatch_rows(result))
 
         npats += 1
-        nseqs += hla_sequence.num_seqs
+        nseqs += hla_sequence.num_sequences_used
 
     report_unmatched_sequences(unmatched, to_stdout=to_stdout)
 
@@ -171,11 +175,11 @@ def process_from_file_to_files(
             ),
         )
         mismatch_csv.writeheader()
-        mismatch_csv.writerows([dict[row] for row in mismatch_rows])
+        mismatch_csv.writerows([dict(row) for row in mismatch_rows])
 
     log_and_print(
         f"{npats} patients, {nseqs} sequences processed.",
-        log_level=logger.INFO,
+        log_level=logging.INFO,
         to_stdout=to_stdout,
     )
 
 
@@ -6,7 +6,7 @@
 import logging
 import os
 from datetime import datetime
-from typing import Final, Optional, TypedDict
+from typing import Final, Literal, Optional, TypedDict, cast
 
 from sqlalchemy import create_engine, event
 from sqlalchemy.engine import Engine
@@ -36,38 +36,15 @@
 )
 
 # Database connection parameters:
-HLA_DB_USER: Final[str] = os.environ.get("HLA_DB_USER")
-HLA_DB_PASSWORD: Final[str] = os.environ.get("HLA_DB_PASSWORD")
+HLA_DB_USER: Final[Optional[str]] = os.environ.get("HLA_DB_USER")
+HLA_DB_PASSWORD: Final[Optional[str]] = os.environ.get("HLA_DB_PASSWORD")
 HLA_DB_HOST: Final[str] = os.environ.get("HLA_DB_HOST", "192.168.67.7")
-HLA_DB_PORT: Final[int] = os.environ.get("HLA_DB_PORT", 1521)
+HLA_DB_PORT: Final[int] = int(os.environ.get("HLA_DB_PORT", 1521))
 HLA_DB_SERVICE_NAME: Final[str] = os.environ.get("HLA_DB_SERVICE_NAME", "cfe")
 
-HLA_ORACLE_LIB_PATH: Final[str] = os.environ.get("HLA_ORACLE_LIB_PATH")
-
-# These are the "configuration files" that the algorithm uses; these are or may
-# be updated, in which case you specify the path to the new version in the
-# environment.
-HLA_STANDARDS: Final[str] = os.environ.get("HLA_STANDARDS")
-HLA_FREQUENCIES: Final[str] = os.environ.get("HLA_FREQUENCIES")
-
-
-def prepare_interpretation_for_serialization(
-    interpretation: HLAInterpretation,
-    locus: HLA_LOCUS,
-    processing_datetime: datetime,
-) -> HLASequenceA | HLASequenceB | HLASequenceC:
-    """
-    Prepare an HLA interpretation for output.
-    """
-    if locus == "A":
-        return HLASequenceA.build_from_interpretation(
-            interpretation, processing_datetime
-        )
-    elif locus == "B":
-        return HLASequenceB.build_from_interpretation(
-            interpretation, processing_datetime
-        )
-    return HLASequenceC.build_from_interpretation(interpretation, processing_datetime)
+HLA_ORACLE_LIB_PATH: Final[str] = os.environ.get(
+    "HLA_ORACLE_LIB_PATH", "/opt/oracle/instant_client"
+)
 
 
 class SequencesByLocus(TypedDict):
@@ -91,10 +68,10 @@ def interpret_sequences(
 
 def clinical_hla_driver(
     input_dir: str,
+    hla_a_results: str,
+    hla_b_results: str,
+    hla_c_results: str,
     db_engine: Optional[Engine] = None,
-    hla_a_results: Optional[str] = None,
-    hla_b_results: Optional[str] = None,
-    hla_c_results: Optional[str] = None,
     standards_path: Optional[str] = None,
     frequencies_path: Optional[str] = None,
 ) -> None:
@@ -105,7 +82,8 @@ def clinical_hla_driver(
         "C": [],
     }
     for locus in ("B", "C"):
-        sequences[locus] = read_bc_sequences(input_dir, locus, logger)
+        b_or_c: Literal["B", "C"] = cast(Literal["B", "C"], locus)
+        sequences[b_or_c] = read_bc_sequences(input_dir, b_or_c, logger)
 
     # Perform interpretations:
     interpretations: dict[HLA_LOCUS, list[HLAInterpretation]] = {
@@ -116,25 +94,30 @@ def clinical_hla_driver(
     processing_datetime: datetime = datetime.now()
     easyhla: EasyHLA = EasyHLA.use_config(standards_path, frequencies_path)
     for locus in ("A", "B", "C"):
-        interpretations[locus] = interpret_sequences(easyhla, sequences[locus])
+        interpretations[cast(HLA_LOCUS, locus)] = interpret_sequences(
+            easyhla, sequences[cast(HLA_LOCUS, locus)]
+        )
 
     # Prepare the interpretations for output:
     seqs_for_db: SequencesByLocus = {
         "A": [],
         "B": [],
         "C": [],
     }
-    for locus in ("A", "B", "C"):
-        # Each locus has a slightly different schema in the database, so we
-        # customize for each one.
-        for interp in interpretations[locus]:
-            seqs_for_db[locus].append(
-                prepare_interpretation_for_serialization(
-                    interp,
-                    locus,
-                    processing_datetime,
-                )
-            )
+    # This next bit looks repetitive but mypy didn't like my solution for doing
+    # this in a loop (because each one is a different type).
+    for interp in interpretations["A"]:
+        seqs_for_db["A"].append(
+            HLASequenceA.build_from_interpretation(interp, processing_datetime)
+        )
+    for interp in interpretations["B"]:
+        seqs_for_db["B"].append(
+            HLASequenceB.build_from_interpretation(interp, processing_datetime)
+        )
+    for interp in interpretations["C"]:
+        seqs_for_db["C"].append(
+            HLASequenceC.build_from_interpretation(interp, processing_datetime)
+        )
 
     # First, write to the output files:
     output_files: dict[HLA_LOCUS, str] = {
@@ -148,19 +131,23 @@ def clinical_hla_driver(
         "C": HLASequenceC.CSV_HEADER,
     }
     for locus in ("A", "B", "C"):
-        if len(seqs_for_db[locus]) > 0:
-            with open(output_files[locus], "w") as f:
+        if len(seqs_for_db[cast(HLA_LOCUS, locus)]) > 0:
+            with open(output_files[cast(HLA_LOCUS, locus)], "w") as f:
                 result_csv: csv.DictWriter = csv.DictWriter(
-                    f, fieldnames=csv_headers[locus], extrasaction="ignore"
+                    f,
+                    fieldnames=csv_headers[cast(HLA_LOCUS, locus)],
+                    extrasaction="ignore",
                 )
                 result_csv.writeheader()
-                result_csv.writerows(dataclasses.asdict(x) for x in seqs_for_db[locus])
+                result_csv.writerows(
+                    dataclasses.asdict(x) for x in seqs_for_db[cast(HLA_LOCUS, locus)]
+                )
 
     # Finally, write to the DB.
     if db_engine is not None:
         with Session(db_engine) as session:
             for locus in ("A", "B", "C"):
-                session.add_all(seqs_for_db[locus])
+                session.add_all(seqs_for_db[cast(HLA_LOCUS, locus)])
             session.commit()
 
 
@@ -246,10 +233,10 @@ def schema_workaround(dbapi_connection, _):
 
     clinical_hla_driver(
         args.input_dir,
-        db_engine,
         args.hla_a_results,
         args.hla_b_results,
         args.hla_c_results,
+        db_engine,
         args.hla_standards,
         args.hla_frequencies,
     )
 
@@ -76,7 +76,7 @@ def get_common_serialization_fields(
             "alleles_all": ap.stringify(),
             "ambiguous": str(ap.is_ambiguous()),
             "homozygous": str(ap.is_homozygous()),
-            "mismatch_count": interpretation.lowest_mismatch_count(),
+            "mismatch_count": mismatch_count,
             "mismatches": mismatches_str,
             "enterdate": processing_datetime,
         }
@@ -94,7 +94,7 @@ class HLASequenceA(HLADBBase):
     alleles_all: Mapped[Optional[str]] = mapped_column(String)
     ambiguous: Mapped[Optional[str]] = mapped_column(String)
     homozygous: Mapped[Optional[str]] = mapped_column(String)
-    mismatch_count: Mapped[Optional[str]] = mapped_column(Integer)
+    mismatch_count: Mapped[Optional[int]] = mapped_column(Integer)
     mismatches: Mapped[Optional[str]] = mapped_column(String)
     seq: Mapped[Optional[str]] = mapped_column(String)
     enterdate: Mapped[Optional[datetime]] = mapped_column(DateTime)
@@ -140,7 +140,7 @@ class HLASequenceB(HLADBBase):
     alleles_all: Mapped[Optional[str]] = mapped_column(String)
     ambiguous: Mapped[Optional[str]] = mapped_column(String)
     homozygous: Mapped[Optional[str]] = mapped_column(String)
-    mismatch_count: Mapped[Optional[str]] = mapped_column(Integer)
+    mismatch_count: Mapped[Optional[int]] = mapped_column(Integer)
     mismatches: Mapped[Optional[str]] = mapped_column(String)
     b5701: Mapped[Optional[str]] = mapped_column(String)
     b5701_dist: Mapped[Optional[int]] = mapped_column(Integer)
@@ -201,7 +201,7 @@ class HLASequenceC(HLADBBase):
     alleles_all: Mapped[Optional[str]] = mapped_column(String)
     ambiguous: Mapped[Optional[str]] = mapped_column(String)
     homozygous: Mapped[Optional[str]] = mapped_column(String)
-    mismatch_count: Mapped[Optional[str]] = mapped_column(Integer)
+    mismatch_count: Mapped[Optional[int]] = mapped_column(Integer)
     mismatches: Mapped[Optional[str]] = mapped_column(String)
     seqa: Mapped[Optional[str]] = mapped_column(String)
     seqb: Mapped[Optional[str]] = mapped_column(String)
 
@@ -38,10 +38,7 @@ def main():
             hla_input.hla_std_path,
             hla_input.hla_freq_path,
         )
-        interp: HLAInterpretation = easyhla.interpret(
-            hla_input.hla_sequence(),
-            hla_input.locus,
-        )
+        interp: HLAInterpretation = easyhla.interpret(hla_input.hla_sequence())
         print(HLAResult.build_from_interpretation(interp).model_dump_json())
 
 
 
@@ -1,7 +1,7 @@
 import re
 from collections.abc import Iterable
 from operator import itemgetter
-from typing import ClassVar, Final, Optional, Self
+from typing import Final, Optional, Self
 
 import numpy as np
 from pydantic import BaseModel, ConfigDict
@@ -134,8 +134,10 @@ def __lt__(self, other: "HLAProteinPair") -> bool:
         )
         return me_tuple < other_tuple
 
-    UNMAPPED: ClassVar[Final[str]] = "unmapped"
-    DEPRECATED: ClassVar[Final[str]] = "deprecated"
+    # Note: originally these were annotated as ClassVar[Final[str]] but this
+    # isn't supported in versions of Python prior to 3.13.
+    UNMAPPED: Final[str] = "unmapped"
+    DEPRECATED: Final[str] = "deprecated"
 
     class NonAlleleException(Exception):
         def __init__(
-Original file line number
+Diff line change
@@ -1,6 +1,7 @@
 ignore:
   - .git/*
   - .venv/*
+  - src/easyhla/default_data/hla_standards.yaml
 extends: default