Skip to content

Commit 2896c8d

Browse files
author
rhliang
committed
Cleaned up all mypy errors.
1 parent a03e618 commit 2896c8d

File tree

13 files changed

+149
-101
lines changed

13 files changed

+149
-101
lines changed

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ dependencies = [
3333
"pyyaml>=6.0.2",
3434
"requests>=2.32.3",
3535
"typer>=0.15.2",
36+
"types-pyyaml>=6.0.12.20250516",
37+
"types-requests>=2.32.4.20250611",
3638
]
3739

3840
[dependency-groups]

src/easyhla/bblab.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from typing import Any, Optional
77

88
import typer
9-
from Bio.Seq import Seq
9+
from Bio.Seq import MutableSeq, Seq
1010
from Bio.SeqIO import parse
1111

1212
from .bblab_lib import (
@@ -51,7 +51,7 @@ def log_and_print(
5151

5252

5353
def report_unmatched_sequences(
54-
unmatched: dict[EXON_NAME, dict[str, Seq]],
54+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
5555
to_stdout: bool = False,
5656
) -> None:
5757
"""
@@ -97,7 +97,7 @@ def process_from_file_to_files(
9797
)
9898

9999
matched_sequences: list[HLASequence]
100-
unmatched: dict[EXON_NAME, dict[str, Seq]]
100+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]
101101

102102
with open(filename, "r", encoding="utf-8") as f:
103103
matched_sequences, unmatched = pair_exons(

src/easyhla/bblab_lib.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import TypedDict
44

55
import numpy as np
6-
from Bio.Seq import Seq
6+
from Bio.Seq import MutableSeq, Seq
77
from Bio.SeqIO import SeqRecord
88
from pydantic import BaseModel
99

@@ -36,7 +36,7 @@
3636

3737
def pair_exons_helper(
3838
sequence_record: SeqRecord,
39-
unmatched: dict[EXON_NAME, dict[str, Seq]],
39+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
4040
) -> tuple[str, bool, bool, str, str]:
4141
"""
4242
Helper that attempts to match the given sequence with a "partner" exon.
@@ -55,7 +55,7 @@ def pair_exons_helper(
5555
- exon3 sequence
5656
"""
5757
# The `id` field is expected to hold the sample name.
58-
samp: str = sequence_record.id
58+
samp: str = sequence_record.id or ""
5959
is_exon: bool = False
6060
matched: bool = False
6161
exon2: str = ""
@@ -98,7 +98,7 @@ def pair_exons(
9898
sequence_records: Iterable[SeqRecord],
9999
locus: HLA_LOCUS,
100100
example_standard: HLAStandard,
101-
) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq]]]:
101+
) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]]:
102102
"""
103103
Pair exons in the given input sequences.
104104
@@ -109,7 +109,7 @@ def pair_exons(
109109
sequences and attempt to match them up.
110110
"""
111111
matched_sequences: list[HLASequence] = []
112-
unmatched: dict[EXON_NAME, dict[str, Seq]] = {
112+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]] = {
113113
"exon2": {},
114114
"exon3": {},
115115
}
@@ -118,7 +118,7 @@ def pair_exons(
118118
# Skip over any sequences that aren't the right length or contain
119119
# bad bases.
120120
try:
121-
check_length(locus, str(sr.seq), sr.id)
121+
check_length(locus, str(sr.seq), sr.id or "")
122122
except BadLengthException:
123123
continue
124124

@@ -147,21 +147,21 @@ def pair_exons(
147147
exon3_bin = pad_short(example_standard.sequence, nuc2bin(exon3), "exon3")
148148
matched_sequences.append(
149149
HLASequence(
150-
two=(int(x) for x in exon2_bin),
150+
two=tuple(int(x) for x in exon2_bin),
151151
intron=(),
152-
three=(int(x) for x in exon3_bin),
152+
three=tuple(int(x) for x in exon3_bin),
153153
name=identifier,
154154
locus=locus,
155155
num_sequences_used=2,
156156
)
157157
)
158158
else:
159-
seq_numpy: np.array = pad_short(
159+
seq_numpy: np.ndarray = pad_short(
160160
example_standard.sequence,
161161
nuc2bin(sr.seq), # type: ignore
162162
None,
163163
)
164-
seq: tuple[int] = tuple(int(x) for x in seq_numpy)
164+
seq: tuple[int, ...] = tuple(int(x) for x in seq_numpy)
165165
matched_sequences.append(
166166
HLASequence(
167167
two=seq[:EXON2_LENGTH],

src/easyhla/clinical_hla_lib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,7 +348,7 @@ def identify_bc_sequence_files(
348348
if sample_match is None:
349349
logger.info(f'Skipping file "{filename}".')
350350
continue
351-
sample_name: str = sample_match.group(1)
351+
sample_name = sample_match.group(1)
352352
sample_exon: EXON_NAME = (
353353
"exon2" if sample_match.group(2).upper() == "A" else "exon3"
354354
)

src/easyhla/easyhla.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from datetime import datetime
55
from io import TextIOBase
66
from operator import attrgetter
7-
from typing import Final, Optional, TypedDict
7+
from typing import Final, Optional, TypedDict, cast
88

99
import numpy as np
1010
import yaml
@@ -127,7 +127,7 @@ def read_hla_standards(standards_io: TextIOBase) -> LoadedStandards:
127127
}
128128

129129
@staticmethod
130-
def load_default_hla_standards() -> dict[str, HLAStandard]:
130+
def load_default_hla_standards() -> LoadedStandards:
131131
"""
132132
Load HLA Standards from reference file.
133133
@@ -258,7 +258,7 @@ def combine_standards_stepper(
258258
# Keep track of matches we've already found:
259259
combos: dict[tuple[int, ...], int] = {}
260260

261-
current_rejection_threshold: int = float("inf")
261+
current_rejection_threshold: int | float = float("inf")
262262
for std_ai, std_a in enumerate(matching_stds):
263263
if std_a.mismatch > current_rejection_threshold:
264264
continue
@@ -269,8 +269,8 @@ def combine_standards_stepper(
269269
# "Mush" the two standards together to produce something
270270
# that looks like what you get when you sequence HLA.
271271
std_bin = np.array(std_b.sequence) | np.array(std_a.sequence)
272-
allele_pair: tuple[str, str] = tuple(
273-
sorted((std_a.allele, std_b.allele))
272+
allele_pair: tuple[str, str] = cast(
273+
tuple[str, str], tuple(sorted((std_a.allele, std_b.allele)))
274274
)
275275

276276
# There could be more than one combined standard with the
@@ -284,7 +284,7 @@ def combine_standards_stepper(
284284
else:
285285
seq_mask = np.full_like(std_bin, fill_value=15)
286286
# Note that seq is implicitly cast to a NumPy array:
287-
mismatches: int = np.count_nonzero((std_bin ^ seq) & seq_mask != 0)
287+
mismatches = np.count_nonzero((std_bin ^ seq) & seq_mask != 0)
288288
combos[combined_std_bin] = mismatches # cache this value
289289

290290
if mismatches > current_rejection_threshold:
@@ -330,7 +330,7 @@ def combine_standards(
330330

331331
combos: dict[tuple[int, ...], tuple[int, list[tuple[str, str]]]] = {}
332332

333-
fewest_mismatches: int = float("inf")
333+
fewest_mismatches: int | float = float("inf")
334334
for (
335335
combined_std_bin,
336336
mismatches,
@@ -346,7 +346,7 @@ def combine_standards(
346346
# criteria.
347347
result: dict[HLACombinedStandard, int] = {}
348348

349-
cutoff: int = max(fewest_mismatches, mismatch_threshold)
349+
cutoff: int | float = max(fewest_mismatches, mismatch_threshold)
350350
for combined_std_bin, mismatch_count_and_pair_list in combos.items():
351351
mismatch_count: int
352352
pair_list: list[tuple[str, str]]

src/easyhla/interpret_from_json_lib.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ def hla_sequence(self) -> HLASequence:
6868
exon3_str = self.seq1[-276:]
6969
else:
7070
exon2_str = self.seq1
71-
exon3_str = self.seq2
71+
exon3_str = self.seq2 or ""
7272

7373
num_sequences_used: int = 1 if self.locus == "A" else 2
7474
return HLASequence(

src/easyhla/update_alleles.py

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import time
99
from datetime import datetime
1010
from io import StringIO
11-
from typing import Final, Optional, TypedDict
11+
from typing import Final, Optional, TypedDict, cast
1212

1313
import Bio
1414
import requests
@@ -169,7 +169,7 @@ def get_commit_hash(
169169
return None
170170

171171

172-
def get_from_git(tag: str) -> tuple[str, datetime, str]:
172+
def get_from_git(tag: str) -> tuple[str, datetime, Optional[str]]:
173173
alleles_str: str
174174
retrieval_datetime: datetime
175175
for i in range(5):
@@ -185,7 +185,7 @@ def get_from_git(tag: str) -> tuple[str, datetime, str]:
185185
else:
186186
break
187187

188-
commit_hash: str
188+
commit_hash: Optional[str]
189189
for i in range(5):
190190
try:
191191
commit_hash = get_commit_hash(tag)
@@ -271,7 +271,7 @@ def main():
271271
logger.info(f"Retrieving alleles from tag {args.tag}....")
272272
alleles_str: str
273273
retrieval_datetime: datetime
274-
commit_hash: str
274+
commit_hash: Optional[str]
275275
alleles_str, retrieval_datetime, commit_hash = get_from_git(args.tag)
276276
logger.info(
277277
f"Alleles (version {args.tag}, commit hash {commit_hash}) retrieved at "
@@ -301,10 +301,12 @@ def main():
301301
logger.info("Identifying identical HLA alleles....")
302302
standards_for_saving: StoredHLAStandards = StoredHLAStandards(
303303
tag=args.tag,
304-
commit_hash=commit_hash,
304+
commit_hash=commit_hash or "",
305305
last_updated=retrieval_datetime,
306306
standards={
307-
locus: group_identical_alleles(raw_standards[locus])
307+
cast(HLA_LOCUS, locus): group_identical_alleles(
308+
raw_standards[cast(HLA_LOCUS, locus)]
309+
)
308310
for locus in ("A", "B", "C")
309311
},
310312
)

src/easyhla/utils.py

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from collections import defaultdict
55
from collections.abc import Iterable, Sequence
66
from datetime import datetime
7-
from typing import Final, Literal, Optional, Self
7+
from typing import Final, Literal, Optional, Self, cast
88

99
import numpy as np
1010
from Bio.SeqIO import SeqRecord
@@ -272,21 +272,21 @@ def pad_short(
272272
exon3_std_bin: np.ndarray = np.array(std_bin[-EXON3_LENGTH:])
273273
if exon == "exon2":
274274
left_pad, right_pad = calc_padding(
275-
exon2_std_bin,
275+
cast(Sequence[int], exon2_std_bin),
276276
seq_bin,
277277
)
278278
elif exon == "exon3":
279279
left_pad, right_pad = calc_padding(
280-
exon3_std_bin,
280+
cast(Sequence[int], exon3_std_bin),
281281
seq_bin,
282282
)
283283
else: # i.e. this is a full sequence possibly with intron
284284
left_pad, _ = calc_padding(
285-
exon2_std_bin,
285+
cast(Sequence[int], exon2_std_bin),
286286
seq_bin[: int(EXON2_LENGTH / 2)],
287287
)
288288
_, right_pad = calc_padding(
289-
exon3_std_bin,
289+
cast(Sequence[int], exon3_std_bin),
290290
seq_bin[-int(EXON3_LENGTH / 2) :],
291291
)
292292
return np.concatenate(
@@ -300,7 +300,7 @@ def pad_short(
300300

301301
def get_acceptable_match(
302302
sequence: str, reference: str, mismatch_threshold: int = 20
303-
) -> tuple[int, Optional[str]]:
303+
) -> tuple[int, str]:
304304
"""
305305
Get an "acceptable match" between the sequence and reference.
306306
@@ -316,7 +316,7 @@ def get_acceptable_match(
316316
raise ValueError("sequence must be at least as long as the reference")
317317

318318
score: int = len(reference)
319-
best_match: Optional[str] = None
319+
best_match: str = sequence[0 : len(reference)]
320320

321321
ref_np: np.ndarray = np.array(list(reference))
322322
for shift in range(len(sequence) - len(reference) + 1):
@@ -389,32 +389,36 @@ def collate_standards(
389389
checked to see if it has acceptable matches for both exon2 and exon3.
390390
"""
391391
output_status_updates: bool = False
392+
actual_report_interval: int = 1000
393+
actual_logger: logging.Logger
392394
if logger is not None and report_interval is not None and report_interval > 0:
393395
output_status_updates = True
396+
actual_report_interval = cast(int, report_interval)
397+
actual_logger = cast(logging.Logger, logger)
394398

395399
standards: dict[HLA_LOCUS, list[HLARawStandard]] = {
396400
"A": [],
397401
"B": [],
398402
"C": [],
399403
}
400404
for idx, allele_sr in enumerate(allele_srs, start=1):
401-
if output_status_updates and idx % report_interval == 0:
402-
logger.info(f"Processing sequence {idx} of {len(allele_srs)}....")
405+
if output_status_updates and idx % actual_report_interval == 0:
406+
actual_logger.info(f"Processing sequence {idx} of {len(allele_srs)}....")
403407

404408
# The FASTA headers look like:
405409
# >HLA:HLA00001 A*01:01:01:01 1098 bp
406410
allele_name: str = allele_sr.description.split(" ")[1]
407-
locus: HLA_LOCUS = allele_name[0]
408-
409-
if locus not in ("A", "B", "C"):
411+
raw_locus: str = allele_name[0]
412+
if raw_locus not in ("A", "B", "C"):
410413
continue
414+
locus: HLA_LOCUS = cast(HLA_LOCUS, raw_locus)
411415

412-
exon2_match: tuple[int, Optional[str]] = get_acceptable_match(
416+
exon2_match: tuple[int, str] = get_acceptable_match(
413417
str(allele_sr.seq),
414418
exon_references[locus]["exon2"],
415419
mismatch_threshold=acceptable_match_search_threshold,
416420
)
417-
exon3_match: tuple[int, Optional[str]] = get_acceptable_match(
421+
exon3_match: tuple[int, str] = get_acceptable_match(
418422
str(allele_sr.seq),
419423
exon_references[locus]["exon3"],
420424
mismatch_threshold=acceptable_match_search_threshold,
@@ -431,7 +435,7 @@ def collate_standards(
431435
)
432436
)
433437
elif logger is not None:
434-
logger.info(
438+
actual_logger.info(
435439
f'Rejecting "{allele_name}": {exon2_match[0]} exon2 mismatches,'
436440
f" {exon3_match[0]} exon3 mismatches."
437441
)
@@ -447,7 +451,10 @@ class GroupedAllele(BaseModel):
447451
exon3: str
448452
alleles: list[str]
449453

450-
@computed_field
454+
# Due to this issue:
455+
# https://github.com/python/mypy/issues/1362
456+
# we need the special mypy instruction here.
457+
@computed_field # type: ignore[misc]
451458
@property
452459
def name(self) -> str:
453460
"""

tests/bblab_lib_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import pytest
2-
from Bio.Seq import Seq
2+
from Bio.Seq import MutableSeq, Seq
33
from Bio.SeqIO import SeqRecord
44

55
from easyhla.bblab_lib import (
@@ -352,7 +352,7 @@
352352
)
353353
def test_pair_exons_helper(
354354
sr: SeqRecord,
355-
unmatched: dict[EXON_NAME, dict[str, Seq]],
355+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
356356
expected_id: str,
357357
expected_is_exon: bool,
358358
expected_matched: bool,
@@ -730,7 +730,7 @@ def test_pair_exons(
730730
expected_unmatched: dict[EXON_NAME, dict[str, Seq]],
731731
):
732732
paired_seqs: list[HLASequence]
733-
unmatched: dict[EXON_NAME, dict[str, Seq]]
733+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]
734734

735735
current_standard: HLARawStandard = HLA_STANDARDS[locus]
736736
fake_standard: HLAStandard = HLAStandard(

0 commit comments

Comments
 (0)