44from collections import defaultdict
55from collections .abc import Iterable , Sequence
66from datetime import datetime
7- from typing import Final , Literal , Optional , Self
7+ from typing import Final , Literal , Optional , Self , cast
88
99import numpy as np
1010from Bio .SeqIO import SeqRecord
@@ -272,21 +272,21 @@ def pad_short(
272272 exon3_std_bin : np .ndarray = np .array (std_bin [- EXON3_LENGTH :])
273273 if exon == "exon2" :
274274 left_pad , right_pad = calc_padding (
275- exon2_std_bin ,
275+ cast ( Sequence [ int ], exon2_std_bin ) ,
276276 seq_bin ,
277277 )
278278 elif exon == "exon3" :
279279 left_pad , right_pad = calc_padding (
280- exon3_std_bin ,
280+ cast ( Sequence [ int ], exon3_std_bin ) ,
281281 seq_bin ,
282282 )
283283 else : # i.e. this is a full sequence possibly with intron
284284 left_pad , _ = calc_padding (
285- exon2_std_bin ,
285+ cast ( Sequence [ int ], exon2_std_bin ) ,
286286 seq_bin [: int (EXON2_LENGTH / 2 )],
287287 )
288288 _ , right_pad = calc_padding (
289- exon3_std_bin ,
289+ cast ( Sequence [ int ], exon3_std_bin ) ,
290290 seq_bin [- int (EXON3_LENGTH / 2 ) :],
291291 )
292292 return np .concatenate (
@@ -300,7 +300,7 @@ def pad_short(
300300
301301def get_acceptable_match (
302302 sequence : str , reference : str , mismatch_threshold : int = 20
303- ) -> tuple [int , Optional [ str ] ]:
303+ ) -> tuple [int , str ]:
304304 """
305305 Get an "acceptable match" between the sequence and reference.
306306
@@ -316,7 +316,7 @@ def get_acceptable_match(
316316 raise ValueError ("sequence must be at least as long as the reference" )
317317
318318 score : int = len (reference )
319- best_match : Optional [ str ] = None
319+ best_match : str = sequence [ 0 : len ( reference )]
320320
321321 ref_np : np .ndarray = np .array (list (reference ))
322322 for shift in range (len (sequence ) - len (reference ) + 1 ):
@@ -389,32 +389,36 @@ def collate_standards(
389389 checked to see if it has acceptable matches for both exon2 and exon3.
390390 """
391391 output_status_updates : bool = False
392+ actual_report_interval : int = 1000
393+ actual_logger : logging .Logger
392394 if logger is not None and report_interval is not None and report_interval > 0 :
393395 output_status_updates = True
396+ actual_report_interval = cast (int , report_interval )
397+ actual_logger = cast (logging .Logger , logger )
394398
395399 standards : dict [HLA_LOCUS , list [HLARawStandard ]] = {
396400 "A" : [],
397401 "B" : [],
398402 "C" : [],
399403 }
400404 for idx , allele_sr in enumerate (allele_srs , start = 1 ):
401- if output_status_updates and idx % report_interval == 0 :
402- logger .info (f"Processing sequence { idx } of { len (allele_srs )} ...." )
405+ if output_status_updates and idx % actual_report_interval == 0 :
406+ actual_logger .info (f"Processing sequence { idx } of { len (allele_srs )} ...." )
403407
404408 # The FASTA headers look like:
405409 # >HLA:HLA00001 A*01:01:01:01 1098 bp
406410 allele_name : str = allele_sr .description .split (" " )[1 ]
407- locus : HLA_LOCUS = allele_name [0 ]
408-
409- if locus not in ("A" , "B" , "C" ):
411+ raw_locus : str = allele_name [0 ]
412+ if raw_locus not in ("A" , "B" , "C" ):
410413 continue
414+ locus : HLA_LOCUS = cast (HLA_LOCUS , raw_locus )
411415
412- exon2_match : tuple [int , Optional [ str ] ] = get_acceptable_match (
416+ exon2_match : tuple [int , str ] = get_acceptable_match (
413417 str (allele_sr .seq ),
414418 exon_references [locus ]["exon2" ],
415419 mismatch_threshold = acceptable_match_search_threshold ,
416420 )
417- exon3_match : tuple [int , Optional [ str ] ] = get_acceptable_match (
421+ exon3_match : tuple [int , str ] = get_acceptable_match (
418422 str (allele_sr .seq ),
419423 exon_references [locus ]["exon3" ],
420424 mismatch_threshold = acceptable_match_search_threshold ,
@@ -431,7 +435,7 @@ def collate_standards(
431435 )
432436 )
433437 elif logger is not None :
434- logger .info (
438+ actual_logger .info (
435439 f'Rejecting "{ allele_name } ": { exon2_match [0 ]} exon2 mismatches,'
436440 f" { exon3_match [0 ]} exon3 mismatches."
437441 )
@@ -447,7 +451,10 @@ class GroupedAllele(BaseModel):
447451 exon3 : str
448452 alleles : list [str ]
449453
450- @computed_field
454+ # mypy does not support decorators stacked on top of @property; see
455+ # https://github.com/python/mypy/issues/1362
456+ # The error it reports for @computed_field is silenced with the type-ignore below.
457+ @computed_field # type: ignore[misc]
451458 @property
452459 def name (self ) -> str :
453460 """
0 commit comments