@@ -337,8 +337,8 @@ def interpret(
337337 if f"exon{ exon } " in samp .lower ():
338338 is_exon = True
339339 _samp = samp .split ("_" )[0 ]
340- for other in unmatched [3 - exon ]:
341- if other .description .lower (). startswith ( _samp ):
340+ for i , other in enumerate ( unmatched [3 - exon ]) :
341+ if _samp . lower () in other .description .lower ():
342342 matched = True
343343 intron = ""
344344 if exon == 2 :
@@ -348,7 +348,7 @@ def interpret(
348348 exon2 = str (other .seq )
349349 exon3 = str (entry .seq )
350350
351- unmatched [3 - exon ].remove ( other )
351+ unmatched [3 - exon ].pop ( i )
352352 samp = _samp
353353 break
354354
@@ -487,6 +487,13 @@ def interpret(
487487 # print(row)
488488 return HLAResult (result = row , num_pats = 1 , num_seqs = nseqs )
489489
def report_unmatched_sequences(
    self, unmatched: List[List[Bio.SeqIO.SeqRecord]]
) -> None:
    """Print one notice for every record still waiting for its partner exon.

    ``unmatched`` is read as two buckets, index 0 then index 1.  Records in
    bucket 0 are reported as lacking an exon3 partner, and records in
    bucket 1 as lacking an exon2 partner.  (Presumably bucket 0 therefore
    holds exon2 sequences and bucket 1 holds exon3 sequences -- confirm
    against the code that fills the buckets.)

    Nothing is returned and ``unmatched`` is not modified; the only effect
    is the text written through ``print``.
    """
    # Each pair is (bucket index, number of the exon that was never found).
    for bucket, missing_exon in ((0, 3), (1, 2)):
        for record in unmatched[bucket]:
            print(f"No matching exon{missing_exon} for {record.description}")
496+
490497 def run (
491498 self ,
492499 letter : HLA_TYPES ,
@@ -498,9 +505,9 @@ def run(
498505 npats = 0
499506 nseqs = 0
500507 time_start = datetime .now ()
508+ unmatched : List [List [Bio .SeqIO .SeqRecord ]] = [[], []]
501509 with open (filename , "r" , encoding = "utf-8" ) as f :
502510 fasta = Bio .SeqIO .parse (f , "fasta" )
503- unmatched : List [List [Bio .SeqIO .SeqRecord ]] = [[], []]
504511 for i , entry in enumerate (fasta ):
505512 result = self .interpret (
506513 letter ,
@@ -515,6 +522,8 @@ def run(
515522 npats += result .num_pats
516523 nseqs += result .num_seqs
517524
525+ self .report_unmatched_sequences (unmatched )
526+
518527 with open (output_filename , "w" , encoding = "utf-8" ) as f :
519528 f .write (
520529 f"Run commencing { time_start .strftime (DATE_FORMAT )} . Allele definitions last updated { self .load_allele_definitions_last_modified_time ().strftime (DATE_FORMAT )} .\n "
0 commit comments