2323from .easyhla import EasyHLA
2424from .models import (
2525 HLAInterpretation ,
26- HLAProteinPair ,
2726 HLASequence ,
28- HLAStandard ,
2927)
3028from .utils import HLA_LOCUS
3129
# These are the "configuration files" that the algorithm uses. They are updated
# from time to time, in which case you specify the path to the new version via
# the corresponding environment variable.
52- HLA_STANDARDS : Final [dict [HLA_LOCUS , Optional [str ]]] = {
53- "A" : os .environ .get ("HLA_STANDARDS_A" ),
54- "B" : os .environ .get ("HLA_STANDARDS_B" ),
55- "C" : os .environ .get ("HLA_STANDARDS_C" ),
56- }
# os.environ.get() returns None when the variable is unset, so both of these
# are Optional[str] rather than str.
HLA_STANDARDS: Final[Optional[str]] = os.environ.get("HLA_STANDARDS")
HLA_FREQUENCIES: Final[Optional[str]] = os.environ.get("HLA_FREQUENCIES")
5852
5953
60- def interpret_sequences (
61- sequences : list [HLASequence ],
62- locus : HLA_LOCUS ,
63- standards_file : Optional [str ] = None ,
64- frequencies_file : Optional [str ] = None ,
65- ) -> tuple [list [HLAInterpretation ], dict [HLAProteinPair , int ]]:
66- curr_standards : Optional [dict [str , HLAStandard ]] = None
67- curr_frequencies : Optional [dict [HLAProteinPair , int ]] = None
68- if frequencies_file is not None :
69- with open (frequencies_file ) as f :
70- curr_frequencies = EasyHLA .read_hla_frequencies (locus , f )
71- if standards_file is not None :
72- with open (standards_file ) as f :
73- curr_standards = EasyHLA .read_hla_standards (f )
74- easyhla : EasyHLA = EasyHLA (
75- locus ,
76- hla_standards = curr_standards ,
77- hla_frequencies = curr_frequencies ,
78- )
79- interpretations : list [HLAInterpretation ] = []
80- for sequence in sequences :
81- try :
82- interpretations .append (easyhla .interpret (sequence ))
83- except EasyHLA .NoMatchingStandards :
84- pass
85- return interpretations , easyhla .hla_frequencies
86-
87-
8854def prepare_interpretation_for_serialization (
8955 interpretation : HLAInterpretation ,
9056 locus : HLA_LOCUS ,
@@ -110,16 +76,27 @@ class SequencesByLocus(TypedDict):
11076 C : list [HLASequenceC ]
11177
11278
def interpret_sequences(
    hla_alg: EasyHLA,
    sequences: list[HLASequence],
) -> list[HLAInterpretation]:
    """Interpret each sequence with ``hla_alg`` and collect the results in order.

    A sequence for which ``hla_alg.interpret`` raises
    ``EasyHLA.NoMatchingStandards`` is skipped and contributes nothing to the
    returned list; any other exception propagates to the caller.
    """
    collected: list[HLAInterpretation] = []
    for seq in sequences:
        try:
            result = hla_alg.interpret(seq)
        except EasyHLA.NoMatchingStandards:
            # Deliberately best-effort: drop this sequence and keep going.
            continue
        collected.append(result)
    return collected
90+
91+
11392def clinical_hla_driver (
11493 input_dir : str ,
11594 db_engine : Optional [Engine ] = None ,
116- hla_a_standards : Optional [str ] = None ,
11795 hla_a_results : Optional [str ] = None ,
118- hla_b_standards : Optional [str ] = None ,
11996 hla_b_results : Optional [str ] = None ,
120- hla_c_standards : Optional [str ] = None ,
12197 hla_c_results : Optional [str ] = None ,
122- hla_frequencies : Optional [str ] = None ,
98+ standards_path : Optional [str ] = None ,
99+ frequencies_path : Optional [str ] = None ,
123100) -> None :
124101 # Read in the sequences:
125102 sequences : dict [HLA_LOCUS , list [HLASequence ]] = {
@@ -131,35 +108,15 @@ def clinical_hla_driver(
131108 sequences [locus ] = read_bc_sequences (input_dir , locus , logger )
132109
133110 # Perform interpretations:
134- standards_files : dict [HLA_LOCUS , Optional [str ]] = {
135- "A" : hla_a_standards ,
136- "B" : hla_b_standards ,
137- "C" : hla_c_standards ,
138- }
139111 interpretations : dict [HLA_LOCUS , list [HLAInterpretation ]] = {
140112 "A" : [],
141113 "B" : [],
142114 "C" : [],
143115 }
144- frequencies : dict [HLA_LOCUS , dict [HLAProteinPair , int ]] = {
145- "A" : {},
146- "B" : {},
147- "C" : {},
148- }
149-
150116 processing_datetime : datetime = datetime .now ()
151-
117+ easyhla : EasyHLA = EasyHLA . use_config ( standards_path , frequencies_path )
152118 for locus in ("A" , "B" , "C" ):
153- curr_interps : list [HLAInterpretation ]
154- curr_freqs : dict [HLAProteinPair , int ]
155- curr_interps , curr_freqs = interpret_sequences (
156- sequences [locus ],
157- locus ,
158- standards_files [locus ],
159- hla_frequencies ,
160- )
161- interpretations [locus ] = curr_interps
162- frequencies [locus ] = curr_freqs
119+ interpretations [locus ] = interpret_sequences (easyhla , sequences [locus ])
163120
164121 # Prepare the interpretations for output:
165122 seqs_for_db : SequencesByLocus = {
@@ -218,18 +175,18 @@ def main():
218175 default = DEFAULT_INPUT_DIR ,
219176 )
220177 for locus in ("A" , "B" , "C" ):
221- parser .add_argument (
222- f"--hla_{ locus .lower ()} _standards" ,
223- help = f"CSV file containing the (reduced) HLA-{ locus } standards to use" ,
224- type = str ,
225- default = None ,
226- )
227178 parser .add_argument (
228179 f"--hla_{ locus .lower ()} _results" ,
229180 help = f"CSV file containing the HLA-{ locus } results" ,
230181 type = str ,
231182 default = f"hla_{ locus .lower ()} _seq.csv" ,
232183 )
184+ parser .add_argument (
185+ "--hla_standards" ,
186+ help = "YAML file containing the HLA standards to use" ,
187+ type = str ,
188+ default = None ,
189+ )
233190 parser .add_argument (
234191 "--hla_frequencies" ,
235192 help = (
@@ -253,7 +210,6 @@ def main():
253210 help = "If set, skip connecting to the database entirely (overrules --sqlite)" ,
254211 action = "store_true" ,
255212 )
256- # FIXME what to do about "last modified"?
257213 args : argparse .Namespace = parser .parse_args ()
258214
259215 # Connect to the database:
@@ -291,12 +247,10 @@ def schema_workaround(dbapi_connection, _):
291247 clinical_hla_driver (
292248 args .input_dir ,
293249 db_engine ,
294- args .hla_a_standards ,
295250 args .hla_a_results ,
296- args .hla_b_standards ,
297251 args .hla_b_results ,
298- args .hla_c_standards ,
299252 args .hla_c_results ,
253+ args .hla_standards ,
300254 args .hla_frequencies ,
301255 )
302256
0 commit comments