Skip to content

Commit a139acc

Browse files
author
rhliang
committed
WIP: a first pass of updating for the changes in the EasyHLA object.
Tests still need to be fixed!
1 parent c533935 commit a139acc

File tree

4 files changed

+51
-105
lines changed

src/easyhla/bblab.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ def report_unmatched_sequences(
7070

7171
def process_from_file_to_files(
7272
hla_alg: EasyHLA,
73+
locus: HLALocus,
7374
filename: str,
7475
output_filename: str,
7576
mismatches_filename: str,
@@ -97,7 +98,7 @@ def process_from_file_to_files(
9798
with open(filename, "r", encoding="utf-8") as f:
9899
matched_sequences, unmatched = pair_exons(
99100
Bio.SeqIO.parse(f, "fasta"),
100-
hla_alg.locus,
101+
locus.value,
101102
list(hla_alg.standards.values())[0],
102103
)
103104

@@ -234,10 +235,11 @@ def main(
234235
) -> None:
235236
min_log_level = max(min(40, (4 - log_level) * 10), 50)
236237
logger.setLevel(min_log_level)
237-
easyhla = EasyHLA(locus=locus.value)
238+
easyhla = EasyHLA()
238239

239240
process_from_file_to_files(
240241
easyhla,
242+
locus,
241243
sequence_file.as_posix(),
242244
output_file.as_posix(),
243245
mismatch_file.as_posix(),

src/easyhla/clinical_hla.py

Lines changed: 25 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,7 @@
2323
from .easyhla import EasyHLA
2424
from .models import (
2525
HLAInterpretation,
26-
HLAProteinPair,
2726
HLASequence,
28-
HLAStandard,
2927
)
3028
from .utils import HLA_LOCUS
3129

@@ -49,42 +47,10 @@
4947
# These are the "configuration files" that the algorithm uses; these are or may
5048
# be updated, in which case you specify the path to the new version in the
5149
# environment.
52-
HLA_STANDARDS: Final[dict[HLA_LOCUS, Optional[str]]] = {
53-
"A": os.environ.get("HLA_STANDARDS_A"),
54-
"B": os.environ.get("HLA_STANDARDS_B"),
55-
"C": os.environ.get("HLA_STANDARDS_C"),
56-
}
50+
HLA_STANDARDS: Final[str] = os.environ.get("HLA_STANDARDS")
5751
HLA_FREQUENCIES: Final[str] = os.environ.get("HLA_FREQUENCIES")
5852

5953

60-
def interpret_sequences(
61-
sequences: list[HLASequence],
62-
locus: HLA_LOCUS,
63-
standards_file: Optional[str] = None,
64-
frequencies_file: Optional[str] = None,
65-
) -> tuple[list[HLAInterpretation], dict[HLAProteinPair, int]]:
66-
curr_standards: Optional[dict[str, HLAStandard]] = None
67-
curr_frequencies: Optional[dict[HLAProteinPair, int]] = None
68-
if frequencies_file is not None:
69-
with open(frequencies_file) as f:
70-
curr_frequencies = EasyHLA.read_hla_frequencies(locus, f)
71-
if standards_file is not None:
72-
with open(standards_file) as f:
73-
curr_standards = EasyHLA.read_hla_standards(f)
74-
easyhla: EasyHLA = EasyHLA(
75-
locus,
76-
hla_standards=curr_standards,
77-
hla_frequencies=curr_frequencies,
78-
)
79-
interpretations: list[HLAInterpretation] = []
80-
for sequence in sequences:
81-
try:
82-
interpretations.append(easyhla.interpret(sequence))
83-
except EasyHLA.NoMatchingStandards:
84-
pass
85-
return interpretations, easyhla.hla_frequencies
86-
87-
8854
def prepare_interpretation_for_serialization(
8955
interpretation: HLAInterpretation,
9056
locus: HLA_LOCUS,
@@ -110,16 +76,27 @@ class SequencesByLocus(TypedDict):
11076
C: list[HLASequenceC]
11177

11278

79+
def interpret_sequences(
80+
hla_alg: EasyHLA,
81+
sequences: list[HLASequence],
82+
) -> list[HLAInterpretation]:
83+
interpretations: list[HLAInterpretation] = []
84+
for sequence in sequences:
85+
try:
86+
interpretations.append(hla_alg.interpret(sequence))
87+
except EasyHLA.NoMatchingStandards:
88+
pass
89+
return interpretations
90+
91+
11392
def clinical_hla_driver(
11493
input_dir: str,
11594
db_engine: Optional[Engine] = None,
116-
hla_a_standards: Optional[str] = None,
11795
hla_a_results: Optional[str] = None,
118-
hla_b_standards: Optional[str] = None,
11996
hla_b_results: Optional[str] = None,
120-
hla_c_standards: Optional[str] = None,
12197
hla_c_results: Optional[str] = None,
122-
hla_frequencies: Optional[str] = None,
98+
standards_path: Optional[str] = None,
99+
frequencies_path: Optional[str] = None,
123100
) -> None:
124101
# Read in the sequences:
125102
sequences: dict[HLA_LOCUS, list[HLASequence]] = {
@@ -131,35 +108,15 @@ def clinical_hla_driver(
131108
sequences[locus] = read_bc_sequences(input_dir, locus, logger)
132109

133110
# Perform interpretations:
134-
standards_files: dict[HLA_LOCUS, Optional[str]] = {
135-
"A": hla_a_standards,
136-
"B": hla_b_standards,
137-
"C": hla_c_standards,
138-
}
139111
interpretations: dict[HLA_LOCUS, list[HLAInterpretation]] = {
140112
"A": [],
141113
"B": [],
142114
"C": [],
143115
}
144-
frequencies: dict[HLA_LOCUS, dict[HLAProteinPair, int]] = {
145-
"A": {},
146-
"B": {},
147-
"C": {},
148-
}
149-
150116
processing_datetime: datetime = datetime.now()
151-
117+
easyhla: EasyHLA = EasyHLA.use_config(standards_path, frequencies_path)
152118
for locus in ("A", "B", "C"):
153-
curr_interps: list[HLAInterpretation]
154-
curr_freqs: dict[HLAProteinPair, int]
155-
curr_interps, curr_freqs = interpret_sequences(
156-
sequences[locus],
157-
locus,
158-
standards_files[locus],
159-
hla_frequencies,
160-
)
161-
interpretations[locus] = curr_interps
162-
frequencies[locus] = curr_freqs
119+
interpretations[locus] = interpret_sequences(easyhla, sequences[locus])
163120

164121
# Prepare the interpretations for output:
165122
seqs_for_db: SequencesByLocus = {
@@ -218,18 +175,18 @@ def main():
218175
default=DEFAULT_INPUT_DIR,
219176
)
220177
for locus in ("A", "B", "C"):
221-
parser.add_argument(
222-
f"--hla_{locus.lower()}_standards",
223-
help=f"CSV file containing the (reduced) HLA-{locus} standards to use",
224-
type=str,
225-
default=None,
226-
)
227178
parser.add_argument(
228179
f"--hla_{locus.lower()}_results",
229180
help=f"CSV file containing the HLA-{locus} results",
230181
type=str,
231182
default=f"hla_{locus.lower()}_seq.csv",
232183
)
184+
parser.add_argument(
185+
"--hla_standards",
186+
help="YAML file containing the HLA standards to use",
187+
type=str,
188+
default=None,
189+
)
233190
parser.add_argument(
234191
"--hla_frequencies",
235192
help=(
@@ -253,7 +210,6 @@ def main():
253210
help="If set, skip connecting to the database entirely (overrules --sqlite)",
254211
action="store_true",
255212
)
256-
# FIXME what to do about "last modified"?
257213
args: argparse.Namespace = parser.parse_args()
258214

259215
# Connect to the database:
@@ -291,12 +247,10 @@ def schema_workaround(dbapi_connection, _):
291247
clinical_hla_driver(
292248
args.input_dir,
293249
db_engine,
294-
args.hla_a_standards,
295250
args.hla_a_results,
296-
args.hla_b_standards,
297251
args.hla_b_results,
298-
args.hla_c_standards,
299252
args.hla_c_results,
253+
args.hla_standards,
300254
args.hla_frequencies,
301255
)
302256

src/easyhla/easyhla.py

Lines changed: 15 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
DATE_FORMAT = "%a %b %d %H:%M:%S %Z %Y"
3131

3232

33-
class ProcessedStoredStandards(TypedDict):
33+
class LoadedStandards(TypedDict):
3434
tag: str
3535
last_modified: datetime
3636
standards: dict[HLA_LOCUS, dict[str, HLAStandard]]
@@ -49,7 +49,7 @@ class EasyHLA:
4949

5050
def __init__(
5151
self,
52-
hla_standards: Optional[ProcessedStoredStandards] = None,
52+
loaded_standards: Optional[LoadedStandards] = None,
5353
hla_frequencies: Optional[dict[HLA_LOCUS, dict[HLAProteinPair, int]]] = None,
5454
):
5555
"""
@@ -58,14 +58,14 @@ def __init__(
5858
:param logger: Python logger object, defaults to None
5959
:type logger: Optional[logging.Logger], optional
6060
"""
61-
if hla_standards is None:
62-
hla_standards = self.load_default_hla_standards()
61+
if loaded_standards is None:
62+
loaded_standards = self.load_default_hla_standards()
6363

64-
self.hla_standards: dict[HLA_LOCUS, dict[str, HLAStandard]] = hla_standards[
64+
self.hla_standards: dict[HLA_LOCUS, dict[str, HLAStandard]] = loaded_standards[
6565
"standards"
6666
]
67-
self.last_modified: datetime = hla_standards["last_modified"]
68-
self.tag: str = hla_standards["tag"]
67+
self.last_modified: datetime = loaded_standards["last_modified"]
68+
self.tag: str = loaded_standards["tag"]
6969

7070
self.hla_frequencies: dict[HLA_LOCUS, dict[HLAProteinPair, int]]
7171
if hla_frequencies is not None:
@@ -82,7 +82,7 @@ def use_config(
8282
"""
8383
An alternate constructor that accepts file paths for the configuration.
8484
"""
85-
processed_stds: Optional[ProcessedStoredStandards] = None
85+
processed_stds: Optional[LoadedStandards] = None
8686
frequencies: Optional[dict[HLA_LOCUS, dict[HLAProteinPair, int]]] = None
8787

8888
if standards_path is not None:
@@ -96,9 +96,7 @@ def use_config(
9696
return cls(processed_stds, frequencies)
9797

9898
@staticmethod
99-
def read_hla_standards(
100-
standards_io: TextIOBase,
101-
) -> ProcessedStoredStandards:
99+
def read_hla_standards(standards_io: TextIOBase) -> LoadedStandards:
102100
"""
103101
Read HLA standards from a specified file-like object.
104102
@@ -133,7 +131,8 @@ def read_hla_standards(
133131
"standards": hla_stds,
134132
}
135133

136-
def load_default_hla_standards(self) -> dict[str, HLAStandard]:
134+
@staticmethod
135+
def load_default_hla_standards() -> dict[str, HLAStandard]:
137136
"""
138137
Load HLA Standards from reference file.
139138
@@ -146,7 +145,7 @@ def load_default_hla_standards(self) -> dict[str, HLAStandard]:
146145
"hla_standards.yaml",
147146
)
148147
with open(standards_filename) as standards_file:
149-
return self.read_hla_standards(standards_file)
148+
return EasyHLA.read_hla_standards(standards_file)
150149

151150
@staticmethod
152151
def read_hla_frequencies(
@@ -184,9 +183,8 @@ def read_hla_frequencies(
184183
hla_freqs[locus][protein_pair] += 1
185184
return hla_freqs
186185

187-
def load_default_hla_frequencies(
188-
self,
189-
) -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]:
186+
@staticmethod
187+
def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]:
190188
"""
191189
Load HLA frequencies from reference file.
192190
@@ -205,7 +203,7 @@ def load_default_hla_frequencies(
205203
"hla_frequencies.csv",
206204
)
207205
with open(default_frequencies_filename, "r") as f:
208-
hla_freqs = self.read_hla_frequencies(f)
206+
hla_freqs = EasyHLA.read_hla_frequencies(f)
209207
return hla_freqs
210208

211209
@staticmethod

src/easyhla/interpret_from_json.py

Lines changed: 7 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -39,22 +39,14 @@ def main():
3939
error_result: HLAResult = HLAResult(errors=errors)
4040
print(error_result.model_dump_json())
4141
else:
42-
curr_standards: Optional[dict[str, HLAStandard]] = None
43-
if hla_input.hla_std_path is not None:
44-
with open(hla_input.hla_std_path) as f:
45-
curr_standards = EasyHLA.read_hla_standards(f)
46-
47-
curr_frequencies: Optional[dict[HLAProteinPair, int]] = None
48-
if hla_input.hla_freq_path is not None:
49-
with open(hla_input.hla_freq_path) as f:
50-
curr_frequencies = EasyHLA.read_hla_frequencies(hla_input.locus, f)
51-
52-
easyhla: EasyHLA = EasyHLA(
53-
locus=hla_input.locus,
54-
hla_standards=curr_standards,
55-
hla_frequencies=curr_frequencies,
42+
easyhla: EasyHLA = EasyHLA.use_config(
43+
hla_input.hla_std_path,
44+
hla_input.hla_freq_path,
45+
)
46+
interp: HLAInterpretation = easyhla.interpret(
47+
hla_input.hla_sequence(),
48+
hla_input.locus,
5649
)
57-
interp: HLAInterpretation = easyhla.interpret(hla_input.hla_sequence())
5850
print(HLAResult.build_from_interpretation(interp).model_dump_json())
5951

6052

0 commit comments

Comments
 (0)