Skip to content

Commit 01c14a0

Browse files
authored
Merge pull request #6 from cfe-lab/FoldClinicalIn
Merging to attempt to create a release candidate.
2 parents d28ec7e + caadbc7 commit 01c14a0

File tree

11 files changed, with 463 additions and 161 deletions.
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Build and publish the Ruby package
2+
3+
on:
4+
release:
5+
types: [published]
6+
7+
jobs:
8+
build_gem:
9+
runs-on: ubuntu-latest
10+
11+
env:
12+
HLA_ALGORITHM_VERSION: ${{ github.ref_name }}
13+
BUILD_PATH: ${{ github.workspace }}/ruby
14+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
15+
16+
steps:
17+
- name: Install Ruby
18+
run: |
19+
sudo apt update
20+
sudo apt install -y ruby
21+
22+
- name: Checkout code from repo
23+
uses: actions/checkout@v4
24+
25+
- name: Build the Ruby package
26+
run: |
27+
cd $BUILD_PATH
28+
gem build ${BUILD_PATH}/hla_algorithm.gemspec
29+
30+
- name: Publish gem to GitHub Packages
31+
run: |
32+
mkdir -p $HOME/.gem
33+
touch $HOME/.gem/credentials
34+
chmod 0600 $HOME/.gem/credentials
35+
printf -- "---\n:github: Bearer ${GH_TOKEN}\n" > $HOME/.gem/credentials
36+
gem push --KEY github --host https://rubygems.pkg.github.com/${OWNER} ${BUILD_PATH}/*.gem
37+
env:
38+
OWNER: ${{ github.repository_owner }}
39+
40+
- name: Add gem as a release asset
41+
run: gh release upload $HLA_ALGORITHM_VERSION ${BUILD_PATH}/*.gem

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ package = true
102102

103103
[tool.uv-dynamic-versioning]
104104
vcs = "git"
105-
style = "semver"
105+
style = "pep440"
106106
fallback-version = "0.0.0"
107107

108108
[tool.pytest.ini_options]

ruby/lib/hla_algorithm.rb

Lines changed: 26 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
HLA_INTERPRET_FROM_JSON = ENV['HLA_INTERPRET_FROM_JSON']
77
if HLA_INTERPRET_FROM_JSON.nil?
8-
raise "HLA_INTERPRET_FROM_JSON must be set"
8+
raise 'HLA_INTERPRET_FROM_JSON must be set'
99
end
1010

1111

@@ -26,18 +26,21 @@ class HLAResult
2626
)
2727

2828
def initialize(raw_result)
29-
@seqs = raw_result["seqs"]
30-
@alleles_all = raw_result["alleles_all"]
31-
@alleles_clean = raw_result["alleles_clean"]
32-
@alleles_for_mismatches = raw_result["alleles_for_mismatches"]
33-
@mismatches = raw_result["mismatches"]
34-
@ambiguous = raw_result["ambiguous"]
35-
@homozygous = raw_result["homozygous"]
36-
@locus = raw_result["locus"]
37-
@alg_version = raw_result["alg_version"]
38-
@b5701 = raw_result["b5701"]
39-
@dist_b5701 = raw_result["dist_b5701"]
40-
@errors = raw_result["errors"]
29+
@seqs = raw_result['seqs']
30+
@alleles_all = raw_result['alleles_all']
31+
@alleles_clean = raw_result['alleles_clean']
32+
@alleles_for_mismatches = raw_result['alleles_for_mismatches']
33+
@mismatches = raw_result['mismatches']
34+
@ambiguous = raw_result['ambiguous']
35+
@homozygous = raw_result['homozygous']
36+
@locus = raw_result['locus']
37+
@alg_version = raw_result['alg_version']
38+
@alleles_version = raw_result['alleles_version']
39+
@alleles_last_updated = raw_result['alleles_last_updated']
40+
@b5701 = raw_result['b5701']
41+
@dist_b5701 = raw_result['dist_b5701']
42+
@errors = raw_result['errors']
43+
@all_mismatches = raw_result['all_mismatches']
4144
end
4245
end
4346

@@ -51,20 +54,21 @@ def initialize(
5154
@hla_freq_path = hla_freq_path
5255
end
5356

54-
def analyze(seqs, locus='B')
57+
def analyze(seqs, locus='B', threshold=nil)
5558
hla_input = {
56-
"seq1" => seqs[0],
57-
"seq2" => seqs[1],
58-
"locus" => locus,
59-
"hla_std_path" => nil,
60-
"hla_freq_path" => nil
59+
'seq1' => seqs[0],
60+
'seq2' => seqs[1],
61+
'locus' => locus,
62+
'threshold' => threshold,
63+
'hla_std_path' => nil,
64+
'hla_freq_path' => nil
6165
}
6266

6367
if (!@hla_std_path.nil?)
64-
hla_input["hla_std_path"] = File.expand_path(@hla_std_path)
68+
hla_input['hla_std_path'] = File.expand_path(@hla_std_path)
6569
end
6670
if (!@hla_freq_path.nil?)
67-
hla_input["hla_freq_path"] = File.expand_path(@hla_freq_path)
71+
hla_input['hla_freq_path'] = File.expand_path(@hla_freq_path)
6872
end
6973

7074
python_stdout, python_stderr, wait_thread = Open3.capture3(
@@ -73,7 +77,7 @@ def analyze(seqs, locus='B')
7377
)
7478

7579
if !wait_thread.success?
76-
error_msg = "HLA algorithm failed with exit code "\
80+
error_msg = 'HLA algorithm failed with exit code '\
7781
"#{wait_thread.value}. Error output:\n"\
7882
"#{python_stderr}"
7983
raise error_msg

src/hla_algorithm/hla_algorithm.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,7 @@ def load_default_hla_standards() -> LoadedStandards:
134134
:return: List of known HLA standards
135135
:rtype: list[HLAStandard]
136136
"""
137-
standards_filename: str = os.path.join(
137+
standards_filename: str = HLAAlgorithm._path_join_shim(
138138
os.path.dirname(__file__),
139139
"default_data",
140140
"hla_standards.yaml",
@@ -192,6 +192,13 @@ def read_hla_frequencies(
192192
hla_freqs[locus][protein_pair] += 1
193193
return hla_freqs
194194

195+
@staticmethod
196+
def _path_join_shim(*args) -> str:
197+
"""
198+
A shim for os.path.join which allows us to mock out the method easily in testing.
199+
"""
200+
return os.path.join(*args)
201+
195202
@staticmethod
196203
def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]:
197204
"""
@@ -201,7 +208,7 @@ def load_default_hla_frequencies() -> dict[HLA_LOCUS, dict[HLAProteinPair, int]]
201208
:rtype: dict[HLA_LOCUS, dict[HLAProteinPair, int]]
202209
"""
203210
hla_freqs: dict[HLA_LOCUS, dict[HLAProteinPair, int]]
204-
default_frequencies_filename: str = os.path.join(
211+
default_frequencies_filename: str = HLAAlgorithm._path_join_shim(
205212
os.path.dirname(__file__),
206213
"default_data",
207214
"hla_frequencies.csv",
@@ -282,9 +289,8 @@ def combine_standards_stepper(
282289
mismatches = combos[combined_std_bin]
283290

284291
else:
285-
seq_mask = np.full_like(std_bin, fill_value=15)
286292
# Note that seq is implicitly cast to a NumPy array:
287-
mismatches = np.count_nonzero((std_bin ^ seq) & seq_mask != 0)
293+
mismatches = np.count_nonzero(std_bin ^ seq != 0)
288294
combos[combined_std_bin] = mismatches # cache this value
289295

290296
if mismatches > current_rejection_threshold:
@@ -335,7 +341,9 @@ def combine_standards(
335341
combined_std_bin,
336342
mismatches,
337343
allele_pair,
338-
) in HLAAlgorithm.combine_standards_stepper(matching_stds, seq, mismatch_threshold):
344+
) in HLAAlgorithm.combine_standards_stepper(
345+
matching_stds, seq, mismatch_threshold
346+
):
339347
if combined_std_bin not in combos:
340348
combos[combined_std_bin] = (mismatches, [])
341349
combos[combined_std_bin][1].append(allele_pair)
@@ -404,8 +412,8 @@ def get_mismatches(
404412
mislist.append(
405413
HLAMismatch(
406414
index=dex,
407-
observed_base=BIN2NUC[sequence_bin[index]],
408-
expected_base=BIN2NUC[correct_base_bin],
415+
sequence_base=BIN2NUC[sequence_bin[index]],
416+
standard_base=BIN2NUC[correct_base_bin],
409417
)
410418
)
411419

@@ -459,14 +467,13 @@ def interpret(
459467
hla_sequence=hla_sequence,
460468
matches={
461469
combined_std: HLAMatchDetails(
462-
mismatch_count=mismatch_count,
463470
mismatches=self.get_mismatches(
464471
combined_std.standard_bin,
465472
seq,
466473
locus,
467474
),
468475
)
469-
for combined_std, mismatch_count in all_combos.items()
476+
for combined_std in all_combos
470477
},
471478
allele_frequencies=self.hla_frequencies[locus],
472479
b5701_standards=b5701_standards,

src/hla_algorithm/interpret_from_json.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,14 @@ def main():
3838
hla_input.hla_std_path,
3939
hla_input.hla_freq_path,
4040
)
41-
interp: HLAInterpretation = hla_alg.interpret(hla_input.hla_sequence())
42-
print(HLAResult.build_from_interpretation(interp).model_dump_json())
41+
interp: HLAInterpretation = hla_alg.interpret(
42+
hla_input.hla_sequence(), hla_input.threshold
43+
)
44+
print(
45+
HLAResult.build_from_interpretation(
46+
interp, hla_alg.tag, hla_alg.last_updated
47+
).model_dump_json()
48+
)
4349

4450

4551
if __name__ == "__main__":

src/hla_algorithm/interpret_from_json_lib.py

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime
12
from typing import Optional
23

34
from pydantic import BaseModel, Field
@@ -24,6 +25,7 @@ class HLAInput(BaseModel):
2425
seq1: str
2526
seq2: Optional[str]
2627
locus: HLA_LOCUS
28+
threshold: Optional[int] = None
2729
hla_std_path: Optional[str] = None
2830
hla_freq_path: Optional[str] = None
2931

@@ -81,6 +83,22 @@ def hla_sequence(self) -> HLASequence:
8183
)
8284

8385

86+
class HLAMatchAdaptor(BaseModel):
87+
"""
88+
An "adaptor" for HLAMatchDetails for inclusion in an HLAResult.
89+
"""
90+
91+
mismatch_count: int
92+
mismatches: list[str]
93+
94+
@classmethod
95+
def from_match_details(cls, match: HLAMatchDetails) -> "HLAMatchAdaptor":
96+
return cls(
97+
mismatch_count=match.mismatch_count,
98+
mismatches=[str(x) for x in match.mismatches],
99+
)
100+
101+
84102
class HLAResult(BaseModel):
85103
seqs: list[str] = Field(default_factory=list)
86104
alleles_all: list[str] = Field(default_factory=list)
@@ -91,12 +109,20 @@ class HLAResult(BaseModel):
91109
homozygous: bool = False
92110
locus: HLA_LOCUS = "B"
93111
alg_version: str = __version__
112+
alleles_version: str = ""
113+
alleles_last_updated: datetime = Field(default_factory=datetime.now)
94114
b5701: bool = False
95115
dist_b5701: Optional[int] = None
96116
errors: list[str] = Field(default_factory=list)
117+
all_mismatches: dict[str, HLAMatchAdaptor] = Field(default_factory=dict)
97118

98119
@classmethod
99-
def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult":
120+
def build_from_interpretation(
121+
cls,
122+
interp: HLAInterpretation,
123+
alleles_version: str,
124+
alleles_last_updated: datetime,
125+
) -> "HLAResult":
100126
aps: AllelePairs = interp.best_matching_allele_pairs()
101127

102128
# Pick one of the combined standards represented by what goes into
@@ -124,6 +150,12 @@ def build_from_interpretation(cls, interp: HLAInterpretation) -> "HLAResult":
124150
ambiguous=aps.is_ambiguous(),
125151
homozygous=aps.is_homozygous(),
126152
locus=interp.locus,
153+
alleles_version=alleles_version,
154+
alleles_last_updated=alleles_last_updated,
127155
b5701=interp.is_b5701(),
128156
dist_b5701=interp.distance_from_b7501(),
157+
all_mismatches={
158+
cs.get_allele_pair_str(): HLAMatchAdaptor.from_match_details(match)
159+
for cs, match in interp.matches.items()
160+
},
129161
)

src/hla_algorithm/models.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -98,17 +98,20 @@ def get_allele_pair_str(self):
9898

9999
class HLAMismatch(BaseModel):
100100
index: int
101-
observed_base: str
102-
expected_base: str
101+
sequence_base: str
102+
standard_base: str
103103

104104
def __str__(self):
105-
return f"{self.index}:{self.observed_base}->{self.expected_base}"
105+
return f"{self.index}:{self.sequence_base}->{self.standard_base}"
106106

107107

108108
class HLAMatchDetails(BaseModel):
109-
mismatch_count: int
110109
mismatches: list[HLAMismatch]
111110

111+
@property
112+
def mismatch_count(self) -> int:
113+
return len(self.mismatches)
114+
112115

113116
class HLAProteinPair(BaseModel):
114117
# Allows this to be hashable:

0 commit comments

Comments
 (0)