Skip to content

Commit a17122e

Browse files
authored
Merge pull request #4 from cfe-lab/FoldClinicalIn
Continuing to consolidate all of our HLA functionality
2 parents de6af45 + 9e66998 commit a17122e

25 files changed

+395
-268
lines changed

.devcontainer/Dockerfile

Lines changed: 0 additions & 23 deletions
This file was deleted.
Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -22,39 +22,40 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
os: [ubuntu-latest]
25-
python-version: ["3.10", "3.11", "3.12"]
25+
python-version: ["3.10", "3.11", "3.12", "3.x"]
2626

2727
steps:
28-
- uses: actions/checkout@v3
28+
- uses: actions/checkout@v4
2929

3030
- name: Set up Python ${{ matrix.python-version }}
3131
uses: actions/setup-python@v4
3232
with:
3333
python-version: ${{ matrix.python-version }}
3434

3535
- name: Install dependencies
36-
run:
37-
- apt update && apt install yamllint
38-
- pip install uv
36+
run: |
37+
apt update && apt install yamllint
38+
pip install uv
3939
4040
- name: Check code
41-
run:
42-
- yamllint .
43-
- uv run mypy --check .
44-
- uv run ruff check .
41+
continue-on-error: true
42+
run: |
43+
yamllint .
44+
uv run mypy --check .
45+
uv run ruff check .
4546
4647
- name: Run tests
4748
run: uv run pytest --junitxml=pytest.xml
4849

49-
# TODO: Look into github actions, these are out of date
50-
# - name: Upload coverage data
51-
# uses: actions/upload-artifact@v3
52-
# with:
53-
# name: coverage-data
54-
# path: coverage.xml
55-
56-
# - name: Publish Test Report
57-
# uses: mikepenz/action-junit-report@v3
58-
# if: success() || failure()
59-
# with:
60-
# report_paths: unit_test.xml
50+
# TODO: Look into github actions, these are out of date
51+
# - name: Upload coverage data
52+
# uses: actions/upload-artifact@v3
53+
# with:
54+
# name: coverage-data
55+
# path: coverage.xml
56+
57+
# - name: Publish Test Report
58+
# uses: mikepenz/action-junit-report@v3
59+
# if: success() || failure()
60+
# with:
61+
# report_paths: unit_test.xml

.yamllint.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
ignore:
22
- .git/*
33
- .venv/*
4+
- src/easyhla/default_data/hla_standards.yaml
45

56
extends: default
67

pyproject.toml

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
[build-system]
2-
requires = ["hatchling"]
2+
requires = ["hatchling", "uv-dynamic-versioning"]
33
build-backend = "hatchling.build"
44

55
[project]
@@ -33,6 +33,9 @@ dependencies = [
3333
"pyyaml>=6.0.2",
3434
"requests>=2.32.3",
3535
"typer>=0.15.2",
36+
"types-pyyaml>=6.0.12.20250516",
37+
"types-requests>=2.32.4.20250611",
38+
"uv-dynamic-versioning>=0.8.2",
3639
]
3740

3841
[dependency-groups]
@@ -54,9 +57,9 @@ dev = [
5457
]
5558

5659
[project.urls]
57-
Documentation = "https://github.com/unknown/easyhla#readme"
58-
Issues = "https://github.com/unknown/easyhla/issues"
59-
Source = "https://github.com/unknown/easyhla"
60+
Documentation = "https://github.com/cfe-lab/pyeasyhla/blob/main/README.md"
61+
Issues = "https://github.com/cfe-lab/pyeasyhla/issues"
62+
Source = "https://github.com/cfe-lab/pyeasyhla"
6063

6164
[project.scripts]
6265
clinical_hla = "easyhla.clinical_hla:main"
@@ -72,28 +75,48 @@ database = [
7275
]
7376

7477
[tool.hatch.version]
75-
path = "src/easyhla/__about__.py"
78+
source = "uv-dynamic-versioning"
7679

7780
[tool.hatch.build]
7881
include = [
79-
"src/easyhla/*.py",
80-
"src/easyhla/default_data/*.csv",
81-
"src/easyhla/default_data/hla_nuc.fasta.mtime",
82+
"src/easyhla/__about__.py",
83+
"src/easyhla/__init__.py",
84+
"src/easyhla/__main__.py",
85+
"src/easyhla/easyhla.py",
86+
"src/easyhla/interpret_from_json_lib.py",
87+
"src/easyhla/interpret_from_json.py",
88+
"src/easyhla/models.py",
89+
"src/easyhla/py.typed",
90+
"src/easyhla/update_alleles.py",
91+
"src/easyhla/update_frequency_file_lib.py",
92+
"src/easyhla/update_frequency_file.py",
93+
"src/easyhla/utils.py",
94+
"src/easyhla/default_data/hla_standards.yaml",
95+
"src/easyhla/default_data/hla_frequencies.csv",
8296
]
8397
exclude = [
84-
"tools",
85-
"tests/output",
86-
"tests/input",
98+
"tests",
8799
]
88100
skip-excluded-dirs = true
89101
directory = "output"
90102

91103
[tool.hatch.build.targets.wheel]
92104
packages = ["src/easyhla"]
93105

106+
[tool.hatch.build.hooks.version]
107+
path = "src/easyhla/_version.py"
108+
template = '''
109+
__version__ = "{version}"
110+
'''
111+
94112
[tool.uv]
95113
package = true
96114

115+
[tool.uv-dynamic-versioning]
116+
vcs = "git"
117+
style = "semver"
118+
fallback-version = "0.0.0"
119+
97120
[tool.pytest.ini_options]
98121
pythonpath = "src"
99122
minversion = "6.0"
@@ -147,3 +170,4 @@ match = "src/**/*.py"
147170
[tool.mypy]
148171
plugins = ["numpy.typing.mypy_plugin"]
149172
ignore_missing_imports = true
173+
exclude = ["scripts/"]

src/easyhla/__about__.py

Lines changed: 0 additions & 1 deletion
This file was deleted.

src/easyhla/bblab.py

Lines changed: 17 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,19 @@
55
from pathlib import Path
66
from typing import Any, Optional
77

8-
import Bio
98
import typer
9+
from Bio.Seq import MutableSeq, Seq
10+
from Bio.SeqIO import parse
1011

1112
from .bblab_lib import (
1213
EXON_AND_OTHER_EXON,
1314
HLAInterpretationRow,
1415
HLAMismatchRow,
1516
pair_exons,
1617
)
17-
from .easyhla import DATE_FORMAT, EXON_NAME, EasyHLA
18+
from .easyhla import DATE_FORMAT, EasyHLA
1819
from .models import HLAInterpretation, HLASequence
20+
from .utils import EXON_NAME
1921

2022
logger = logging.Logger(__name__, logging.ERROR)
2123

@@ -49,21 +51,21 @@ def log_and_print(
4951

5052

5153
def report_unmatched_sequences(
52-
unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]],
54+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
5355
to_stdout: bool = False,
5456
) -> None:
5557
"""
5658
Report exon sequences that did not have a matching exon.
5759
5860
:param unmatched: unmatched exon sequences, grouped by which exon they represent
59-
:type unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
61+
:type unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]
6062
:param to_stdout: forwarded to log_and_print for each reported sequence, defaults to False
6163
:type to_stdout: bool, optional
6264
"""
6365
for exon, other_exon in EXON_AND_OTHER_EXON:
64-
for entry in unmatched[exon]:
66+
for sequence_id in unmatched[exon].keys():
6567
log_and_print(
66-
f"No matching {other_exon} for {entry.description}",
68+
f"No matching {other_exon} for {sequence_id}",
6769
to_stdout=to_stdout,
6870
)
6971

@@ -79,6 +81,8 @@ def process_from_file_to_files(
7981
):
8082
if threshold and threshold < 0:
8183
raise RuntimeError("Threshold must be >=0 or None!")
84+
elif threshold is None:
85+
threshold = 0
8286

8387
rows: list[HLAInterpretationRow] = []
8488
mismatch_rows: list[HLAMismatchRow] = []
@@ -93,13 +97,13 @@ def process_from_file_to_files(
9397
)
9498

9599
matched_sequences: list[HLASequence]
96-
unmatched: dict[EXON_NAME, dict[str, Bio.SeqIO.SeqRecord]]
100+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]
97101

98102
with open(filename, "r", encoding="utf-8") as f:
99103
matched_sequences, unmatched = pair_exons(
100-
Bio.SeqIO.parse(f, "fasta"),
104+
parse(f, "fasta"),
101105
locus.value,
102-
list(hla_alg.standards.values())[0],
106+
list(hla_alg.hla_standards[locus.value].values())[0],
103107
)
104108

105109
for hla_sequence in matched_sequences:
@@ -133,10 +137,10 @@ def process_from_file_to_files(
133137
row: HLAInterpretationRow = HLAInterpretationRow.summary_row(result)
134138
rows.append(row)
135139

136-
mismatch_rows.extend(result.mismatch_rows())
140+
mismatch_rows.extend(HLAMismatchRow.mismatch_rows(result))
137141

138142
npats += 1
139-
nseqs += hla_sequence.num_seqs
143+
nseqs += hla_sequence.num_sequences_used
140144

141145
report_unmatched_sequences(unmatched, to_stdout=to_stdout)
142146

@@ -171,11 +175,11 @@ def process_from_file_to_files(
171175
),
172176
)
173177
mismatch_csv.writeheader()
174-
mismatch_csv.writerows([dict[row] for row in mismatch_rows])
178+
mismatch_csv.writerows([dict(row) for row in mismatch_rows])
175179

176180
log_and_print(
177181
f"{npats} patients, {nseqs} sequences processed.",
178-
log_level=logger.INFO,
182+
log_level=logging.INFO,
179183
to_stdout=to_stdout,
180184
)
181185

src/easyhla/bblab_lib.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from typing import TypedDict
44

55
import numpy as np
6-
from Bio.Seq import Seq
6+
from Bio.Seq import MutableSeq, Seq
77
from Bio.SeqIO import SeqRecord
88
from pydantic import BaseModel
99

@@ -36,7 +36,7 @@
3636

3737
def pair_exons_helper(
3838
sequence_record: SeqRecord,
39-
unmatched: dict[EXON_NAME, dict[str, Seq]],
39+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]],
4040
) -> tuple[str, bool, bool, str, str]:
4141
"""
4242
Helper that attempts to match the given sequence with a "partner" exon.
@@ -55,7 +55,7 @@ def pair_exons_helper(
5555
- exon3 sequence
5656
"""
5757
# The `id` field is expected to hold the sample name.
58-
samp: str = sequence_record.id
58+
samp: str = sequence_record.id or ""
5959
is_exon: bool = False
6060
matched: bool = False
6161
exon2: str = ""
@@ -98,7 +98,7 @@ def pair_exons(
9898
sequence_records: Iterable[SeqRecord],
9999
locus: HLA_LOCUS,
100100
example_standard: HLAStandard,
101-
) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq]]]:
101+
) -> tuple[list[HLASequence], dict[EXON_NAME, dict[str, Seq | MutableSeq | None]]]:
102102
"""
103103
Pair exons in the given input sequences.
104104
@@ -109,7 +109,7 @@ def pair_exons(
109109
sequences and attempt to match them up.
110110
"""
111111
matched_sequences: list[HLASequence] = []
112-
unmatched: dict[EXON_NAME, dict[str, Seq]] = {
112+
unmatched: dict[EXON_NAME, dict[str, Seq | MutableSeq | None]] = {
113113
"exon2": {},
114114
"exon3": {},
115115
}
@@ -118,7 +118,7 @@ def pair_exons(
118118
# Skip over any sequences that aren't the right length or contain
119119
# bad bases.
120120
try:
121-
check_length(locus, str(sr.seq), sr.id)
121+
check_length(locus, str(sr.seq), sr.id or "")
122122
except BadLengthException:
123123
continue
124124

@@ -147,21 +147,21 @@ def pair_exons(
147147
exon3_bin = pad_short(example_standard.sequence, nuc2bin(exon3), "exon3")
148148
matched_sequences.append(
149149
HLASequence(
150-
two=(int(x) for x in exon2_bin),
150+
two=tuple(int(x) for x in exon2_bin),
151151
intron=(),
152-
three=(int(x) for x in exon3_bin),
152+
three=tuple(int(x) for x in exon3_bin),
153153
name=identifier,
154154
locus=locus,
155155
num_sequences_used=2,
156156
)
157157
)
158158
else:
159-
seq_numpy: np.array = pad_short(
159+
seq_numpy: np.ndarray = pad_short(
160160
example_standard.sequence,
161161
nuc2bin(sr.seq), # type: ignore
162162
None,
163163
)
164-
seq: tuple[int] = tuple(int(x) for x in seq_numpy)
164+
seq: tuple[int, ...] = tuple(int(x) for x in seq_numpy)
165165
matched_sequences.append(
166166
HLASequence(
167167
two=seq[:EXON2_LENGTH],

0 commit comments

Comments
 (0)