Skip to content

Commit 1d4485e

Browse files
authored
Fix formatter to handle dict keys with special characters (#2891)
* Fix formatter to handle dict keys with special characters * add test for keys with periods
1 parent 2ea4a3c commit 1d4485e

File tree

2 files changed

+111
-1
lines changed

2 files changed

+111
-1
lines changed

.generator/src/generator/formatter.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,13 @@ def format_data_with_schema_list(
390390
return parameters, imports
391391

392392

393+
def _is_valid_identifier(key):
394+
"""Check if a key can be used as a Python keyword argument."""
395+
if not key or not key[0].isalpha() and key[0] != '_':
396+
return False
397+
return all(c.isalnum() or c == '_' for c in key)
398+
399+
393400
@format_data_with_schema.register(dict)
394401
def format_data_with_schema_dict(
395402
data,
@@ -402,6 +409,7 @@ def format_data_with_schema_dict(
402409
"""Format data with schema."""
403410
assert version is not None
404411
name, imports = get_name_and_imports(schema, version, imports)
412+
use_dict_literal = False
405413

406414
parameters = ""
407415
if "properties" in schema:
@@ -436,7 +444,13 @@ def format_data_with_schema_dict(
436444
replace_values=replace_values,
437445
version=version,
438446
)
439-
parameters += f"{escape_reserved_keyword(k)}={value}, "
447+
safe_key = escape_reserved_keyword(k)
448+
if not _is_valid_identifier(safe_key):
449+
# Key contains special characters (like dots), must use dict literal
450+
use_dict_literal = True
451+
parameters += f'"{k}": {value}, '
452+
else:
453+
parameters += f"{safe_key}={value}, "
440454
imports = _merge_imports(imports, extra_imports)
441455

442456
if "oneOf" in schema:
@@ -458,6 +472,9 @@ def format_data_with_schema_dict(
458472
parameters = f"[{key_val_pairs}]"
459473

460474
if name:
475+
# If we detected invalid identifiers, use dict literal syntax
476+
if use_dict_literal:
477+
return f"{{{parameters}}}", imports
461478
return f"{name}({parameters})", imports
462479

463480
return parameters, imports

.generator/tests/test_formatter.py

Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
import pytest
2+
from collections import defaultdict
3+
from generator.formatter import _is_valid_identifier, format_data_with_schema
4+
5+
6+
class TestIsValidIdentifier:
    """Table-driven checks for ``_is_valid_identifier``."""

    # (candidate key, expected verdict) pairs fed to the parametrized test.
    _CASES = [
        ("valid_key", True),
        ("_valid_key", True),
        ("ValidKey123", True),
        ("key123", True),
        ("key.with.dots", False),
        ("123invalid", False),
        (".starts_with_dot", False),
        ("", False),
        ("key-with-dash", False),
        ("key with space", False),
        ("key@special", False),
    ]

    @pytest.mark.parametrize("key,expected", _CASES)
    def test_is_valid_identifier(self, key, expected):
        """Each key must be classified exactly as the table states."""
        verdict = _is_valid_identifier(key)
        assert verdict == expected
22+
23+
24+
class TestFormatDataWithSchemaDictWithSpecialChars:
    """Formatting of free-form string maps whose keys may not be identifiers."""

    @staticmethod
    def _string_map_schema():
        """Build a fresh schema describing a map of arbitrary string values."""
        return {
            "additionalProperties": {
                "type": "string"
            }
        }

    def _render(self, payload):
        """Format ``payload`` against the string-map schema; return the code."""
        rendered, _ = format_data_with_schema(
            payload,
            self._string_map_schema(),
            version="v1"
        )
        return rendered

    def test_ocsf_dotted_keys_actual_failing_case(self):
        """Dotted keys must be emitted as a dict literal, not ``dict(...)``."""
        result = self._render({
            "ocsf.activity_name": "Other",
            "ocsf.activity_id": "99"
        })

        assert result.startswith("{")
        assert result.endswith("}")
        assert '"ocsf.activity_name": \'Other\'' in result
        assert '"ocsf.activity_id": \'99\'' in result
        assert "dict(" not in result

    def test_multiple_dotted_keys(self):
        """Every dotted key must appear as a quoted dict-literal entry."""
        result = self._render({
            "ocsf.activity_name": "Other",
            "ocsf.activity_id": "99",
            "ocsf.category_name": "System Activity"
        })

        assert result.startswith("{")
        assert result.endswith("}")
        assert '"ocsf.activity_name": \'Other\'' in result
        assert '"ocsf.activity_id": \'99\'' in result
        assert '"ocsf.category_name": \'System Activity\'' in result
        assert "dict(" not in result

    def test_dict_with_valid_identifiers_uses_constructor(self):
        """Identifier-safe keys keep the keyword-argument constructor form."""
        result = self._render({
            "normal_key": "value1",
            "another_key": "value2"
        })

        assert result.startswith("dict(")
        assert result.endswith(")")
        assert "normal_key='value1'" in result
        assert "another_key='value2'" in result

0 commit comments

Comments
 (0)