diff --git a/c/tests/test_core.c b/c/tests/test_core.c
index 1b97e4485f..df06e1f6ff 100644
--- a/c/tests/test_core.c
+++ b/c/tests/test_core.c
@@ -25,6 +25,7 @@
 #include "testlib.h"
 #include <tskit/core.h>
 #include <math.h>
+#include <string.h>
 
 #include <unistd.h>
 
@@ -82,6 +83,179 @@ test_generate_uuid(void)
     CU_ASSERT_STRING_NOT_EQUAL(uuid, other_uuid);
 }
 
+/* Write value to dest as eight bytes, least significant byte first. */
+static void
+set_u64_le(uint8_t *dest, uint64_t value)
+{
+    dest[0] = (uint8_t)(value & 0xFF);
+    dest[1] = (uint8_t)((value >> 8) & 0xFF);
+    dest[2] = (uint8_t)((value >> 16) & 0xFF);
+    dest[3] = (uint8_t)((value >> 24) & 0xFF);
+    dest[4] = (uint8_t)((value >> 32) & 0xFF);
+    dest[5] = (uint8_t)((value >> 40) & 0xFF);
+    dest[6] = (uint8_t)((value >> 48) & 0xFF);
+    dest[7] = (uint8_t)((value >> 56) & 0xFF);
+}
+
+/* Exercise tsk_json_struct_metadata_get_blob on hand-built buffers: valid
+ * framings first, then each rejected input in turn. */
+static void
+test_json_struct_metadata_get_blob(void)
+{
+    int ret;
+    char metadata[128];
+    const char *json;
+    tsk_size_t json_buffer_length;
+    const uint8_t *blob;
+    tsk_size_t blob_length;
+    uint8_t *bytes;
+    tsk_size_t metadata_length;
+    size_t header_length;
+    size_t json_length;
+    size_t payload_length;
+    size_t total_length;
+    const char json_payload[] = "{\"a\":1}";
+    const uint8_t binary_payload[] = { 0x01, 0x02, 0x03, 0x04 };
+    const uint8_t zero_byte_payload[] = { 0 };
+
+    /* Valid buffer: header, JSON text, then a four byte binary payload. */
+    bytes = (uint8_t *) metadata;
+    header_length = 4 + 1 + 8 + 8;
+    json_length = strlen(json_payload);
+    payload_length = sizeof(binary_payload);
+    total_length = header_length + json_length + payload_length;
+    CU_ASSERT_FATAL(total_length <= sizeof(metadata));
+    memset(metadata, 0, sizeof(metadata));
+    bytes[0] = 'J';
+    bytes[1] = 'B';
+    bytes[2] = 'L';
+    bytes[3] = 'B';
+    bytes[4] = 1;
+    set_u64_le(bytes + 5, (uint64_t) json_length);
+    set_u64_le(bytes + 13, (uint64_t) payload_length);
+    memcpy(bytes + header_length, json_payload, json_length);
+    memcpy(bytes + header_length + json_length, binary_payload, payload_length);
+    metadata_length = (tsk_size_t) total_length;
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, 0);
+    CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
+    CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
+    if (json_length > 0) {
+        CU_ASSERT_EQUAL(memcmp(json, json_payload, json_length), 0);
+    }
+    CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);
+    CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
+    CU_ASSERT_EQUAL(memcmp(blob, binary_payload, payload_length), 0);
+
+    /* Same JSON with a zero-length binary payload. */
+    payload_length = 0;
+    total_length = header_length + json_length + payload_length;
+    CU_ASSERT_FATAL(total_length <= sizeof(metadata));
+    set_u64_le(bytes + 13, (uint64_t) payload_length);
+    metadata_length = (tsk_size_t) total_length;
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, 0);
+    CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
+    CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
+    CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
+    CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);
+
+    /* Zero-length JSON followed by a single zero byte of payload. */
+    json_length = 0;
+    payload_length = sizeof(zero_byte_payload);
+    total_length = header_length + json_length + payload_length;
+    CU_ASSERT_FATAL(total_length <= sizeof(metadata));
+    set_u64_le(bytes + 5, (uint64_t) json_length);
+    set_u64_le(bytes + 13, (uint64_t) payload_length);
+    memcpy(bytes + header_length + json_length, zero_byte_payload, payload_length);
+    metadata_length = (tsk_size_t) total_length;
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, 0);
+    CU_ASSERT_PTR_EQUAL(json, (const char *) bytes + header_length);
+    CU_ASSERT_EQUAL(json_buffer_length, (tsk_size_t) json_length);
+    CU_ASSERT_EQUAL(blob_length, (tsk_size_t) payload_length);
+    CU_ASSERT_PTR_EQUAL(blob, bytes + header_length + json_length);
+    CU_ASSERT_EQUAL(memcmp(blob, zero_byte_payload, payload_length), 0);
+
+    /* Buffer one byte shorter than the header; outputs must stay untouched. */
+    blob = NULL;
+    blob_length = 0;
+    json = NULL;
+    json_buffer_length = 0;
+    metadata_length = (tsk_size_t)(header_length - 1);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
+    CU_ASSERT_PTR_NULL(json);
+    CU_ASSERT_EQUAL(json_buffer_length, 0);
+    CU_ASSERT_PTR_NULL(blob);
+    CU_ASSERT_EQUAL(blob_length, 0);
+
+    /* Wrong magic bytes. */
+    metadata_length = (tsk_size_t) total_length;
+    bytes[0] = 'X';
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
+    bytes[0] = 'J';
+
+    /* Unsupported version byte. */
+    bytes[4] = 2;
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_VERSION_TOO_NEW);
+    bytes[4] = 1;
+
+    /* Declared lengths exceed the supplied buffer by one byte. */
+    metadata_length = (tsk_size_t)(total_length - 1);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
+
+    /* Every pointer argument is required. */
+    ret = tsk_json_struct_metadata_get_blob(
+        NULL, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, NULL, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, NULL, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, NULL, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, NULL);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_BAD_PARAM_VALUE);
+
+    /* Header-only buffer for the length overflow cases below. */
+    memset(metadata, 0, sizeof(metadata));
+    bytes[0] = 'J';
+    bytes[1] = 'B';
+    bytes[2] = 'L';
+    bytes[3] = 'B';
+    bytes[4] = 1;
+    metadata_length = (tsk_size_t) header_length;
+
+    /* Header size plus JSON length wraps around in 64 bits. */
+    set_u64_le(bytes + 5, UINT64_MAX - (uint64_t) header_length + 1);
+    set_u64_le(bytes + 13, 0);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
+
+    /* Header size plus JSON length plus payload length wraps around. */
+    set_u64_le(bytes + 5, 8);
+    set_u64_le(bytes + 13, UINT64_MAX - (uint64_t)(header_length + 8) + 1);
+    ret = tsk_json_struct_metadata_get_blob(
+        metadata, metadata_length, &json, &json_buffer_length, &blob, &blob_length);
+    CU_ASSERT_EQUAL(ret, TSK_ERR_FILE_FORMAT);
+}
+
 static void
 test_double_round(void)
 {
@@ -652,6 +826,7 @@ main(int argc, char **argv)
         { "test_strerror", test_strerror },
         { "test_strerror_kastore", test_strerror_kastore },
         { "test_generate_uuid", test_generate_uuid },
+        { "test_json_struct_metadata_get_blob", test_json_struct_metadata_get_blob },
         { "test_double_round", test_double_round },
         { "test_blkalloc", test_blkalloc },
         { "test_unknown_time", test_unknown_time },
diff --git a/c/tskit/core.c b/c/tskit/core.c
index 0f31550a71..5fb6d71a02 100644
--- a/c/tskit/core.c
+++ b/c/tskit/core.c
@@ -33,6 +33,9 @@
 #include <tskit/core.h>
 
 #define UUID_NUM_BYTES 16
+#define TSK_JSON_BINARY_HEADER_SIZE 21
+
+static const uint8_t TSK_JSON_BINARY_MAGIC[4] = { 'J', 'B', 'L', 'B' };
 
 #if defined(_WIN32)
 
@@ -95,6 +98,22 @@ get_random_bytes(uint8_t *buf)
 
 #endif
 
+/* Decode the eight bytes at p as a little-endian unsigned 64 bit integer. */
+static uint64_t
+tsk_load_u64_le(const uint8_t *p)
+{
+    uint64_t value = (uint64_t) p[0];
+
+    value |= (uint64_t) p[1] << 8;
+    value |= (uint64_t) p[2] << 16;
+    value |= (uint64_t) p[3] << 24;
+    value |= (uint64_t) p[4] << 32;
+    value |= (uint64_t) p[5] << 40;
+    value |= (uint64_t) p[6] << 48;
+    value |= (uint64_t) p[7] << 56;
+    return value;
+}
+
 /* Generate a new UUID4 using a system-generated source of randomness.
  * Note that this function writes a NULL terminator to the end of this
  * string, so that the total length of the buffer must be 37 bytes.
@@ -121,6 +140,72 @@ tsk_generate_uuid(char *dest, int TSK_UNUSED(flags))
 out:
     return ret;
 }
+
+/* Locate the JSON and binary sections of json+struct metadata without copying.
+ * The full contract is documented with the declaration in core.h. */
+int
+tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
+    const char **json, tsk_size_t *json_length, const uint8_t **blob,
+    tsk_size_t *blob_length)
+{
+    int ret;
+    uint8_t version;
+    uint64_t json_length_u64;
+    uint64_t binary_length_u64;
+    uint64_t header_and_json_length;
+    uint64_t total_length;
+    const uint8_t *bytes;
+    const uint8_t *blob_start;
+    const char *json_start;
+
+    if (metadata == NULL || json == NULL || json_length == NULL || blob == NULL
+        || blob_length == NULL) {
+        ret = tsk_trace_error(TSK_ERR_BAD_PARAM_VALUE);
+        goto out;
+    }
+    bytes = (const uint8_t *) metadata;
+    if (metadata_length < TSK_JSON_BINARY_HEADER_SIZE) {
+        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
+        goto out;
+    }
+    if (memcmp(bytes, TSK_JSON_BINARY_MAGIC, sizeof(TSK_JSON_BINARY_MAGIC)) != 0) {
+        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
+        goto out;
+    }
+    version = bytes[4];
+    if (version != 1) {
+        ret = tsk_trace_error(TSK_ERR_FILE_VERSION_TOO_NEW);
+        goto out;
+    }
+    /* The JSON and binary lengths follow the version byte. */
+    json_length_u64 = tsk_load_u64_le(bytes + 5);
+    binary_length_u64 = tsk_load_u64_le(bytes + 13);
+    /* Check each addition before performing it so that total_length cannot wrap. */
+    if (json_length_u64 > UINT64_MAX - (uint64_t) TSK_JSON_BINARY_HEADER_SIZE) {
+        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
+        goto out;
+    }
+    header_and_json_length = (uint64_t) TSK_JSON_BINARY_HEADER_SIZE + json_length_u64;
+    if (binary_length_u64 > UINT64_MAX - header_and_json_length) {
+        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
+        goto out;
+    }
+    total_length = header_and_json_length + binary_length_u64;
+    /* Bytes beyond the declared sections are permitted and ignored. */
+    if ((uint64_t) metadata_length < total_length) {
+        ret = tsk_trace_error(TSK_ERR_FILE_FORMAT);
+        goto out;
+    }
+    json_start = (const char *) bytes + TSK_JSON_BINARY_HEADER_SIZE;
+    blob_start = bytes + TSK_JSON_BINARY_HEADER_SIZE + json_length_u64;
+    *json = json_start;
+    *json_length = (tsk_size_t) json_length_u64;
+    *blob = blob_start;
+    *blob_length = (tsk_size_t) binary_length_u64;
+    ret = 0;
+out:
+    return ret;
+}
 static const char *
 tsk_strerror_internal(int err)
 {
diff --git a/c/tskit/core.h b/c/tskit/core.h
index 481905b7ad..9d5ce2087a 100644
--- a/c/tskit/core.h
+++ b/c/tskit/core.h
@@ -1096,6 +1096,37 @@ bool tsk_isfinite(double val);
 #define TSK_UUID_SIZE 36
 int tsk_generate_uuid(char *dest, int flags);
 
+/**
+@brief Locate the JSON and binary sections of ``json+struct`` encoded metadata.
+
+@rst
+Metadata produced by :py:class:`tskit.metadata.JSONStructCodec` consists of a fixed-size
+header followed by canonical JSON bytes and an optional binary payload. This helper
+validates the framing, returning pointers to the embedded JSON and binary sections
+without copying. Bytes following the declared sections are ignored.
+
+The output pointers reference memory owned by the caller and remain valid only while
+the original metadata buffer is alive. The JSON bytes are not necessarily NULL
+terminated. If an error is returned the output arguments are left unmodified.
+
+Returns ``TSK_ERR_BAD_PARAM_VALUE`` if any pointer argument is NULL,
+``TSK_ERR_FILE_VERSION_TOO_NEW`` if the header version is not 1, and
+``TSK_ERR_FILE_FORMAT`` if the buffer is shorter than the header, the magic bytes
+do not match, or the declared lengths do not fit within ``metadata_length``.
+@endrst
+
+@param[in] metadata Pointer to the encoded metadata bytes.
+@param[in] metadata_length Number of bytes available at ``metadata``.
+@param[out] json On success, set to the start of the JSON bytes.
+@param[out] json_length On success, set to the JSON length in bytes.
+@param[out] blob On success, set to the start of the binary payload.
+@param[out] blob_length On success, set to the payload length in bytes.
+@return 0 on success, or a ``TSK_ERR_*`` error code on failure.
+*/
+int tsk_json_struct_metadata_get_blob(const char *metadata, tsk_size_t metadata_length,
+    const char **json, tsk_size_t *json_length, const uint8_t **blob,
+    tsk_size_t *blob_length);
+
 /* TODO most of these can probably be macros so they compile out as no-ops.
  * Lets do the 64 bit tsk_size_t switch first though. */
 void *tsk_malloc(tsk_size_t size);
diff --git a/python/CHANGELOG.rst b/python/CHANGELOG.rst
index d11a2a0e2d..7f0d616cb6 100644
--- a/python/CHANGELOG.rst
+++ b/python/CHANGELOG.rst
@@ -87,6 +87,9 @@
   also around 10% faster.
   (:user:`benjeffery`, :pr:`3313`, :pr:`3317`, :issue:`1896`)
 
+- Add ``json+struct`` metadata codec that allows storing binary data using a struct
+  schema alongside JSON metadata. (:user:`benjeffery`, :pr:`3306`)
+
 **Bugfixes**
 
 - In some tables with mutations out-of-order ``TableCollection.sort`` did not re-order
diff --git a/python/tests/test_metadata.py b/python/tests/test_metadata.py
index 316615cc12..55e98a9fd6 100644
--- a/python/tests/test_metadata.py
+++ b/python/tests/test_metadata.py
@@ -626,6 +626,168 @@ def test_zero_length(self):
         assert ms.decode_row(b"") == {}
 
 
+class TestJSONStructCodec:
+    def test_requires_subschemas(self):
+        with pytest.raises(
+            tskit.MetadataSchemaValidationError,
+            match="requires 'json' and 'struct' schema mappings",
+        ):
+            tskit.MetadataSchema({"codec": "json+struct"})
+
+    def test_disallow_duplicate_keys(self):
+        schema = {
+            "codec": "json+struct",
+            "json": {"type": "object", "properties": {"x": {"type": "number"}}},
+            "struct": {
+                "type": "object",
+                "properties": {"x": {"type": "number", "binaryFormat": "i"}},
+            },
+        }
+        with pytest.raises(
+            tskit.MetadataSchemaValidationError, match="must not share property names"
+        ):
+            tskit.MetadataSchema(schema)
+
+    def test_round_trip_with_struct_and_json(self):
+        schema = {
+            "codec": "json+struct",
+            "json": {
+                "type": "object",
+                "properties": {
+                    "label": {"type": "string"},
+                    "count": {"type": "number"},
+                },
+                "required": ["label"],
+            },
+            "struct": {
+                "type": "object",
+                "properties": {"blob": {"type": "integer", "binaryFormat": "i"}},
+            },
+        }
+        ms = tskit.MetadataSchema(schema)
+        row = {"label": "alpha", "count": 7, "blob": 5}
+        encoded = ms.validate_and_encode_row(row)
+        out = ms.decode_row(encoded)
+        assert out == row
+
+    def test_json_defaults_applied(self):
+        schema = {
+            "codec": "json+struct",
+            "json": {
+                "type": "object",
+                "properties": {"number": {"type": "number", "default": 5}},
+            },
+            "struct": {"type": "object", "properties": {}},
+        }
+        ms = tskit.MetadataSchema(schema)
+        assert ms.decode_row(ms.validate_and_encode_row({})) == {"number": 5}
+        assert ms.decode_row(ms.validate_and_encode_row({"number": 9})) == {"number": 9}
+
+    def test_nested_default_error(self):
+        schema = {
+            "codec": "json+struct",
+            "json": {
+                "type": "object",
+                "properties": {
+                    "obj": {
+                        "type": "object",
+                        "properties": {
+                            "nested_obj_no_default": {
+                                "type": "object",
+                                "properties": {},
+                            },
+                            "nested_obj": {
+                                "type": "object",
+                                "properties": {},
+                                "default": {"foo": "bar"},
+                            },
+                        },
+                    }
+                },
+            },
+            "struct": {"type": "object", "properties": {}},
+        }
+        with pytest.raises(
+            tskit.MetadataSchemaValidationError,
+            match="Defaults can only be specified at the top level for JSON codec",
+        ):
+            tskit.MetadataSchema(schema)
+
+    def test_decode_without_magic_errors(self):
+        ms = tskit.MetadataSchema(
+            {
+                "codec": "json+struct",
+                "json": {"type": "object", "properties": {}},
+                "struct": {"type": "object", "properties": {}},
+            }
+        )
+        with pytest.raises(ValueError, match="missing magic header"):
+            ms.decode_row(b"{}")
+
+    def test_decode_truncated_header(self):
+        ms = tskit.MetadataSchema(
+            {
+                "codec": "json+struct",
+                "json": {"type": "object", "properties": {}},
+                "struct": {"type": "object", "properties": {}},
+            }
+        )
+        with pytest.raises(ValueError, match="truncated header"):
+            ms.decode_row(metadata.JSONStructCodec.MAGIC + b"\x01")
+
+    def test_decode_version_mismatch(self):
+        ms = tskit.MetadataSchema(
+            {
+                "codec": "json+struct",
+                "json": {"type": "object", "properties": {}},
+                "struct": {"type": "object", "properties": {}},
+            }
+        )
+        header = metadata.JSONStructCodec._HDR.pack(
+            metadata.JSONStructCodec.MAGIC,
+            metadata.JSONStructCodec.VERSION + 1,
+            len(b"{}"),
+            0,
+        )
+        with pytest.raises(
+            ValueError,
+            match="Unsupported json\\+struct version",
+        ):
+            ms.decode_row(header + b"{}")
+
+    def test_decode_truncated_lengths(self):
+        schema = {
+            "codec": "json+struct",
+            "json": {"type": "object", "properties": {}},
+            "struct": {"type": "object", "properties": {}},
+        }
+        ms = tskit.MetadataSchema(schema)
+        header = metadata.JSONStructCodec._HDR.pack(
+            metadata.JSONStructCodec.MAGIC, metadata.JSONStructCodec.VERSION, 5, 0
+        )
+        with pytest.raises(ValueError, match="declared lengths exceed buffer size"):
+            ms.decode_row(header + b"abc")
+
+        header = metadata.JSONStructCodec._HDR.pack(
+            metadata.JSONStructCodec.MAGIC, metadata.JSONStructCodec.VERSION, 1, 3
+        )
+        with pytest.raises(ValueError, match="declared lengths exceed buffer size"):
+            ms.decode_row(header + b"a")
+
+    def test_missing_struct_property_fails_validation(self):
+        schema = {
+            "codec": "json+struct",
+            "json": {"type": "object", "properties": {}},
+            "struct": {
+                "type": "object",
+                "properties": {"payload": {"type": "integer", "binaryFormat": "i"}},
+            },
+        }
+        ms = tskit.MetadataSchema(schema)
+        with pytest.raises(tskit.MetadataValidationError, match="required property"):
+            ms.validate_and_encode_row({})
+
+
 class TestStructCodec:
     def encode_decode(self, method_name, sub_schema, obj, buffer):
         assert (
diff --git a/python/tskit/metadata.py b/python/tskit/metadata.py
index c447debab2..506f8b14c4 100644
--- a/python/tskit/metadata.py
+++ b/python/tskit/metadata.py
@@ -193,6 +193,159 @@ def decode(self, data: bytes) -> bytes:
         return data
 
 
+class JSONStructCodec(AbstractMetadataCodec):
+    """
+    Pack canonical JSON metadata together with a struct-encoded binary payload.
+    The codec expects a metadata schema with separate ``json`` and ``struct``
+    subschemas and produces a single dict containing the union of the keys from
+    those subschemas after decoding.
+
+    Encoded rows consist of a little-endian header (4 byte magic ``JBLB``, 1 byte
+    version, 8 byte JSON length, 8 byte struct length) followed by the JSON bytes
+    and then the struct bytes.
+    """
+
+    MAGIC = b"JBLB"
+    VERSION = 1
+    _HDR = struct.Struct("<4sBQQ")  # magic, version, json_len, blob_len
+
+    @classmethod
+    def is_schema_trivial(self, schema: Mapping) -> bool:
+        return False
+
+    def __init__(self, schema: Mapping[str, Any]) -> None:
+        """
+        Validate the ``json`` and ``struct`` subschemas of ``schema`` and build
+        the codecs and validators used for each half of a row.
+        """
+        json_schema = schema.get("json")
+        struct_schema = schema.get("struct")
+        if not isinstance(json_schema, Mapping) or not isinstance(
+            struct_schema, Mapping
+        ):
+            raise exceptions.MetadataSchemaValidationError(
+                "json+struct requires 'json' and 'struct' schema mappings"
+            )
+
+        json_schema = copy.deepcopy(dict(json_schema, codec="json"))
+        struct_schema = copy.deepcopy(dict(struct_schema, codec="struct"))
+
+        try:
+            json_schema = JSONCodec.modify_schema(json_schema)
+            JSONCodec.schema_validator.check_schema(json_schema)
+        except jsonschema.exceptions.SchemaError as ve:
+            raise exceptions.MetadataSchemaValidationError(str(ve)) from ve
+        try:
+            struct_schema = StructCodec.modify_schema(struct_schema)
+            StructCodecSchemaValidator.check_schema(struct_schema)
+        except jsonschema.exceptions.SchemaError as ve:
+            raise exceptions.MetadataSchemaValidationError(str(ve)) from ve
+
+        self.json_schema = json_schema
+        self.struct_schema = struct_schema
+        json_props = self.json_schema.get("properties", {})
+        struct_props = self.struct_schema.get("properties", {})
+        overlap = set(json_props).intersection(struct_props)
+        if overlap:
+            raise exceptions.MetadataSchemaValidationError(
+                "json and struct schemas must not share property names: "
+                + ", ".join(sorted(overlap))
+            )
+        for name, sub_schema in (
+            ("json", self.json_schema),
+            ("struct", self.struct_schema),
+        ):
+            sub_type = sub_schema.get("type")
+            if sub_type is None:
+                continue
+            if isinstance(sub_type, list):
+                is_object = "object" in sub_type
+            else:
+                is_object = sub_type == "object"
+            if not is_object:
+                raise exceptions.MetadataSchemaValidationError(
+                    f"{name} subschema must describe an object for json+struct codec"
+                )
+
+        self.json_codec = JSONCodec(self.json_schema)
+        self.struct_codec = StructCodec(self.struct_schema)
+        self._struct_keys = set(struct_props.keys())
+        self._validate_json = TSKITMetadataSchemaValidator(self.json_schema).validate
+        self._validate_struct = TSKITMetadataSchemaValidator(
+            self.struct_schema
+        ).validate
+
+    def validate_row(self, row: Any) -> None:
+        """
+        Validate the struct-schema keys of ``row`` against the struct subschema
+        and all remaining keys against the JSON subschema.
+        """
+        if not isinstance(row, dict):
+            raise exceptions.MetadataValidationError(
+                "json+struct metadata must be a mapping"
+            )
+        struct_data = {k: v for k, v in row.items() if k in self._struct_keys}
+        json_data = {k: v for k, v in row.items() if k not in self._struct_keys}
+        try:
+            self._validate_json(json_data)
+            self._validate_struct(struct_data)
+        except jsonschema.exceptions.ValidationError as ve:
+            raise exceptions.MetadataValidationError(str(ve)) from ve
+
+    def encode(self, obj: Any) -> bytes:
+        """
+        Encode ``obj`` as header + JSON bytes + struct bytes, routing each key
+        to the struct section if it is a struct property and to JSON otherwise.
+        """
+        if not isinstance(obj, dict):
+            raise exceptions.MetadataEncodingError(
+                "json+struct metadata must be a mapping"
+            )
+        json_bytes = self.json_codec.encode(
+            {k: v for k, v in obj.items() if k not in self._struct_keys}
+        )
+        blob_bytes = self.struct_codec.encode(
+            {k: v for k, v in obj.items() if k in self._struct_keys}
+        )
+        header = self._HDR.pack(
+            self.MAGIC, self.VERSION, len(json_bytes), len(blob_bytes)
+        )
+        return header + json_bytes + blob_bytes
+
+    def decode(self, encoded: bytes) -> Any:
+        """
+        Split ``encoded`` into its JSON and struct sections, decode each and
+        return the merged dict. Raises ``ValueError`` if the framing is invalid.
+        """
+        if encoded[:4] != self.MAGIC:
+            raise ValueError("Invalid json+struct payload: missing magic header")
+        if len(encoded) < self._HDR.size:
+            raise ValueError("Invalid json+struct payload: truncated header")
+        _, version, jlen, blen = self._HDR.unpack_from(encoded)
+        if version != self.VERSION:
+            raise ValueError("Unsupported json+struct version")
+        start = self._HDR.size
+        if jlen > len(encoded) - start or blen > len(encoded) - start - jlen:
+            raise ValueError(
+                "Invalid json+struct payload: declared lengths exceed buffer size"
+            )
+        json_bytes = encoded[start : start + jlen]
+        blob_bytes = encoded[start + jlen : start + jlen + blen]
+        json_data = self.json_codec.decode(json_bytes)
+        struct_data = self.struct_codec.decode(blob_bytes)
+        overlap = set(json_data).intersection(struct_data)
+        if overlap:
+            raise ValueError(
+                "json+struct decoded duplicate keys: " + ", ".join(sorted(overlap))
+            )
+        combined = dict(json_data)
+        combined.update(struct_data)
+        return combined
+
+
+register_metadata_codec(JSONStructCodec, "json+struct")
+
+
 def binary_format_validator(validator, types, instance, schema):
     # We're hooking into jsonschemas validation code here, which works by creating
     # generators of exceptions, hence the yielding
@@ -806,7 +959,13 @@ def __init__(self, schema: Mapping[str, Any] | None) -> None:
             self._schema = codec_cls.modify_schema(schema)
             self.codec_instance = codec_cls(self._schema)
             self._string = tskit.canonical_json(self._schema)
-            self._validate_row = TSKITMetadataSchemaValidator(self._schema).validate
+            # Codecs that define ``validate_row`` (e.g. json+struct) validate rows
+            # themselves; otherwise validate against the whole schema.
+            self._validate_row = getattr(
+                self.codec_instance,
+                "validate_row",
+                TSKITMetadataSchemaValidator(self._schema).validate,
+            )
             self._bypass_validation = codec_cls.is_schema_trivial(schema)
             self.encode_row = self.codec_instance.encode
             self.decode_row = self.codec_instance.decode