From 2cc03aa741e4064047dc9b9d2f506f2c5170e473 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 11 Apr 2025 02:06:32 -0400 Subject: [PATCH 1/6] [_709] orthogonal options when chaining obj.metadata(opt1=val1)(opt2=val2) In other words, setting opt2 = val2 does not reset opt1 back to its default value. [_709] correct and streamline. _opts and __kw should be separate. [_709] reasonable copy of _opts --- irods/manager/metadata_manager.py | 28 ++++++++++++++++++++++------ irods/meta.py | 9 +++++++-- irods/test/meta_test.py | 23 +++++++++++++++++++++++ 3 files changed, 52 insertions(+), 8 deletions(-) diff --git a/irods/manager/metadata_manager.py b/irods/manager/metadata_manager.py index 3e1cfc47e..fbf4bccac 100644 --- a/irods/manager/metadata_manager.py +++ b/irods/manager/metadata_manager.py @@ -29,9 +29,13 @@ class InvalidAtomicAVURequest(Exception): class MetadataManager(Manager): + def __init__(self, *_): + self._opts = {'admin':False, 'timestamps':False} + super().__init__(*_) + @property def use_timestamps(self): - return getattr(self, "_use_ts", False) + return self._opts['timestamps'] __kw : Dict[str, Any] = {} # default (empty) keywords @@ -40,12 +44,24 @@ def _updated_keywords(self, opts): kw_.update(opts) return kw_ - def __call__(self, admin=False, timestamps=False, **irods_kw_opt): - if admin: - irods_kw_opt.update([(kw.ADMIN_KW, "")]) + def get_api_keywords(self): return self.__kw.copy() + + def __call__(self, **flags): + # Make a new shallow copy of the manager object, but update options from parameter list. new_self = copy.copy(self) - new_self._use_ts = timestamps - new_self.__kw = irods_kw_opt + new_self._opts = copy.copy(self._opts) + + # Update the flags that do bookkeeping in the returned(new) manager object. + new_self._opts.update( + (key,val) for key,val in flags.items() if val is not None + ) + + # Update the ADMIN_KW flag in the returned(new) object. 
+ if new_self._opts.get('admin'): + new_self.__kw = {**self.__kw, kw.ADMIN_KW: ""}  # rebind on the copy; __kw is a class-level dict shared by all managers + else: + new_self.__kw = {k: v for k, v in self.__kw.items() if k != kw.ADMIN_KW} + return new_self @staticmethod diff --git a/irods/meta.py b/irods/meta.py index aef1b512f..3f5f0c290 100644 --- a/irods/meta.py +++ b/irods/meta.py @@ -89,9 +89,14 @@ def __init__(self, operation, avu, **kw): class iRODSMetaCollection: - def __call__(self, admin=False, timestamps=False, **opts): + def __call__(self, **opts): + """Optional parameters in **opts are: + + admin (default: False): apply ADMIN_KW to future metadata operations. + timestamps (default: False): attach (ctime,mtime) timestamp attributes to AVUs received from iRODS. + """ x = copy.copy(self) - x._manager = (x._manager)(admin, timestamps, **opts) + x._manager = (x._manager)(**opts) x._reset_metadata() return x diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index 1a0d01bf4..9610106f8 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -798,6 +798,29 @@ def test_xml_mode_addresses_odd_metadata_characters__issue_582(self): # in use, with the "odd" characters being present in the metadata value. 
del obj.metadata[attr_str] + def test_cascading_changes_of_metadata_manager_options__issue_709(self): + d = None + get_option = lambda metacoll, key: metacoll._manager._opts[key] + try: + d = self.sess.data_objects.create(f'{self.coll.path}/issue_709_test_1') + m = d.metadata + self.assertEqual(get_option(m,'admin'),False) + + m2 = m(admin = True) + self.assertEqual(get_option(m2,'timestamps'),False) + self.assertEqual(get_option(m2,'admin'),True) + + m3 = m2(timestamps = True) + self.assertEqual(get_option(m3,'timestamps'), True) + self.assertEqual(get_option(m3,'admin'), True) + self.assertEqual(m3._manager.get_api_keywords().get(kw.ADMIN_KW), "") + + m4 = m3(admin = False) + self.assertEqual(get_option(m4,'admin'), False) + self.assertEqual(m4._manager.get_api_keywords().get(kw.ADMIN_KW), None) + finally: + if d: + d.unlink(force=True) if __name__ == "__main__": # let the tests find the parent irods lib From f69fed728fc35b1cd14792ceb39b2d1fba7a18c1 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 27 Nov 2025 01:19:47 -0500 Subject: [PATCH 2/6] [_768] reload option [_768] add test [_768] reasonable handling of _meta reload test altered for clarity --- irods/manager/metadata_manager.py | 3 +++ irods/meta.py | 6 +++++- irods/test/meta_test.py | 20 ++++++++++++++++++++ 3 files changed, 28 insertions(+), 1 deletion(-) diff --git a/irods/manager/metadata_manager.py b/irods/manager/metadata_manager.py index fbf4bccac..99415dc5a 100644 --- a/irods/manager/metadata_manager.py +++ b/irods/manager/metadata_manager.py @@ -83,6 +83,9 @@ def _model_class_to_resource_description(model_cls): }[model_cls] def get(self, model_cls, path): + if not path: + # Short circuit. This should be of the same type as the object returned at the function's end. 
+ return [] resource_type = self._model_class_to_resource_type(model_cls) model = { "d": DataObjectMeta, diff --git a/irods/meta.py b/irods/meta.py index 3f5f0c290..58e705ec1 100644 --- a/irods/meta.py +++ b/irods/meta.py @@ -107,7 +107,11 @@ def __init__(self, manager, model_cls, path): self._reset_metadata() def _reset_metadata(self): - self._meta = self._manager.get(self._model_cls, self._path) + m = self._manager + if not hasattr(self,"_meta"): + self._meta = m.get(None, "") + if m._opts.setdefault('reload',True): + self._meta = m.get(self._model_cls, self._path) def get_all(self, key): """ diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index 9610106f8..b4c3cdc62 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -822,6 +822,26 @@ def test_cascading_changes_of_metadata_manager_options__issue_709(self): if d: d.unlink(force=True) + def test_reload_can_be_deactivated__issue_768(self): + # Set an initial AVU + metacoll = self.obj.metadata + metacoll.set(item_1:=iRODSMeta('aa','bb','cc')) + + # Initial defaults will always reload the AVU list from the server, so new AVU should be seen. + self.assertIn(item_1, metacoll.items()) + + # Setting reload option to False will prevent reload of object AVUs, so an AVU just set should not be seen. + metacoll_2 = metacoll(reload=False) + metacoll_2.set(item_2:=iRODSMeta('xx','yy','zz')) + items = metacoll_2.items() + self.assertIn(item_1, items) + self.assertNotIn(item_2, items) + + # Restore old setting. Check that both AVUs are seen as present. 
+ items_reloaded = metacoll_2(reload=True).items() + self.assertIn(item_1, items_reloaded) + self.assertIn(item_2, items_reloaded) + if __name__ == "__main__": # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath("../..")) From 33ada397a917a2d4dc1148d2c99838f040107876 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 9 Apr 2025 19:34:42 -0400 Subject: [PATCH 3/6] [_707] support for subclassing iRODSMeta Example: iRODSBinOrStringMeta allows storing arbitrary octet strings in metadata. --- irods/manager/metadata_manager.py | 25 +++++++------ irods/meta.py | 59 +++++++++++++++++++++++++------ irods/test/meta_test.py | 19 ++++++++++ 3 files changed, 80 insertions(+), 23 deletions(-) diff --git a/irods/manager/metadata_manager.py b/irods/manager/metadata_manager.py index 99415dc5a..c3d3fd870 100644 --- a/irods/manager/metadata_manager.py +++ b/irods/manager/metadata_manager.py @@ -30,7 +30,11 @@ class InvalidAtomicAVURequest(Exception): class MetadataManager(Manager): def __init__(self, *_): - self._opts = {'admin':False, 'timestamps':False} + self._opts = { + 'admin':False, + 'timestamps':False, + 'iRODSMeta_type':iRODSMeta + } super().__init__(*_) @property @@ -39,6 +43,7 @@ def use_timestamps(self): __kw : Dict[str, Any] = {} # default (empty) keywords + def _updated_keywords(self, opts): kw_ = self.__kw.copy() kw_.update(opts) @@ -115,9 +120,9 @@ def meta_opts(row): return opts return [ - iRODSMeta( - row[model.name], row[model.value], row[model.units], **meta_opts(row) - ) + self._opts['iRODSMeta_type'](None,None,None)._from_column_triple( + row[model.name], row[model.value], row[model.units], + **meta_opts(row)) for row in results ] @@ -128,9 +133,7 @@ def add(self, model_cls, path, meta, **opts): "add", "-" + resource_type, path, - meta.name, - meta.value, - meta.units, + *meta._to_column_triple(), **self._updated_keywords(opts) ) request = iRODSMessage( @@ -147,9 +150,7 @@ def remove(self, model_cls, path, meta, **opts): "rm", "-" + 
resource_type, path, - meta.name, - meta.value, - meta.units, + *meta._to_column_triple(), **self._updated_keywords(opts) ) request = iRODSMessage( @@ -186,9 +187,7 @@ def set(self, model_cls, path, meta, **opts): "set", "-" + resource_type, path, - meta.name, - meta.value, - meta.units, + *meta._to_column_triple(), **self._updated_keywords(opts) ) request = iRODSMessage( diff --git a/irods/meta.py b/irods/meta.py index 58e705ec1..4028140bf 100644 --- a/irods/meta.py +++ b/irods/meta.py @@ -1,14 +1,41 @@ +import base64 +import copy + + class iRODSMeta: + def _to_column_triple(self): + return (self.name ,self.forward_translate(self.value)) + (('',) if not self.units else (self.forward_translate(self.units),)) + + def _from_column_triple(self, name, value, units, **kw): + self.__low_level_init(name, + self.reverse_translate(value), + units=None if not units else self.reverse_translate(units), + **kw) + return self + + reverse_translate = forward_translate = staticmethod(lambda _:_) + + INIT_KW_ARGS = 'units avu_id create_time modify_time'.split() + def __init__( - self, name, value, units=None, avu_id=None, create_time=None, modify_time=None + self, name, value, /, units=None, *, avu_id=None, create_time=None, modify_time=None, ): - self.avu_id = avu_id + # Defer initialization for iRODSMeta(attribute,value,...) if neither attribute nor value is True under + # a 'bool' transformation. In so doing we streamline initialization for iRODSMeta (and any subclasses) + # for alternatively populating via _from_column_triple(...). + # This is the pathway for allowing user-defined encodings of the iRODSMeta (byte-)string AVU components. + if name or value: + # Note: calling locals() inside the dict comprehension would not access variables in this frame. 
+ local_vars = locals() + kw = {name:local_vars.get(name) for name in self.INIT_KW_ARGS} + self.__low_level_init(name, value, **kw) + + def __low_level_init(self, name, value, **kw): self.name = name self.value = value - self.units = units - self.create_time = create_time - self.modify_time = modify_time + for attr in self.INIT_KW_ARGS: + setattr(self, attr, kw.get(attr)) def __eq__(self, other): return tuple(self) == tuple(other) @@ -20,7 +47,22 @@ def __iter__(self): yield self.units def __repr__(self): - return "".format(**vars(self)) + return f"<{self.__class__.__name__} {self.avu_id} {self.name} {self.value} {self.units}>" + + def __hash__(self): + return hash(tuple(self)) + +class iRODSBinOrStringMeta(iRODSMeta): + + @staticmethod + def reverse_translate(value): + """Translate an AVU field from its iRODS object-database form into the client representation of that field.""" + return value if value[0] != '\\' else base64.decodebytes(value[1:].encode('utf8')) + + @staticmethod + def forward_translate(value): + """Translate an AVU field from the form it takes in the client, into an iRODS object-database compatible form.""" + return b'\\' + base64.encodebytes(value).strip() if isinstance(value,(bytes,bytearray)) else value class BadAVUOperationKeyword(Exception): @@ -84,9 +126,6 @@ def __init__(self, operation, avu, **kw): setattr(self, atr, locals()[atr]) -import copy - - class iRODSMetaCollection: def __call__(self, **opts): @@ -138,7 +177,7 @@ def get_one(self, key): def _get_meta(self, *args): if not len(args): raise ValueError("Must specify an iRODSMeta object or key, value, units)") - return args[0] if len(args) == 1 else iRODSMeta(*args) + return args[0] if len(args) == 1 else self._manager._opts['iRODSMeta_type'](*args) def apply_atomic_operations(self, *avu_ops): self._manager.apply_atomic_operations(self._model_cls, self._path, *avu_ops) diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index b4c3cdc62..4dc622ca9 100644 --- 
a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -14,6 +14,7 @@ AVUOperation, BadAVUOperationValue, BadAVUOperationKeyword, + iRODSBinOrStringMeta, ) from irods.models import DataObject, Collection, Resource, CollectionMeta import irods.test.helpers as helpers @@ -798,6 +799,24 @@ def test_xml_mode_addresses_odd_metadata_characters__issue_582(self): # in use, with the "odd" characters being present in the metadata value. del obj.metadata[attr_str] + def test_binary_avu_fields__issue_707(self): + meta_coll = self.obj.metadata(iRODSMeta_type=iRODSBinOrStringMeta) + illegal_unicode_sequence = '\u1000'.encode('utf8')[:2] + avu_name = 'issue709' + meta_coll.set( + avu_name, + (value:=b'value_'+illegal_unicode_sequence), + (units:=b'units_'+illegal_unicode_sequence) + ) + + self.assertEqual( + meta_coll.get_one(avu_name), + (avu_name, value, units) + ) + meta_coll.add(*(new_avu:=iRODSMeta(avu_name, '\u1000', '\u1001'))) + relevant_avus = meta_coll.get_all(avu_name) + self.assertIn(new_avu, relevant_avus) + def test_cascading_changes_of_metadata_manager_options__issue_709(self): d = None get_option = lambda metacoll, key: metacoll._manager._opts[key] From 43bea2025fe2e7b6036c0f5104cbe026d023ceb7 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 28 Nov 2025 02:44:31 -0500 Subject: [PATCH 4/6] [_707,_768] README sections --- README.md | 61 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/README.md b/README.md index d8ee206de..a18deccb1 100644 --- a/README.md +++ b/README.md @@ -832,6 +832,67 @@ of create and modify timestamps for every AVU returned from the server: datetime.datetime(2022, 9, 19, 15, 26, 7) ``` +Disabling AVU reloads from the iRODS server +------------------------------------------- + +With the default setting of reload = True, an iRODSMetaCollection will +proactively read all current AVUs back from the iRODS server after any +metadata write done by the client. 
This helps methods such as items() +to return an up-to-date result. Changing that default can, however, greatly +increase code efficiency if for example a lot of AVUs must be added or deleted +at once without reading any back again. + +``` +# Make a metadata view in which AVUs are not reloaded, for quick update: +non_current_metadata_view = obj.metadata(reload = False) +for i in range(10): + non_current_metadata_view.add("my_key", "my_value_"+str(i)) + +# Force reload of AVUs and display: +current_metadata = obj.metadata().items() +from pprint import pp +print(f"{current_metadata = }") +``` + +Subclassing iRODSMeta +--------------------- +The keyword option `iRODSMeta_type` can be used to set up any iRODSMeta +subclass as the translator between native iRODS metadata APIs +and the way in which the AVUs thus conveyed should be represented to the +client. + +An example is the `irods.meta.iRODSBinOrStringMeta` class which uses the +`base64` module to "hide" arbitrary bytestrings within the `value` and +`units` attributes of an iRODS metadata AVU: + +``` +from irods.meta import iRODSBinOrStringMeta as MyMeta +d = session.data_objects.get('/path/to/object') +unencodable_octets = '\u1000'.encode('utf8')[:-1] + +# Use our custom client-metadata type to store arbitrary octet strings +meta_view = d.metadata(iRODSMeta_type = MyMeta) +meta_view.set(m1 := MyMeta('mybinary', unencodable_octets, b'\x02')) + +# Show that traditional AVU's can exist alongside the custom kind. 
+irods.client_configuration.connections.xml_parser_default = 'QUASI_XML' +meta_view.set(m2 := MyMeta('mytext', '\1', '\2')) + +try: + # These two lines are equivalent: + assert {m1,m2} <= (all_avus := set(meta_view.items())) + assert {tuple(m1),tuple(m2)} <= all_avus +finally: + del meta_view['mytext'], meta_view['mybinary'] +``` + +Whereas the content of native iRODS AVUs must obey some valid text encoding as +determined by the resident ICAT DB, the above is a possible alternative - albeit +one semantically bound to the local application that defines the needed +translations. Still, this can be a valid usage for users who need a guarantee +that any given octet string they might generate can be placed into metadata without +violating standard text encodings. + Atomic operations on metadata ----------------------------- From 66284dc43b376e7bf5657616f1862e9c76282d9a Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Mon, 1 Dec 2025 22:45:56 -0500 Subject: [PATCH 5/6] [_700] revise access_time test We test a still open replica at its point of creation, asserting mod and access times are equal. And we clean up after ourselves now. --- irods/test/data_obj_test.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 071771717..9148f996a 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -3305,20 +3305,23 @@ def test_access_time__issue_700(self): if self.sess.server_version < (5,): self.skipTest("iRODS servers < 5.0.0 do not provide an access_time attribute for data objects.") + data = None prior_ts = datetime.now(timezone.utc) - timedelta(seconds=2) # Create a new, uniquely named test data object. 
- data = self.sess.data_objects.create( - logical_path:=f'{helpers.home_collection(self.sess)}/{unique_name(my_function_name(), datetime.now())}' - ) + logical_path = f'{helpers.home_collection(self.sess)}/{unique_name(my_function_name(), datetime.now())}' - with data.open('w') as f: - data = self.sess.data_objects.get(logical_path) - self.assertEqual(data.access_time, data.modify_time) - self.assertGreaterEqual(data.access_time, prior_ts) + try: + with self.sess.data_objects.open(logical_path,'w') as f: + data = self.sess.data_objects.get(logical_path) + self.assertEqual(data.access_time, data.modify_time) + self.assertGreaterEqual(data.access_time, prior_ts) - # Test that access_time is there, and of the right type. - self.assertIs(type(data.access_time), datetime) + # Test that access_time is there, and of the right type. + self.assertIs(type(data.access_time), datetime) + finally: + if data: + self.sess.data_objects.unlink(data.path, force = True) if __name__ == "__main__": # let the tests find the parent irods lib From dfb0cd9de2954b38f8d1a68b40b4ae0c00402a64 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 4 Dec 2025 10:40:47 -0500 Subject: [PATCH 6/6] Update README.md - various review comments Co-authored-by: Kory Draughn --- README.md | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index a18deccb1..ac95df740 100644 --- a/README.md +++ b/README.md @@ -835,10 +835,10 @@ datetime.datetime(2022, 9, 19, 15, 26, 7) Disabling AVU reloads from the iRODS server ------------------------------------------- -With the default setting of reload = True, an iRODSMetaCollection will +With the default setting of `reload = True`, an `iRODSMetaCollection` will proactively read all current AVUs back from the iRODS server after any -metadata write done by the client. This helps methods such as items() -to return an up-to-date result. Changing that default can, however, greatly +metadata write done by the client. 
This helps methods such as `items()` +to return an up-to-date result. Setting `reload = False` can, however, greatly increase code efficiency if for example a lot of AVUs must be added or deleted at once without reading any back again. @@ -850,13 +850,12 @@ for i in range(10): # Force reload of AVUs and display: current_metadata = obj.metadata().items() -from pprint import pp print(f"{current_metadata = }") ``` Subclassing iRODSMeta --------------------- -The keyword option `iRODSMeta_type` can be used to set up any iRODSMeta +The keyword option `iRODSMeta_type` can be used to set up any `iRODSMeta` subclass as the translator between native iRODS metadata APIs and the way in which the AVUs thus conveyed should be represented to the client. @@ -865,12 +864,12 @@ An example is the `irods.meta.iRODSBinOrStringMeta` class which uses the `base64` module to "hide" arbitrary bytestrings within the `value` and `units` attributes of an iRODS metadata AVU: -``` +```py from irods.meta import iRODSBinOrStringMeta as MyMeta d = session.data_objects.get('/path/to/object') unencodable_octets = '\u1000'.encode('utf8')[:-1] -# Use our custom client-metadata type to store arbitrary octet strings +# Use our custom client-metadata type to store arbitrary octet strings. meta_view = d.metadata(iRODSMeta_type = MyMeta) meta_view.set(m1 := MyMeta('mybinary', unencodable_octets, b'\x02')) @@ -879,7 +878,7 @@ irods.client_configuration.connections.xml_parser_default = 'QUASI_XML' meta_view.set(m2 := MyMeta('mytext', '\1', '\2')) try: - # These two lines are equivalent: + # These two lines are equivalent. 
assert {m1,m2} <= (all_avus := set(meta_view.items())) assert {tuple(m1),tuple(m2)} <= all_avus finally: @@ -887,7 +886,7 @@ finally: ``` Whereas the content of native iRODS AVUs must obey some valid text encoding as -determined by the resident ICAT DB, the above is a possible alternative - albeit +determined by the resident iRODS catalog, the above is a possible alternative - albeit one semantically bound to the local application that defines the needed translations. Still, this can be a valid usage for users who need a guarantee that any given octet string they might generate can be placed into metadata without