diff --git a/README.md b/README.md index d8ee206de..ac95df740 100644 --- a/README.md +++ b/README.md @@ -832,6 +832,66 @@ of create and modify timestamps for every AVU returned from the server: datetime.datetime(2022, 9, 19, 15, 26, 7) ``` +Disabling AVU reloads from the iRODS server +------------------------------------------- + +With the default setting of `reload = True`, an `iRODSMetaCollection` will +proactively read all current AVUs back from the iRODS server after any +metadata write done by the client. This helps methods such as `items()` +to return an up-to-date result. Setting `reload = False` can, however, greatly +increase code efficiency if, for example, a lot of AVUs must be added or deleted +at once without reading any back again. + +``` +# Make a metadata view in which AVUs are not reloaded, for quick update: +non_current_metadata_view = obj.metadata(reload = False) +for i in range(10): + non_current_metadata_view.add("my_key", "my_value_"+str(i)) + +# Force reload of AVUs and display: +current_metadata = obj.metadata().items() +print(f"{current_metadata = }") +``` + +Subclassing iRODSMeta +--------------------- +The keyword option `iRODSMeta_type` can be used to set up any `iRODSMeta` +subclass as the translator between native iRODS metadata APIs +and the way in which the AVUs thus conveyed should be represented to the +client. + +An example is the `irods.meta.iRODSBinOrStringMeta` class, which uses the +`base64` module to "hide" arbitrary bytestrings within the `value` and +`units` attributes of an iRODS metadata AVU: + +```py +from irods.meta import iRODSBinOrStringMeta as MyMeta +d = session.data_objects.get('/path/to/object') +unencodable_octets = '\u1000'.encode('utf8')[:-1] + +# Use our custom client-metadata type to store arbitrary octet strings. +meta_view = d.metadata(iRODSMeta_type = MyMeta) +meta_view.set(m1 := MyMeta('mybinary', unencodable_octets, b'\x02')) + +# Show that traditional AVUs can exist alongside the custom kind.
+irods.client_configuration.connections.xml_parser_default = 'QUASI_XML' +meta_view.set(m2 := MyMeta('mytext', '\1', '\2')) + +try: + # These two lines are equivalent. + assert {m1,m2} <= (all_avus := set(meta_view.items())) + assert {tuple(m1),tuple(m2)} <= all_avus +finally: + del meta_view['mytext'], meta_view['mybinary'] +``` + +Whereas the content of native iRODS AVUs must obey some valid text encoding as +determined by the resident iRODS catalog, the above is a possible alternative - albeit +one semantically bound to the local application that defines the needed +translations. Still, this can be a valid usage for users who need a guarantee +that any given octet string they might generate can be placed into metadata without +violating standard text encodings. + Atomic operations on metadata ----------------------------- diff --git a/irods/manager/metadata_manager.py b/irods/manager/metadata_manager.py index 3e1cfc47e..c3d3fd870 100644 --- a/irods/manager/metadata_manager.py +++ b/irods/manager/metadata_manager.py @@ -29,23 +29,44 @@ class InvalidAtomicAVURequest(Exception): class MetadataManager(Manager): + def __init__(self, *_): + self._opts = { + 'admin':False, + 'timestamps':False, + 'iRODSMeta_type':iRODSMeta + } + super().__init__(*_) + @property def use_timestamps(self): - return getattr(self, "_use_ts", False) + return self._opts['timestamps'] __kw : Dict[str, Any] = {} # default (empty) keywords + def _updated_keywords(self, opts): kw_ = self.__kw.copy() kw_.update(opts) return kw_ - def __call__(self, admin=False, timestamps=False, **irods_kw_opt): - if admin: - irods_kw_opt.update([(kw.ADMIN_KW, "")]) + def get_api_keywords(self): return self.__kw.copy() + + def __call__(self, **flags): + # Make a new shallow copy of the manager object, but update options from parameter list. 
new_self = copy.copy(self) - new_self._use_ts = timestamps - new_self.__kw = irods_kw_opt + new_self._opts = copy.copy(self._opts) + + # Update the flags that do bookkeeping in the returned(new) manager object. + new_self._opts.update( + (key,val) for key,val in flags.items() if val is not None + ) + + # Update the ADMIN_KW flag in the returned(new) object only: rebuild its keyword dict + # rather than mutating self.__kw, which may be the class-level dict shared by all managers. + new_self.__kw = {k: v for k, v in self.__kw.items() if k != kw.ADMIN_KW} + if new_self._opts.get('admin'): + new_self.__kw[kw.ADMIN_KW] = "" + return new_self @staticmethod @@ -67,6 +88,9 @@ def _model_class_to_resource_description(model_cls): }[model_cls] def get(self, model_cls, path): + if not path: + # Short circuit. This should be of the same type as the object returned at the function's end. + return [] resource_type = self._model_class_to_resource_type(model_cls) model = { "d": DataObjectMeta, @@ -96,9 +120,9 @@ def meta_opts(row): return opts return [ - iRODSMeta( - row[model.name], row[model.value], row[model.units], **meta_opts(row) - ) + self._opts['iRODSMeta_type'](None,None,None)._from_column_triple( + row[model.name], row[model.value], row[model.units], + **meta_opts(row)) for row in results ] @@ -109,9 +133,7 @@ def add(self, model_cls, path, meta, **opts): "add", "-" + resource_type, path, - meta.name, - meta.value, - meta.units, + *meta._to_column_triple(), **self._updated_keywords(opts) ) request = iRODSMessage( @@ -128,9 +150,7 @@ def remove(self, model_cls, path, meta, **opts): "rm", "-" + resource_type, path, - meta.name, - meta.value, - meta.units, + *meta._to_column_triple(), **self._updated_keywords(opts) ) request = iRODSMessage( @@ -167,9 +187,7 @@ def set(self, model_cls, path, meta, **opts): "set", "-" + resource_type, path, - meta.name, - meta.value, - meta.units, + *meta._to_column_triple(), **self._updated_keywords(opts) ) request = iRODSMessage( diff --git a/irods/meta.py b/irods/meta.py index aef1b512f..4028140bf 100644 --- a/irods/meta.py +++ b/irods/meta.py @@ -1,14 +1,41 @@ +import base64 +import copy + +
class iRODSMeta: + def _to_column_triple(self): + return (self.name ,self.forward_translate(self.value)) + (('',) if not self.units else (self.forward_translate(self.units),)) + + def _from_column_triple(self, name, value, units, **kw): + self.__low_level_init(name, + self.reverse_translate(value), + units=None if not units else self.reverse_translate(units), + **kw) + return self + + reverse_translate = forward_translate = staticmethod(lambda _:_) + + INIT_KW_ARGS = 'units avu_id create_time modify_time'.split() + def __init__( - self, name, value, units=None, avu_id=None, create_time=None, modify_time=None + self, name, value, /, units=None, *, avu_id=None, create_time=None, modify_time=None, ): - self.avu_id = avu_id + # Defer initialization for iRODSMeta(attribute,value,...) if neither attribute nor value is True under + # a 'bool' transformation. In so doing we streamline initialization for iRODSMeta (and any subclasses) + # for alternatively populating via _from_column_triple(...). + # This is the pathway for allowing user-defined encodings of the iRODSMeta (byte-)string AVU components. + if name or value: + # Note: calling locals() inside the dict comprehension would not access variables in this frame. 
+ local_vars = locals() + kw = {name:local_vars.get(name) for name in self.INIT_KW_ARGS} + self.__low_level_init(name, value, **kw) + + def __low_level_init(self, name, value, **kw): self.name = name self.value = value - self.units = units - self.create_time = create_time - self.modify_time = modify_time + for attr in self.INIT_KW_ARGS: + setattr(self, attr, kw.get(attr)) def __eq__(self, other): return tuple(self) == tuple(other) @@ -20,7 +47,22 @@ def __iter__(self): yield self.units def __repr__(self): - return "".format(**vars(self)) + return f"<{self.__class__.__name__} {self.avu_id} {self.name} {self.value} {self.units}>" + + def __hash__(self): + return hash(tuple(self)) + +class iRODSBinOrStringMeta(iRODSMeta): + + @staticmethod + def reverse_translate(value): + """Translate an AVU field from its iRODS object-database form into the client representation of that field.""" + return value if value[:1] != '\\' else base64.decodebytes(value[1:].encode('utf8')) + + @staticmethod + def forward_translate(value): + """Translate an AVU field from the form it takes in the client, into an iRODS object-database compatible form.""" + return b'\\' + base64.encodebytes(value).strip() if isinstance(value,(bytes,bytearray)) else value class BadAVUOperationKeyword(Exception): @@ -84,14 +126,16 @@ def __init__(self, operation, avu, **kw): setattr(self, atr, locals()[atr]) -import copy - - class iRODSMetaCollection: - def __call__(self, admin=False, timestamps=False, **opts): + def __call__(self, **opts): + """Optional parameters in **opts are: + + admin (default: False): apply ADMIN_KW to future metadata operations. + timestamps (default: False): attach (ctime,mtime) timestamp attributes to AVUs received from iRODS.
+ """ x = copy.copy(self) - x._manager = (x._manager)(admin, timestamps, **opts) + x._manager = (x._manager)(**opts) x._reset_metadata() return x @@ -102,7 +146,11 @@ def __init__(self, manager, model_cls, path): self._reset_metadata() def _reset_metadata(self): - self._meta = self._manager.get(self._model_cls, self._path) + m = self._manager + if not hasattr(self,"_meta"): + self._meta = m.get(None, "") + if m._opts.setdefault('reload',True): + self._meta = m.get(self._model_cls, self._path) def get_all(self, key): """ @@ -129,7 +177,7 @@ def get_one(self, key): def _get_meta(self, *args): if not len(args): raise ValueError("Must specify an iRODSMeta object or key, value, units)") - return args[0] if len(args) == 1 else iRODSMeta(*args) + return args[0] if len(args) == 1 else self._manager._opts['iRODSMeta_type'](*args) def apply_atomic_operations(self, *avu_ops): self._manager.apply_atomic_operations(self._model_cls, self._path, *avu_ops) diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 071771717..9148f996a 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -3305,20 +3305,23 @@ def test_access_time__issue_700(self): if self.sess.server_version < (5,): self.skipTest("iRODS servers < 5.0.0 do not provide an access_time attribute for data objects.") + data = None prior_ts = datetime.now(timezone.utc) - timedelta(seconds=2) # Create a new, uniquely named test data object. 
- data = self.sess.data_objects.create( - logical_path:=f'{helpers.home_collection(self.sess)}/{unique_name(my_function_name(), datetime.now())}' - ) + logical_path = f'{helpers.home_collection(self.sess)}/{unique_name(my_function_name(), datetime.now())}' - with data.open('w') as f: - data = self.sess.data_objects.get(logical_path) - self.assertEqual(data.access_time, data.modify_time) - self.assertGreaterEqual(data.access_time, prior_ts) + try: + with self.sess.data_objects.open(logical_path,'w') as f: + data = self.sess.data_objects.get(logical_path) + self.assertEqual(data.access_time, data.modify_time) + self.assertGreaterEqual(data.access_time, prior_ts) - # Test that access_time is there, and of the right type. - self.assertIs(type(data.access_time), datetime) + # Test that access_time is there, and of the right type. + self.assertIs(type(data.access_time), datetime) + finally: + if data: + self.sess.data_objects.unlink(data.path, force = True) if __name__ == "__main__": # let the tests find the parent irods lib diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index 1a0d01bf4..4dc622ca9 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -14,6 +14,7 @@ AVUOperation, BadAVUOperationValue, BadAVUOperationKeyword, + iRODSBinOrStringMeta, ) from irods.models import DataObject, Collection, Resource, CollectionMeta import irods.test.helpers as helpers @@ -798,6 +799,67 @@ def test_xml_mode_addresses_odd_metadata_characters__issue_582(self): # in use, with the "odd" characters being present in the metadata value. 
del obj.metadata[attr_str] + def test_binary_avu_fields__issue_707(self): + meta_coll = self.obj.metadata(iRODSMeta_type=iRODSBinOrStringMeta) + illegal_unicode_sequence = '\u1000'.encode('utf8')[:2] + avu_name = 'issue709' + meta_coll.set( + avu_name, + (value:=b'value_'+illegal_unicode_sequence), + (units:=b'units_'+illegal_unicode_sequence) + ) + + self.assertEqual( + meta_coll.get_one(avu_name), + (avu_name, value, units) + ) + meta_coll.add(*(new_avu:=iRODSMeta(avu_name, '\u1000', '\u1001'))) + relevant_avus = meta_coll.get_all(avu_name) + self.assertIn(new_avu, relevant_avus) + + def test_cascading_changes_of_metadata_manager_options__issue_709(self): + d = None + get_option = lambda metacoll, key: metacoll._manager._opts[key] + try: + d = self.sess.data_objects.create(f'{self.coll.path}/issue_709_test_1') + m = d.metadata + self.assertEqual(get_option(m,'admin'),False) + + m2 = m(admin = True) + self.assertEqual(get_option(m2,'timestamps'),False) + self.assertEqual(get_option(m2,'admin'),True) + + m3 = m2(timestamps = True) + self.assertEqual(get_option(m3,'timestamps'), True) + self.assertEqual(get_option(m3,'admin'), True) + self.assertEqual(m3._manager.get_api_keywords().get(kw.ADMIN_KW), "") + + m4 = m3(admin = False) + self.assertEqual(get_option(m4,'admin'), False) + self.assertEqual(m4._manager.get_api_keywords().get(kw.ADMIN_KW), None) + finally: + if d: + d.unlink(force=True) + + def test_reload_can_be_deactivated__issue_768(self): + # Set an initial AVU + metacoll = self.obj.metadata + metacoll.set(item_1:=iRODSMeta('aa','bb','cc')) + + # Initial defaults will always reload the AVU list from the server, so new AVU should be seen. + self.assertIn(item_1, metacoll.items()) + + # Setting reload option to False will prevent reload of object AVUs, so an AVU just set should not be seen. 
+ metacoll_2 = metacoll(reload=False) + metacoll_2.set(item_2:=iRODSMeta('xx','yy','zz')) + items = metacoll_2.items() + self.assertIn(item_1, items) + self.assertNotIn(item_2, items) + + # Restore old setting. Check that both AVUs are seen as present. + items_reloaded = metacoll_2(reload=True).items() + self.assertIn(item_1, items_reloaded) + self.assertIn(item_2, items_reloaded) if __name__ == "__main__": # let the tests find the parent irods lib