From 5797ea43e0b7e789a19a606187ee8af95d58185a Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Fri, 3 Jan 2025 20:12:23 -0500 Subject: [PATCH 1/5] [_491] open raw/managed/buffering managed/raw/buffering --- irods/manager/data_object_manager.py | 65 ++++++++++++++++++---------- 1 file changed, 42 insertions(+), 23 deletions(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 6b84f9f46..fa03fa5f0 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -121,23 +121,29 @@ def call___del__if_exists(super_): next_finalizer_in_MRO() -class ManagedBufferedRandom(io.BufferedRandom): +def managed_class(cls): - def __init__(self, *a, **kwd): - # Help ensure proper teardown sequence by storing a reference to the session, - # if provided via keyword '_session'. - self._iRODS_session = kwd.pop("_session", None) - super(ManagedBufferedRandom, self).__init__(*a, **kwd) - import irods.session + class managed(cls): - with irods.session._fds_lock: - irods.session._fds[self] = None + def __init__(self, *a, **kwd): + # Help ensure proper teardown sequence by storing a reference to the session, + # if provided via keyword '_session'. + self._iRODS_session = kwd.pop("_session", None) + super(managed, self).__init__(*a, **kwd) + import irods.session - def __del__(self): - if not self.closed: - self.close() - call___del__if_exists(super(ManagedBufferedRandom, self)) + with irods.session._fds_lock: + irods.session._fds[self] = None + def __del__(self): + if not self.closed: + self.close() + call___del__if_exists(super(managed, self)) + + return managed + +m_BufferedRandom = managed_class(io.BufferedRandom) +m_iRODSDataObjectFileRaw = managed_class(iRODSDataObjectFileRaw) MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE = 32 * (1024**2) @@ -515,6 +521,7 @@ def open( mode, create=True, # (Dis-)allow object creation. finalize_on_close=True, # For PRC internal use. + buffering = -1, auto_close=client_config.getter( "data_objects", "auto_close" ), # The default value will be a lambda returning the @@ -523,6 +530,8 @@ def open( allow_redirect=client_config.getter("data_objects", "allow_redirect"), **options ): + if buffering < 0: + buffering = io.DEFAULT_BUFFER_SIZE _raw_fd_holder = options.get("_raw_fd_holder", []) # If no keywords are used that would influence the server as to the choice of a storage resource, # then use the default resource in the client configuration. @@ -622,23 +631,33 @@ def make_FileOpenRequest(**extra_opts): conn.send(message) desc = conn.recv().int_info - raw = iRODSDataObjectFileRaw( - conn, desc, finalize_on_close=finalize_on_close, **options - ) - raw.session = directed_sess - - (_raw_fd_holder).append(raw) - if callable(auto_close): # Use case: auto_close has defaulted to the irods.configuration getter. # access entry in irods.configuration auto_close = auto_close() - if auto_close: - ret_value = ManagedBufferedRandom(raw, _session=self.sess) + + if buffering or not auto_close: + raw_constructor = iRODSDataObjectFileRaw else: - ret_value = io.BufferedRandom(raw) + options['_session'] = self.sess + raw_constructor = m_iRODSDataObjectFileRaw + + raw = raw_constructor(conn, desc, finalize_on_close=finalize_on_close, **options) + raw.session = directed_sess + + (_raw_fd_holder).append(raw) + + if buffering: + if auto_close: + ret_value = m_BufferedRandom(raw, _session=self.sess) + else: + ret_value = io.BufferedRandom(raw) + else: + ret_value = raw + if "a" in mode: ret_value.seek(0, io.SEEK_END) + return ret_value def replica_truncate(self, path, desired_size, **options): From f35204677930fbfb2d4ccf20f7671436e6fef4d9 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 8 Jan 2025 09:40:30 -0500 Subject: [PATCH 2/5] line_buffering --- irods/manager/data_object_manager.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index fa03fa5f0..ac5f6ffba 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -530,8 +530,6 @@ def open( allow_redirect=client_config.getter("data_objects", "allow_redirect"), **options ): - if buffering < 0: - buffering = io.DEFAULT_BUFFER_SIZE _raw_fd_holder = options.get("_raw_fd_holder", []) # If no keywords are used that would influence the server as to the choice of a storage resource, # then use the default resource in the client configuration. @@ -636,9 +634,11 @@ def make_FileOpenRequest(**extra_opts): # access entry in irods.configuration auto_close = auto_close() - if buffering or not auto_close: + if not auto_close or buffering not in (0,1): + # internal-ish / not memory managed raw_constructor = iRODSDataObjectFileRaw else: + # external-ish / memory managed options['_session'] = self.sess raw_constructor = m_iRODSDataObjectFileRaw @@ -647,11 +647,13 @@ def make_FileOpenRequest(**extra_opts): (_raw_fd_holder).append(raw) - if buffering: + if buffering not in (0,1): + buf_options = {} + if buffering > 1: buf_options['buffer_size'] = buffering if auto_close: - ret_value = m_BufferedRandom(raw, _session=self.sess) + ret_value = m_BufferedRandom(raw, _session=self.sess, **buf_options) else: - ret_value = io.BufferedRandom(raw) + ret_value = io.BufferedRandom(raw, **buf_options) else: ret_value = raw From 1c947e8ed87e30287c81aeacd7cb301eaf2f4b41 Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 8 Jan 2025 09:42:15 -0500 Subject: [PATCH 3/5] line_buffer experiment: raw always managed --- irods/manager/data_object_manager.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index ac5f6ffba..ace4670ba 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -634,11 +634,12 @@ def make_FileOpenRequest(**extra_opts): # access entry in irods.configuration auto_close = auto_close() - if not auto_close or buffering not in (0,1): - # internal-ish / not memory managed - raw_constructor = iRODSDataObjectFileRaw - else: - # external-ish / memory managed +# if not auto_close or buffering not in (0,1): +# # internal-ish / not memory managed +# raw_constructor = iRODSDataObjectFileRaw +# else: +# # external-ish / memory managed + if 1: options['_session'] = self.sess raw_constructor = m_iRODSDataObjectFileRaw From ab809d1a37d91783cf41662c90ca9cca870641cf Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Wed, 8 Jan 2025 09:48:18 -0500 Subject: [PATCH 4/5] memory-managed raw --- irods/manager/data_object_manager.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index ace4670ba..c491fc375 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -639,9 +639,11 @@ def make_FileOpenRequest(**extra_opts): # raw_constructor = iRODSDataObjectFileRaw # else: # # external-ish / memory managed - if 1: + if auto_close: options['_session'] = self.sess raw_constructor = m_iRODSDataObjectFileRaw + else: + raw_constructor = iRODSDataObjectFileRaw raw = raw_constructor(conn, desc, finalize_on_close=finalize_on_close, **options) raw.session = directed_sess From 928eadd32f1c4e3f1778e2ab8691f69f2681a69d Mon Sep 17 00:00:00 2001 From: d-w-moore Date: Thu, 9 Jan 2025 00:53:17 -0500 Subject: [PATCH 5/5] buffering parameter now can set line buffering and variable buf sizes correctly. line buffering only applicable in cases of e.g. TextIOWrapper opened via iRODSFS (https://github.com/d-w-moore/fs-irods@pass_root_path) --- irods/manager/data_object_manager.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index c491fc375..a98def753 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -634,11 +634,6 @@ def make_FileOpenRequest(**extra_opts): # access entry in irods.configuration auto_close = auto_close() -# if not auto_close or buffering not in (0,1): -# # internal-ish / not memory managed -# raw_constructor = iRODSDataObjectFileRaw -# else: -# # external-ish / memory managed if auto_close: options['_session'] = self.sess raw_constructor = m_iRODSDataObjectFileRaw @@ -652,14 +647,15 @@ def make_FileOpenRequest(**extra_opts): if buffering not in (0,1): buf_options = {} - if buffering > 1: buf_options['buffer_size'] = buffering + if buffering > 1: + buf_options['buffer_size'] = buffering if auto_close: ret_value = m_BufferedRandom(raw, _session=self.sess, **buf_options) else: ret_value = io.BufferedRandom(raw, **buf_options) else: ret_value = raw - + if "a" in mode: ret_value.seek(0, io.SEEK_END)