From f406e17cdf35f3ad3db09aeb9c82a9154d9ecb1b Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Tue, 18 Jul 2023 13:13:39 -0400
Subject: [PATCH 01/12] [_465] introduce irods.client.http.Session and
 supporting classes

irods.client.http.Session is a new way of connecting and reflects one
possible future of iRODS clients, i.e. communicating via HTTP instead of
the traditional iRODS protocol.
---
 irods/client/experimental/http/__init__.py | 116 +++++++++++++++++++++
 irods/prc_http_client_demo.py              |  26 +++++
 setup.py                                   |   1 +
 3 files changed, 143 insertions(+)
 create mode 100644 irods/client/experimental/http/__init__.py
 create mode 100644 irods/prc_http_client_demo.py

diff --git a/irods/client/experimental/http/__init__.py b/irods/client/experimental/http/__init__.py
new file mode 100644
index 000000000..9a613923e
--- /dev/null
+++ b/irods/client/experimental/http/__init__.py
@@ -0,0 +1,116 @@
+import collections
+import json
+import logging
+import requests
+import sys
+
+def _normalized_columns(columns):
+    if not isinstance(columns,(list,tuple)):
+        columns = filter(None, (_.strip() for _ in columns.split(',')))
+
+    # de-duplicate
+    columns = collections.OrderedDict((col,None) for col in columns)
+
+    col_names = tuple(columns.keys())
+    cls = collections.namedtuple('row', col_names)
+    return cls, ",".join(col_names)
+
+logger = logging.getLogger(__name__)
+
+class HTTP_operation_error(RuntimeError):
+    pass
+
+class Collection:
+
+    def __init__(self, mgr, id_):
+        self.id = id_
+        self.mgr = mgr
+
+    @property
+    def name(self):
+        return self.mgr.value_by_column_name( self.id, 'COLL_NAME' )
+
+# -----------------
+
+class Manager:
+    def __init__(self, session):
+        sess = self.sess = session
+
+    def value_by_column_name(self, id_, column_name:str):
+        first_row = self.sess.genquery1(columns = [column_name],
+                                        condition = "COLL_ID = '{}'", args = [id_])[0]
+        return getattr(first_row, column_name)
+
+class CollManager(Manager):
+
+    def name_from_id(self, id_):
+        return self.sess.genquery1(columns = ['COLL_NAME'],
+                                   condition = "COLL_ID = '{}'", args = [id_])[0].COLL_NAME
+
+    def get(self, collname):
+        jr = self.sess.genquery1( columns = 'COLL_ID',
+                                  condition = "COLL_NAME = '{}'", args = [collname] )
+        return Collection(self, int(jr[0].COLL_ID))
+
+# -----------------
+
+class Session:
+
+    url_base_template = 'http://{self.host}:{self.port}/irods-http/{self.version}'
+
+    # Convenient object properties.
+
+    @property
+    def url_base(self):
+        return self.url_base_template.format(**locals())
+
+    def url(self, endpoint_name):
+        return self.url_base + "/" + endpoint_name.strip("/")
+
+    @property
+    def auth_header(self):
+        return {'Authorization': 'Bearer ' + self.bearer_token}
+
+    # Low-level basis for implementing an endpoint via HTTP 'GET'.
+
+    def http_get(self, endpoint_name, **param_key_value_pairs):
+        r = requests.get( self.url(endpoint_name),
+                          headers = self.auth_header,
+                          params = param_key_value_pairs )
+        if not r.ok:
+            raise HTTP_operation_error("Failed in GET.")
+        return r.content.decode()
+
+    # Each endpoint can have its own method definition.
+
+    def genquery1(self, columns, condition='', *, args=(), extra_query_options = ()):
+        ## maybe require Python3.8 so we can have format strings, for example -
+        # query_text = f"SELECT {columns} where {condition.format(*args)}"
+        condition = condition.format(*args)
+        cls, columns = _normalized_columns(columns)
+        where = '' if condition == '' else ' WHERE '
+        r = self.http_get( '/query',
+                           op = "execute_genquery",
+                           query = "SELECT {columns}{where}{condition}".format(**locals()),
+                           **dict(extra_query_options))
+        J = json.loads(r)
+        errcode = J['irods_response']['error_code']
+        if errcode != 0:
+            logger.warn('irods error code of [%s] in genquery1',errcode)
+        return [cls(*i) for i in J['rows']]
+
+    def __init__(self, username, password, *,
+                 host = 'localhost',
+                 port = 9000,
+                 version = '0.9.5'):
+
+        self.username = username
+        self.password = password
+        (self.host, self.port, self.version) = (host, port, version)
+        url = self.url_base + '/authenticate'
+        r = requests.post(url, auth = (self.username, self.password))
+        if not r.ok:
+            raise HTTP_operation_error("Failed to connect: url = '%s', status code = %s",
+                                       url, r.status_code)
+        self.bearer_token = r.text
+
diff --git a/irods/prc_http_client_demo.py b/irods/prc_http_client_demo.py
new file mode 100644
index 000000000..a095c2343
--- /dev/null
+++ b/irods/prc_http_client_demo.py
@@ -0,0 +1,26 @@
+import pprint
+
+from irods.client.experimental.http import *
+
+s = Session('rods','rods',host='prec3431')
+c = CollManager(s).get("/tempZone/home/rods")
+
+print ("Got a collection {c.name}, id = {c.id}".format(**locals()))
+
+# TODO: a *_generator or *_pager method which iterates or pages through results
+
+# collections
+
+result = s.genquery1(['COLL_ID', 'COLL_NAME'], # columns
+                     "COLL_NAME like '%'",     # condition
+                     extra_query_options=dict(count='512'))
+
+pprint.pprint(result)
+print('len=',len(result))
+
+# data objects, list full paths
+
+for row in s.genquery1('COLL_NAME,DATA_NAME',                         # note 1 - we can also parse the <columns> from a string
+                                                                      # note 2 - <conditions> argument is optional
+                       extra_query_options=dict(count='512')):
+    print('path = {row.COLL_NAME}/{row.DATA_NAME}'.format(**locals()))
diff --git a/setup.py b/setup.py
index d280ced1c..bb3ee8826 100644
--- a/setup.py
+++ b/setup.py
@@ -40,6 +40,7 @@
                         'six>=1.10.0',
                         'PrettyTable>=0.7.2',
                         'defusedxml',
+                        'requests',
                         # - the new syntax:
                         #'futures; python_version == "2.7"'
                         ],

From f7c5910847b9f83128f98ca6552c5b979d348543 Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Wed, 19 Jul 2023 16:54:20 -0400
Subject: [PATCH 02/12] more lightweight objects & getters.

(See Session.data_object_replicas).

Also, swap the client and experimental namespaces.
And rework the demo slightly.
---
 .../client}/http/__init__.py                  | 51 ++++++++++++++++---
 irods/prc_http_client_demo.py                 | 23 +++++----
 2 files changed, 58 insertions(+), 16 deletions(-)
 rename irods/{client/experimental => experimental/client}/http/__init__.py (72%)

diff --git a/irods/client/experimental/http/__init__.py b/irods/experimental/client/http/__init__.py
similarity index 72%
rename from irods/client/experimental/http/__init__.py
rename to irods/experimental/client/http/__init__.py
index 9a613923e..71d99cc0e 100644
--- a/irods/client/experimental/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -1,9 +1,15 @@
 import collections
+import enum
 import json
 import logging
 import requests
 import sys
 
+logger = logging.getLogger(__name__)
+
+class HTTP_operation_error(RuntimeError):
+    pass
+
 def _normalized_columns(columns):
     if not isinstance(columns,(list,tuple)):
         columns = filter(None, (_.strip() for _ in columns.split(',')))
@@ -15,13 +21,25 @@ def _normalized_columns(columns):
     cls = collections.namedtuple('row', col_names)
     return cls, ",".join(col_names)
 
-logger = logging.getLogger(__name__)
-
-class HTTP_operation_error(RuntimeError):
-    pass
+class DataObject:
+    class column:
+        class enum(enum.Enum):
+            DATA_ID = 401
+            DATA_COLL_ID = 402
+            DATA_NAME = 403
+            DATA_REPL_NUM = 404
+            # TODO: complete this list
+        names = [k for k in enum.__members__.keys()]
 
 class Collection:
-
+    class column:
+        class enum(enum.Enum):
+            COLL_ID = 500
+            COLL_NAME = 501
+            # TODO: complete this list
+        names = [k for k in enum.__members__.keys()]
+
+    # for heavyweight style of getter only!
     def __init__(self, mgr, id_):
         self.id = id_
         self.mgr = mgr
@@ -31,6 +49,14 @@ def name(self):
         return self.mgr.value_by_column_name( self.id, 'COLL_NAME' )
 
 # -----------------
+# Manager/heavyweight approach to a catalog object "getter":
+#
+# This is an approximation of the old PRC approach
+#                   for getting an instance of a collection by its main
+#                   identifying data, the logical pathname.
+#
+# We most likely will not be doing things this way.
+# (See Session.data_object_replicas() method below.)
 
 class Manager:
     def __init__(self, session):
@@ -81,11 +107,24 @@ def http_get(self, endpoint_name, **param_key_value_pairs):
             raise HTTP_operation_error("Failed in GET.")
         return r.content.decode()
 
+    # -----------------
+    # Thin/lightweight approach to catalog object "getter":
+    #
+    def data_object_replicas(self, logical_path):
+        coll,data = logical_path.rsplit('/',1)
+        # TODO: embedded quotes in object names will not work here.
+        return self.genquery1(DataObject.column.names + Collection.column.names,
+                "COLL_NAME = '{}' and DATA_NAME = '{}'".format(coll,data),
+                extra_query_options={'count':500})
+
     # Each endpoint can have its own method definition.
 
     def genquery1(self, columns, condition='', *, args=(), extra_query_options = ()):
-        ## maybe require Python3.8 so we can have format strings, for example -
+
+        # TODO/discuss:
+        # Should we require Python3.8 so we can have format strings, e.g.:
         # query_text = f"SELECT {columns} where {condition.format(*args)}"
+
         condition = condition.format(*args)
         cls, columns = _normalized_columns(columns)
         where = '' if condition == '' else ' WHERE '
diff --git a/irods/prc_http_client_demo.py b/irods/prc_http_client_demo.py
index a095c2343..d3ad4f013 100644
--- a/irods/prc_http_client_demo.py
+++ b/irods/prc_http_client_demo.py
@@ -1,6 +1,6 @@
 import pprint
 
-from irods.client.experimental.http import *
+from irods.experimental.client.http import *
 
 s = Session('rods','rods',host='prec3431')
 c = CollManager(s).get("/tempZone/home/rods")
@@ -9,18 +9,21 @@
 
 # TODO: a *_generator or *_pager method which iterates or pages through results
 
-# collections
-
+# Query collections by explicit column list.
 result = s.genquery1(['COLL_ID', 'COLL_NAME'], # columns
                      "COLL_NAME like '%'",     # condition
                      extra_query_options=dict(count='512'))
-
+print("Result of collection query:\n"
+      "---------------------------\n")
 pprint.pprint(result)
-print('len=',len(result))
+print('Length of result was:',len(result))
 
-# data objects, list full paths
-
-for row in s.genquery1('COLL_NAME,DATA_NAME',                         # note 1 - we can also parse the <columns> from a string
-                                                                      # note 2 - <conditions> argument is optional
+# For a query of all data objects (note lack of condition argument), list full paths.
+for row in s.genquery1('COLL_NAME,DATA_NAME',
                        extra_query_options=dict(count='512')):
-    print('path = {row.COLL_NAME}/{row.DATA_NAME}'.format(**locals()))
+    print('path = {COLL_NAME}/{DATA_NAME}'.format(**row._asdict()))
+
+# Fetch all columns for the data object requested.
+data_path = "/tempZone/home/alice/new_alice.dat"
+x = s.data_object_replicas(data_path)
+print("'{}' has {} replicas we can access".format(data_path, len(x)))

From 91bf56315c2cf9a477116d05a365e94de3399d44 Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Thu, 20 Jul 2023 02:31:00 -0400
Subject: [PATCH 03/12] allow genquery1 to be both paged and iterated by row

---
 irods/experimental/client/http/README.md      | 61 +++++++++++++++
 irods/experimental/client/http/__init__.py    | 78 ++++++++++++++++---
 .../experimental/client/http/iter_or_page.sh  | 26 +++++++
 3 files changed, 155 insertions(+), 10 deletions(-)
 create mode 100644 irods/experimental/client/http/README.md
 create mode 100644 irods/experimental/client/http/iter_or_page.sh

diff --git a/irods/experimental/client/http/README.md b/irods/experimental/client/http/README.md
new file mode 100644
index 000000000..9e7f7acf9
--- /dev/null
+++ b/irods/experimental/client/http/README.md
@@ -0,0 +1,61 @@
+```
+(py3) userXY@HOSTNAME:~/python-irodsclient/irods/experimental/client/http$ bash iter_or_page.sh page
+---
+[row(COLL_NAME='/'),
+ row(COLL_NAME='/tempZone'),
+ row(COLL_NAME='/tempZone/home')]
+---
+[row(COLL_NAME='/tempZone/home/alice'),
+ row(COLL_NAME="/tempZone/home/alice/a'b"),
+ row(COLL_NAME='/tempZone/home/public')]
+---
+[row(COLL_NAME='/tempZone/home/public/rods'),
+ row(COLL_NAME='/tempZone/home/public/thing'),
+ row(COLL_NAME='/tempZone/home/rods')]
+---
+[row(COLL_NAME='/tempZone/home/rods/c_files'),
+ row(COLL_NAME='/tempZone/home/rods/hello'),
+ row(COLL_NAME='/tempZone/trash')]
+---
+[row(COLL_NAME='/tempZone/trash/home'),
+ row(COLL_NAME='/tempZone/trash/home/alice'),
+ row(COLL_NAME='/tempZone/trash/home/public')]
+---
+[row(COLL_NAME='/tempZone/trash/home/rods')]
+---
+(py3) userXY@HOSTNAME:~/python-irodsclient/irods/experimental/client/http$ bash iter_or_page.sh iter
+---
+row(COLL_NAME='/')
+---
+row(COLL_NAME='/tempZone')
+---
+row(COLL_NAME='/tempZone/home')
+---
+row(COLL_NAME='/tempZone/home/alice')
+---
+row(COLL_NAME="/tempZone/home/alice/a'b")
+---
+row(COLL_NAME='/tempZone/home/public')
+---
+row(COLL_NAME='/tempZone/home/public/rods')
+---
+row(COLL_NAME='/tempZone/home/public/thing')
+---
+row(COLL_NAME='/tempZone/home/rods')
+---
+row(COLL_NAME='/tempZone/home/rods/c_files')
+---
+row(COLL_NAME='/tempZone/home/rods/hello')
+---
+row(COLL_NAME='/tempZone/trash')
+---
+row(COLL_NAME='/tempZone/trash/home')
+---
+row(COLL_NAME='/tempZone/trash/home/alice')
+---
+row(COLL_NAME='/tempZone/trash/home/public')
+---
+row(COLL_NAME='/tempZone/trash/home/rods')
+
+>>> 
+```
diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index 71d99cc0e..19b94a583 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -1,5 +1,6 @@
 import collections
 import enum
+import itertools
 import json
 import logging
 import requests
@@ -7,6 +8,39 @@
 
 logger = logging.getLogger(__name__)
 
+# -----
+
+# Abstractions that let us either page through a general query <count> items at a time,
+#  or treat it like a Pythonic generator aka stateful iterator.
+#  (See the README.md in this directory.)
+
+# TODO: The README is temporary. Make some better docs.
+
+class _pageable:
+    def __init__(self, callable_):
+        self.callable_ = callable_
+    def next_page(self):
+        page = list(self.callable_())
+        return page
+
+class _iterable(_pageable):
+    def __init__(self,*_):
+        super().__init__(*_)
+        self.__P = None
+        self.index = 0
+    def __iter__(self): return self
+    def __next__(self):
+        if self.__P is None or self.index >= len(self.__P):
+            self.__P = self.next_page()
+            self.index = 0
+        if 0 == len(self.__P):
+            raise StopIteration
+        element = self.__P[self.index]
+        self.index += 1
+        return element
+
+# -----
+
 class HTTP_operation_error(RuntimeError):
     pass
 
@@ -119,7 +153,7 @@ def data_object_replicas(self, logical_path):
 
     # Each endpoint can have its own method definition.
 
-    def genquery1(self, columns, condition='', *, args=(), extra_query_options = ()):
+    def genquery1(self, columns, condition='', *, args=(), extra_query_options = (('offset',0),)):
 
         # TODO/discuss:
         # Should we require Python3.8 so we can have format strings, e.g.:
@@ -128,15 +162,39 @@ def genquery1(self, columns, condition='', *, args=(), extra_query_options = ())
         condition = condition.format(*args)
         cls, columns = _normalized_columns(columns)
         where = '' if condition == '' else ' WHERE '
-        r = self.http_get( '/query',
-                           op = "execute_genquery",
-                           query = "SELECT {columns}{where}{condition}".format(**locals()),
-                           **dict(extra_query_options))
-        J = json.loads(r)
-        errcode = J['irods_response']['error_code']
-        if errcode != 0:
-            logger.warn('irods error code of [%s] in genquery1',errcode)
-        return [cls(*i) for i in J['rows']]
+
+        extra_query_options_d = dict(extra_query_options)
+
+        # --- For the time being, genquery1 returns variable types depending on offset parameter.
+        #
+        # If *NO* offset is given (ie extra_query_options parameter is forced to {} or {'count':C},
+        # we return a result than can be either paged or row-iterated. (Again, see README)
+
+        # But if an offset is given, we just return what the API hands us, which seems to be
+        # one result (count=1) by default.
+
+        def get_r(local_ = locals(), d = extra_query_options_d.copy()):
+            if 'offset' not in d:
+                d['offset'] = 0
+            r = self.http_get( '/query',
+                               op = "execute_genquery",
+                               query = "SELECT {columns}{where}{condition}".format(**local_),
+                               **d)
+
+            d['offset'] += d.get('count',512)
+
+            J = json.loads(r)
+            errcode = J['irods_response']['error_code']
+            if errcode != 0:
+                logger.warn('irods error code of [%s] in genquery1',errcode)
+            return [cls(*i) for i in J['rows']]
+            return r
+
+        if 'offset' in extra_query_options_d:
+            return get_r()
+        else:
+            return _iterable(get_r)
+            #return (get_r)
 
     def __init__(self, username, password, *,
                  host = 'localhost',
diff --git a/irods/experimental/client/http/iter_or_page.sh b/irods/experimental/client/http/iter_or_page.sh
new file mode 100644
index 000000000..83e5b2aac
--- /dev/null
+++ b/irods/experimental/client/http/iter_or_page.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+if [ $# -gt 0 ]; then
+  arg=${1:=iter}
+else
+  echo >&2 "usage: $0 [page|iter]"; exit 1
+fi
+
+python -c "
+import pprint
+from irods.experimental.client.http import *
+s = Session('rods','rods')
+i = s.genquery1('COLL_NAME', condition='',  args=(), extra_query_options=dict(count=3))
+import sys
+if sys.argv[1] == 'page':
+    while True:
+        print('---')
+        p = i.next_page()
+        if not p:
+            break
+        pprint.pprint(p)
+elif sys.argv[1] == 'iter':
+    for j in i:
+        print('---')
+        pprint.pprint(j)
+        " ${arg}

From 92217c3bd7a7c4045907c3c496de87c8d351e2af Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Thu, 20 Jul 2023 03:07:50 -0400
Subject: [PATCH 04/12] [_465] fix minor things about demo and module

regularize use of int and string for offset, count

data_object gets one replica by default

can pass genquery options to session.data_object
---
 irods/experimental/client/http/__init__.py |  8 +++++---
 irods/prc_http_client_demo.py              | 19 ++++++++++++++++---
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index 19b94a583..331c53ff7 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -144,12 +144,13 @@ def http_get(self, endpoint_name, **param_key_value_pairs):
     # -----------------
     # Thin/lightweight approach to catalog object "getter":
     #
-    def data_object_replicas(self, logical_path):
+    def data_object(self, logical_path, *, 
+                    query_options=(('offset',0),('count',1))):
         coll,data = logical_path.rsplit('/',1)
         # TODO: embedded quotes in object names will not work here.
         return self.genquery1(DataObject.column.names + Collection.column.names,
                 "COLL_NAME = '{}' and DATA_NAME = '{}'".format(coll,data),
-                extra_query_options={'count':500})
+                extra_query_options=dict(query_options))
 
     # Each endpoint can have its own method definition.
 
@@ -176,12 +177,13 @@ def genquery1(self, columns, condition='', *, args=(), extra_query_options = (('
         def get_r(local_ = locals(), d = extra_query_options_d.copy()):
             if 'offset' not in d:
                 d['offset'] = 0
+            d['offset'] = int(d['offset'])
             r = self.http_get( '/query',
                                op = "execute_genquery",
                                query = "SELECT {columns}{where}{condition}".format(**local_),
                                **d)
 
-            d['offset'] += d.get('count',512)
+            d['offset'] += int(d.get('count','512'))
 
             J = json.loads(r)
             errcode = J['irods_response']['error_code']
diff --git a/irods/prc_http_client_demo.py b/irods/prc_http_client_demo.py
index d3ad4f013..05a74fbd5 100644
--- a/irods/prc_http_client_demo.py
+++ b/irods/prc_http_client_demo.py
@@ -15,15 +15,28 @@
                      extra_query_options=dict(count='512'))
 print("Result of collection query:\n"
       "---------------------------\n")
+
+result = list(result)
 pprint.pprint(result)
 print('Length of result was:',len(result))
 
+#exit()#dwm
+
 # For a query of all data objects (note lack of condition argument), list full paths.
 for row in s.genquery1('COLL_NAME,DATA_NAME',
                        extra_query_options=dict(count='512')):
     print('path = {COLL_NAME}/{DATA_NAME}'.format(**row._asdict()))
 
-# Fetch all columns for the data object requested.
+# Fetch the data object requested.
 data_path = "/tempZone/home/alice/new_alice.dat"
-x = s.data_object_replicas(data_path)
-print("'{}' has {} replicas we can access".format(data_path, len(x)))
+
+print ('-- fetch first replica --')
+
+data_obj = s.data_object(data_path)
+print(data_obj)
+
+print ('-- fetch all replicas --')
+
+MAX_REPLICAS = 2**31-1
+data_obj_replicas = list(s.data_object(data_path, query_options=dict(count=MAX_REPLICAS)))
+pprint.pprint(data_obj_replicas)

From dd4acc8326aeddd94ee7cc8e5132f1438681e3ac Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Thu, 20 Jul 2023 16:23:08 -0400
Subject: [PATCH 05/12] correct the genquery code

The offset is no longer used to indicate the return type.
The length of each page is now used to update the offset in each call to get_r.
To get the initial page, we can call next_page() on the returned iterator.
Other page offsets should be reached Pythonically, i.e. with itertools.islice.
---
 irods/experimental/client/http/__init__.py | 27 +++++++---------------
 1 file changed, 8 insertions(+), 19 deletions(-)

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index 331c53ff7..5fd038780 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -1,5 +1,6 @@
 import collections
 import enum
+import functools
 import itertools
 import json
 import logging
@@ -154,7 +155,7 @@ def data_object(self, logical_path, *,
 
     # Each endpoint can have its own method definition.
 
-    def genquery1(self, columns, condition='', *, args=(), extra_query_options = (('offset',0),)):
+    def genquery1(self, columns, condition='', *, args=(), extra_query_options = ()):
 
         # TODO/discuss:
         # Should we require Python3.8 so we can have format strings, e.g.:
@@ -166,14 +167,6 @@ def genquery1(self, columns, condition='', *, args=(), extra_query_options = (('
 
         extra_query_options_d = dict(extra_query_options)
 
-        # --- For the time being, genquery1 returns variable types depending on offset parameter.
-        #
-        # If *NO* offset is given (ie extra_query_options parameter is forced to {} or {'count':C},
-        # we return a result than can be either paged or row-iterated. (Again, see README)
-
-        # But if an offset is given, we just return what the API hands us, which seems to be
-        # one result (count=1) by default.
-
         def get_r(local_ = locals(), d = extra_query_options_d.copy()):
             if 'offset' not in d:
                 d['offset'] = 0
@@ -183,20 +176,16 @@ def get_r(local_ = locals(), d = extra_query_options_d.copy()):
                                query = "SELECT {columns}{where}{condition}".format(**local_),
                                **d)
 
-            d['offset'] += int(d.get('count','512'))
-
             J = json.loads(r)
             errcode = J['irods_response']['error_code']
             if errcode != 0:
                 logger.warn('irods error code of [%s] in genquery1',errcode)
-            return [cls(*i) for i in J['rows']]
-            return r
-
-        if 'offset' in extra_query_options_d:
-            return get_r()
-        else:
-            return _iterable(get_r)
-            #return (get_r)
+
+            rows = [cls(*i) for i in J['rows']]
+            d['offset'] += len(rows)
+            return rows
+
+        return _iterable(get_r)
 
     def __init__(self, username, password, *,
                  host = 'localhost',

From 807697a58cb46a1ab42abe808a07055d991654ff Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Thu, 20 Jul 2023 17:39:21 -0400
Subject: [PATCH 06/12] tidy; improve variable names

---
 irods/experimental/client/http/__init__.py | 26 ++++++++++++----------
 1 file changed, 14 insertions(+), 12 deletions(-)

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index 5fd038780..2e692af68 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -162,26 +162,28 @@ def genquery1(self, columns, condition='', *, args=(), extra_query_options = ())
         # query_text = f"SELECT {columns} where {condition.format(*args)}"
 
         condition = condition.format(*args)
-        cls, columns = _normalized_columns(columns)
+        row_class, columns = _normalized_columns(columns)
         where = '' if condition == '' else ' WHERE '
 
-        extra_query_options_d = dict(extra_query_options)
+        # d's default value (being mutable) is evaluated once, when get_r is defined,
+        # and is stored on the function object.  It therefore outlives the genquery1
+        # call frame in which it originated and persists across multiple calls to get_r.
+        # This is leveraged to increment the query offset at the end of each get_r call
+        # by the length of the rows array retrieved.
 
-        def get_r(local_ = locals(), d = extra_query_options_d.copy()):
+        def get_r(local_ = locals(), d = dict(extra_query_options)):
             if 'offset' not in d:
                 d['offset'] = 0
             d['offset'] = int(d['offset'])
-            r = self.http_get( '/query',
-                               op = "execute_genquery",
-                               query = "SELECT {columns}{where}{condition}".format(**local_),
-                               **d)
-
-            J = json.loads(r)
-            errcode = J['irods_response']['error_code']
+            result = self.http_get('/query',
+                                   op = "execute_genquery",
+                                   query = "SELECT {columns}{where}{condition}".format(**local_),
+                                   **d)
+            json_result = json.loads(result)
+            errcode = json_result['irods_response']['error_code']
             if errcode != 0:
                 logger.warn('irods error code of [%s] in genquery1',errcode)
-
-            rows = [cls(*i) for i in J['rows']]
+            rows = [row_class(*i) for i in json_result['rows']]
             d['offset'] += len(rows)
             return rows
 

From ec7ca36ab298dbc04e496e50cbaf52aef59dcb9c Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Sun, 23 Jul 2023 07:54:13 -0400
Subject: [PATCH 07/12] docstrings for _pageable and _iterable interfaces

---
 irods/experimental/client/http/__init__.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index 2e692af68..42838ca56 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -17,20 +17,32 @@
 
 # TODO: The README is temporary. Make some better docs.
 
-class _pageable:
+class _pageable: 
     def __init__(self, callable_):
+        """callable_ is a function-like object called without parameters.
+           It pages once through the set of query results and should be
+           stateful in terms of maintaining current offset within the query.
+        """
         self.callable_ = callable_
     def next_page(self):
         page = list(self.callable_())
         return page
 
 class _iterable(_pageable):
+    """Adapts a pageable interface to return one query row at a time.  An
+       empty [] returned from next_page signals the end of query results.
+    """
+    @functools.wraps(_pageable.__init__)
     def __init__(self,*_):
         super().__init__(*_)
         self.__P = None
         self.index = 0
+    # Allow iter() on instances.
     def __iter__(self): return self
     def __next__(self):
+        """Called implicitly by any iteration over the _iterable instance.
+           Returns one query row.
+        """
         if self.__P is None or self.index >= len(self.__P):
             self.__P = self.next_page()
             self.index = 0

From 1b729376f2123a9cd1343802107def63d9f0823f Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Sun, 23 Jul 2023 09:56:50 -0400
Subject: [PATCH 08/12] add iterator functions

---
 irods/experimental/client/http/__init__.py    |  4 ---
 .../client/http/iterator_functions.py         | 31 +++++++++++++++++++
 2 files changed, 31 insertions(+), 4 deletions(-)
 create mode 100644 irods/experimental/client/http/iterator_functions.py

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index 42838ca56..eb9c3dfdf 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -169,10 +169,6 @@ def data_object(self, logical_path, *,
 
     def genquery1(self, columns, condition='', *, args=(), extra_query_options = ()):
 
-        # TODO/discuss:
-        # Should we require Python3.8 so we can have format strings, e.g.:
-        # query_text = f"SELECT {columns} where {condition.format(*args)}"
-
         condition = condition.format(*args)
         row_class, columns = _normalized_columns(columns)
         where = '' if condition == '' else ' WHERE '
diff --git a/irods/experimental/client/http/iterator_functions.py b/irods/experimental/client/http/iterator_functions.py
new file mode 100644
index 000000000..bf0950845
--- /dev/null
+++ b/irods/experimental/client/http/iterator_functions.py
@@ -0,0 +1,31 @@
+#!/usr/bin/env python3
+import itertools
+import sys
+import typing
+
+class too_many_results(Exception): pass
+class too_few_results(Exception): pass
+
+def first_n(iterable: typing.Iterable, n: int):
+    return list(itertools.islice(iterable,n))
+
+def one(iterable: typing.Iterable):
+    """Return the sole item of `iterable`; raise too_few_results or too_many_results otherwise."""
+    # Pull at most two items: enough to tell "exactly one" from "more than one".
+    found = first_n(iterable, 2)
+    if len(found) != 1:
+        raise too_many_results if found else too_few_results
+    return found[0]
+
+def test_one():
+    """one() returns the sole element of a single-item iterator."""
+    # range(10, 11) holds exactly one element, 10.
+    assert one(iter(range(10, 11))) == 10
+
+def test_first_n():
+    """first_n() returns the leading n items of an iterator as a list."""
+    # Five items are available, two are requested.
+    assert first_n(iter(range(10, 15)), 2) == [10, 11]
+
+if __name__=='__main__':
+    test_one(); test_first_n()  # run every self-test defined in this module

From 92b915ab1a577a111071e88c6c292ba0219e972f Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Sun, 23 Jul 2023 10:23:23 -0400
Subject: [PATCH 09/12] use iterator_functions in lightweight getter for
 data_object

---
 irods/experimental/client/http/__init__.py    | 20 ++++++++++---------
 .../client/http/iterator_functions.py         |  2 ++
 irods/prc_http_client_demo.py                 |  7 +++----
 3 files changed, 16 insertions(+), 13 deletions(-)

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index eb9c3dfdf..f913d4579 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -6,8 +6,11 @@
 import logging
 import requests
 import sys
+from .iterator_functions import *
 
 logger = logging.getLogger(__name__)
+MAX_INT32 = 2**31-1
+DEFAULT_PAGE_SIZE = 512
 
 # -----
 
@@ -110,20 +113,20 @@ def __init__(self, session):
         sess = self.sess = session
 
     def value_by_column_name(self, id_, column_name:str):
-        first_row = self.sess.genquery1(columns = [column_name],
-                                        condition = "COLL_ID = '{}'", args = [id_])[0]
+        first_row = one(self.sess.genquery1(columns = [column_name],
+                                            condition = "COLL_ID = '{}'", args = [id_]))
         return getattr(first_row, column_name)
 
 class CollManager(Manager):
 
     def name_from_id(self, id_):
-        return self.sess.genquery1(columns = ['COLL_NAME'],
-                                   condition = "COLL_ID = '{}'", args = [id_])[0].COLL_NAME
+        return one(self.sess.genquery1(columns = ['COLL_NAME'],
+                                   condition = "COLL_ID = '{}'", args = [id_])).COLL_NAME
 
     def get(self, collname):
-        jr = self.sess.genquery1( columns = 'COLL_ID',
-                                  condition = "COLL_NAME = '{}'", args = [collname] )
-        return Collection(self, int(jr[0].COLL_ID))
+        r = self.sess.genquery1( columns = 'COLL_ID',
+                                 condition = "COLL_NAME = '{}'", args = [collname] )
+        return Collection(self, int(one(r).COLL_ID))
 
 # -----------------
 
@@ -158,7 +161,7 @@ def http_get(self, endpoint_name, **param_key_value_pairs):
     # Thin/lightweight approach to catalog object "getter":
     #
     def data_object(self, logical_path, *, 
-                    query_options=(('offset',0),('count',1))):
+                    query_options=(('offset',0),('count',DEFAULT_PAGE_SIZE))):
         coll,data = logical_path.rsplit('/',1)
         # TODO: embedded quotes in object names will not work here.
         return self.genquery1(DataObject.column.names + Collection.column.names,
@@ -211,4 +214,3 @@ def __init__(self, username, password, *,
             raise HTTP_operation_error("Failed to connect: url = '%s', status code = %s",
                                        url, r.status_code)
         self.bearer_token = r.text
-
diff --git a/irods/experimental/client/http/iterator_functions.py b/irods/experimental/client/http/iterator_functions.py
index bf0950845..c42404500 100644
--- a/irods/experimental/client/http/iterator_functions.py
+++ b/irods/experimental/client/http/iterator_functions.py
@@ -6,6 +6,8 @@
 class too_many_results(Exception): pass
 class too_few_results(Exception): pass
 
+__all__ = ['first_n','one','too_many_results','too_few_results']
+
 def first_n(iterable: typing.Iterable, n: int):
     return list(itertools.islice(iterable,n))
 
diff --git a/irods/prc_http_client_demo.py b/irods/prc_http_client_demo.py
index 05a74fbd5..c5c08be8e 100644
--- a/irods/prc_http_client_demo.py
+++ b/irods/prc_http_client_demo.py
@@ -1,6 +1,7 @@
 import pprint
 
 from irods.experimental.client.http import *
+from irods.experimental.client.http.iterator_functions import *
 
 s = Session('rods','rods',host='prec3431')
 c = CollManager(s).get("/tempZone/home/rods")
@@ -20,8 +21,6 @@
 pprint.pprint(result)
 print('Length of result was:',len(result))
 
-#exit()#dwm
-
 # For a query of all data objects (note lack of condition argument), list full paths.
 for row in s.genquery1('COLL_NAME,DATA_NAME',
                        extra_query_options=dict(count='512')):
@@ -32,10 +31,10 @@
 
 print ('-- fetch first replica --')
 
-data_obj = s.data_object(data_path)
+data_obj = first_n(s.data_object(data_path),n=1)
 print(data_obj)
 
-print ('-- fetch all replicas --')
+print ('-- fetch all replicas without paging --')
 
 MAX_REPLICAS = 2**31-1
 data_obj_replicas = list(s.data_object(data_path, query_options=dict(count=MAX_REPLICAS)))

From 4f7692b15b9aab6a7783861332964b81ec352ef5 Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Sun, 23 Jul 2023 10:30:15 -0400
Subject: [PATCH 10/12] delete TODO comment, have now imp'd paging

---
 irods/prc_http_client_demo.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/irods/prc_http_client_demo.py b/irods/prc_http_client_demo.py
index c5c08be8e..258f7eb7a 100644
--- a/irods/prc_http_client_demo.py
+++ b/irods/prc_http_client_demo.py
@@ -8,8 +8,6 @@
 
 print ("Got a collection {c.name}, id = {c.id}".format(**locals()))
 
-# TODO: a *_generator or *_pager method which iterates or pages through results
-
 # Query collections by explicit column list.
 result = s.genquery1(['COLL_ID', 'COLL_NAME'], # columns
                      "COLL_NAME like '%'",     # condition

From ddf6b3eb9fb1e3c818d8b3e50002273f4c15ab75 Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Sun, 23 Jul 2023 10:56:30 -0400
Subject: [PATCH 11/12] document genquery1()'s pagesize-agnostic, rowwise
 iterative behavior

---
 irods/experimental/client/http/__init__.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index f913d4579..85a724c6d 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -171,7 +171,20 @@ def data_object(self, logical_path, *,
     # Each endpoint can have its own method definition.
 
     def genquery1(self, columns, condition='', *, args=(), extra_query_options = ()):
+        """Return a generator-style iterator over all row results.
+           Example:
+               for row in session.genquery1( 'COLL_NAME' ):
+                   print(row.COLL_NAME)
 
+           By default, one HTTP call to the server returns a single "row", which is not`
+           terribly efficient.  We can override the "count" option with an arbitrary
+           positive integer:
+
+               session.genquery1(columns, extra_query_options=dict(count=512)).
+
+           and since this function is agnostic to pagesize and simply returns a row-wise
+           iterator, its row-wise iterative behavior will not change.
+        """
         condition = condition.format(*args)
         row_class, columns = _normalized_columns(columns)
         where = '' if condition == '' else ' WHERE '

From 5385e79eaef3dd0753689d1915b82cf3750cbfbe Mon Sep 17 00:00:00 2001
From: d-w-moore <dmoore@renci.org>
Date: Tue, 25 Jul 2023 08:35:57 -0400
Subject: [PATCH 12/12] rephrase the doc

---
 irods/experimental/client/http/__init__.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/irods/experimental/client/http/__init__.py b/irods/experimental/client/http/__init__.py
index 85a724c6d..7f1e33e8c 100644
--- a/irods/experimental/client/http/__init__.py
+++ b/irods/experimental/client/http/__init__.py
@@ -178,12 +178,13 @@ def genquery1(self, columns, condition='', *, args=(), extra_query_options = ())
 
            By default, one HTTP call to the server returns a single "row", which is not`
            terribly efficient.  We can override the "count" option with an arbitrary
-           positive integer:
+           positive integer, effectively increasing the paging size for the query:
 
                session.genquery1(columns, extra_query_options=dict(count=512)).
 
-           and since this function is agnostic to pagesize and simply returns a row-wise
-           iterator, its row-wise iterative behavior will not change.
+           Since this function's result (a row-wise iterator) is page-size agnostic, its
+           usage is not altered, whereas the efficiency for large queries will greatly
+           improve due to the 512-fold decrease in the number of API calls.
         """
         condition = condition.format(*args)
         row_class, columns = _normalized_columns(columns)