Skip to content

Commit 1602ffc

Browse files
zeryx and lemonez authored
Makes DataFiles FileIO compatible (#106)
* added the AdvancedDataFile and AdvancedDataDirectory types * fixed some test case failures (good thing I added tests!) * removed debug statement * added more if checks to ensure that we don't get type errors with older versions of python * updated to use the filelike type as the default, with legacy support available under a flag * swapped default of datafile and datadirectory types to advanced * renamed AdvancedDataFile * Update Algorithmia/datadirectory.py Co-authored-by: lemonez <36384768+lemonez@users.noreply.github.com> Co-authored-by: lemonez <36384768+lemonez@users.noreply.github.com>
1 parent 6bf349a commit 1602ffc

File tree

4 files changed

+143
-21
lines changed

4 files changed

+143
-21
lines changed

Algorithmia/client.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
import Algorithmia
44
from Algorithmia.insights import Insights
55
from Algorithmia.algorithm import Algorithm
6-
from Algorithmia.datafile import DataFile, LocalDataFile
7-
from Algorithmia.datadirectory import DataDirectory, LocalDataDirectory
6+
from Algorithmia.datafile import DataFile, LocalDataFile, AdvancedDataFile
7+
from Algorithmia.datadirectory import DataDirectory, LocalDataDirectory, AdvancedDataDirectory
88
from algorithmia_api_client import Configuration, DefaultApi, ApiClient
99

1010
from tempfile import mkstemp
@@ -63,13 +63,17 @@ def username(self):
6363
username = next(self.dir("").list()).path
6464
return username
6565

66-
def file(self, dataUrl):
67-
if dataUrl.startswith('file://'): return LocalDataFile(self, dataUrl)
68-
else: return DataFile(self, dataUrl)
66+
def file(self, dataUrl, cleanup=False):
    """Return a file handle object for ``dataUrl``.

    URLs starting with ``file://`` yield a ``LocalDataFile``; every other
    URL yields an ``AdvancedDataFile`` that receives the ``cleanup`` flag.
    ``cleanup`` is not used for ``file://`` URLs.
    """
    if not dataUrl.startswith('file://'):
        return AdvancedDataFile(self, dataUrl, cleanup)
    return LocalDataFile(self, dataUrl)
6971

7072
def dir(self, dataUrl):
    """Return a directory handle object for ``dataUrl``.

    URLs starting with ``file://`` yield a ``LocalDataDirectory``; every
    other URL yields an ``AdvancedDataDirectory``.
    """
    if not dataUrl.startswith('file://'):
        return AdvancedDataDirectory(self, dataUrl)
    return LocalDataDirectory(self, dataUrl)
7377

7478
def create_user(self, requestString):
7579
url = "/v1/users"

Algorithmia/datadirectory.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@
55
import os
66
import six
77
import tempfile
8-
98
import Algorithmia
10-
from Algorithmia.datafile import DataFile
9+
10+
from Algorithmia.datafile import DataFile, AdvancedDataFile, LocalDataFile
1111
from Algorithmia.data import DataObject, DataObjectType
1212
from Algorithmia.errors import DataApiError
1313
from Algorithmia.util import getParentAndBase, pathJoin
1414
from Algorithmia.acl import Acl
1515

16+
1617
class DataDirectory(DataObject):
1718
def __init__(self, client, dataUrl):
1819
super(DataDirectory, self).__init__(DataObjectType.directory)
@@ -41,7 +42,7 @@ def exists(self):
4142
def create(self, acl=None):
4243
'''Creates a directory, optionally include Acl argument to set permissions'''
4344
parent, name = getParentAndBase(self.path)
44-
json = { 'name': name }
45+
json = {'name': name}
4546
if acl is not None:
4647
json['acl'] = acl.to_api_param()
4748
response = self.client.postJsonHelper(DataDirectory._getUrl(parent), json, False)
@@ -90,7 +91,7 @@ def get_permissions(self):
9091
return None
9192

9293
def update_permissions(self, acl):
93-
params = {'acl':acl.to_api_param()}
94+
params = {'acl': acl.to_api_param()}
9495
response = self.client.patchHelper(self.url, params)
9596
if response.status_code != 200:
9697
raise DataApiError('Unable to update permissions: ' + response.json()['error']['message'])
@@ -102,7 +103,7 @@ def _get_directory_iterator(self, type_filter=None):
102103
while first or (marker is not None and len(marker) > 0):
103104
first = False
104105
url = self.url
105-
query_params= {}
106+
query_params = {}
106107
if marker:
107108
query_params['marker'] = marker
108109
response = self.client.getHelper(url, **query_params)
@@ -177,8 +178,17 @@ def list(self):
177178

178179
def dirs(self, content):
179180
for x in os.listdir(self.path):
180-
if os.path.isdir(self.path+'/'+x): yield x
181+
if os.path.isdir(self.path + '/' + x): yield x
181182

182183
def files(self, content):
183184
for x in os.listdir(self.path):
184-
if os.path.isfile(self.path+'/'+x): yield x
185+
if os.path.isfile(self.path + '/' + x):
186+
yield x
187+
188+
189+
class AdvancedDataDirectory(DataDirectory):
    """``DataDirectory`` whose ``file()`` hands out ``AdvancedDataFile`` objects."""

    def __init__(self, client, dataUrl):
        # No extra state of its own; construction is delegated to DataDirectory.
        super(AdvancedDataDirectory, self).__init__(client, dataUrl)

    def file(self, name, cleanup=True):
        """Return an ``AdvancedDataFile`` for ``name`` joined onto this directory's path.

        ``cleanup`` is forwarded unchanged to the ``AdvancedDataFile`` constructor.
        """
        child_path = pathJoin(self.path, name)
        return AdvancedDataFile(self.client, child_path, cleanup)

Algorithmia/datafile.py

Lines changed: 76 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from Algorithmia.util import getParentAndBase
1212
from Algorithmia.data import DataObject, DataObjectType
1313
from Algorithmia.errors import DataApiError, raiseDataApiError
14+
from io import RawIOBase
1415

1516

1617
class DataFile(DataObject):
@@ -24,7 +25,7 @@ def __init__(self, client, dataUrl):
2425
self.size = None
2526

2627
def set_attributes(self, attributes):
27-
self.last_modified = datetime.strptime(attributes['last_modified'],'%Y-%m-%dT%H:%M:%S.%fZ')
28+
self.last_modified = datetime.strptime(attributes['last_modified'], '%Y-%m-%dT%H:%M:%S.%fZ')
2829
self.size = attributes['size']
2930

3031
# Deprecated:
@@ -38,13 +39,13 @@ def getFile(self):
3839
raise DataApiError('unable to get file {} - {}'.format(self.path, error))
3940
# Make HTTP get request
4041
response = self.client.getHelper(self.url)
41-
with tempfile.NamedTemporaryFile(delete = False) as f:
42+
with tempfile.NamedTemporaryFile(delete=False) as f:
4243
for block in response.iter_content(1024):
4344
if not block:
44-
break;
45+
break
4546
f.write(block)
4647
f.flush()
47-
return open(f.name)
48+
return open(f.name)
4849

4950
def getName(self):
5051
_, name = getParentAndBase(self.path)
@@ -129,6 +130,7 @@ def putFile(self, path):
129130
raise raiseDataApiError(result)
130131
else:
131132
return self
133+
132134
def putNumpy(self, array):
133135
# Post numpy array as json payload
134136
np_loader = pkgutil.find_loader('numpy')
@@ -148,6 +150,7 @@ def delete(self):
148150
else:
149151
return True
150152

153+
151154
class LocalDataFile():
152155
def __init__(self, client, filePath):
153156
self.client = client
@@ -158,7 +161,7 @@ def __init__(self, client, filePath):
158161
self.size = None
159162

160163
def set_attributes(self, attributes):
161-
self.last_modified = datetime.strptime(attributes['last_modified'],'%Y-%m-%dT%H:%M:%S.%fZ')
164+
self.last_modified = datetime.strptime(attributes['last_modified'], '%Y-%m-%dT%H:%M:%S.%fZ')
162165
self.size = attributes['size']
163166

164167
# Get file from the data api
@@ -229,9 +232,76 @@ def delete(self):
229232
except:
230233
raise DataApiError('Failed to delete local file ' + self.path)
231234

235+
232236
def localPutHelper(path, contents):
    """Write ``contents`` to ``path`` in binary mode and report the outcome.

    Returns ``{'status': 'success'}`` when the write completes, otherwise
    ``{'error': <message>}`` carrying the text of the exception raised.
    No exception escapes to the caller.
    """
    try:
        with open(path, 'wb') as sink:
            sink.write(contents)
    except Exception as err:
        return {'error': str(err)}
    return {'status': 'success'}
243+
244+
245+
class AdvancedDataFile(DataFile, RawIOBase):
    """Read-only, seekable file-like wrapper around a remote data file.

    Nothing is downloaded at construction time.  The first call to one of
    ``read``/``readline``/``readlines``/``tell``/``seek`` fetches the content
    through ``self.getFile()`` and caches the returned local file object in
    ``self.local_file``; every later call is delegated to that same object,
    so the read position is shared between calls.
    """

    def __init__(self, client, dataUrl, cleanup=True):
        """
        :param client: forwarded to ``DataFile.__init__``.
        :param dataUrl: forwarded to ``DataFile.__init__``.
        :param cleanup: when true, the downloaded local copy is deleted from
            disk when this object is finalized.
        """
        super(AdvancedDataFile, self).__init__(client, dataUrl)
        self.cleanup = cleanup
        self.local_file = None

    def __del__(self):
        """Close the cached local copy and, if requested, delete it from disk."""
        # getattr: __init__ may have raised before these attributes were set,
        # and __del__ still runs on the half-built object.
        local_file = getattr(self, 'local_file', None)
        if local_file is None:
            return
        local_file.close()
        if getattr(self, 'cleanup', False):
            # Imported here so the block does not depend on the module's
            # top-level imports.
            import os
            try:
                # os.remove needs the path, not the file object itself.
                os.remove(local_file.name)
            except OSError:
                # Best-effort: the file may already be gone, and raising
                # inside a finalizer only produces an ignored warning.
                pass

    def _ensure_local_file(self):
        """Return the cached local copy, downloading it on first use."""
        if self.local_file is None:
            self.local_file = self.getFile()
        return self.local_file

    def readable(self):
        return True

    def seekable(self):
        return True

    def writable(self):
        return False

    def read(self, __size=None):
        """Read up to ``__size`` units from the local copy (everything when omitted)."""
        local = self._ensure_local_file()
        # Compare against None rather than truthiness so read(0) returns an
        # empty result instead of the whole file.
        if __size is None:
            return local.read()
        return local.read(__size)

    def readline(self, __size=None):
        """Read one line from the local copy, limited to ``__size`` when given."""
        # The handle must stay open between calls: wrapping it in ``with``
        # would close it after the first line and break every later read.
        local = self._ensure_local_file()
        if __size is None:
            return local.readline()
        return local.readline(__size)

    def readlines(self, __hint=None):
        """Read the remaining lines from the local copy, honoring ``__hint`` when given."""
        local = self._ensure_local_file()
        if __hint is None:
            return local.readlines()
        return local.readlines(__hint)

    def tell(self):
        """Return the current position within the local copy."""
        return self._ensure_local_file().tell()

    def seek(self, __offset, __whence=None):
        """Move the position of the local copy; ``__whence`` is passed only when given."""
        local = self._ensure_local_file()
        if __whence is None:
            return local.seek(__offset)
        return local.seek(__offset, __whence)

Test/datafile_test.py

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
import unittest, os, uuid
88
import numpy as np
99
import Algorithmia
10-
from Algorithmia.datafile import DataFile, LocalDataFile
10+
import json
11+
from Algorithmia.datafile import DataFile, LocalDataFile, AdvancedDataFile
1112

1213
class DataFileTest(unittest.TestCase):
1314
def setUp(self):
@@ -113,5 +114,42 @@ def test_read_types(self):
113114
txt = self.client.file(self.EXISTING_FILE).getFile().read()
114115
self.assertEqual(txt, self.EXISTING_TEXT)
115116

117+
class AdvancedDataFileTest(unittest.TestCase):
    """Tests for the file-like ``AdvancedDataFile`` returned by ``client.file``."""

    def setUp(self):
        self.client = Algorithmia.client()
        # The JSON round-trip test writes into this directory.
        scratch_dir = self.client.dir("data://.my/empty")
        if not scratch_dir.exists():
            scratch_dir.create()

    def test_get_nonexistant(self):
        # Reading a missing file through the context-manager interface must raise.
        with self.assertRaises(Exception):
            with self.client.file('data://.my/nonexistant/nonreal') as f:
                _ = f.read()

    def test_get_str(self):
        # The legacy DataFile accessor must raise for a missing file too.
        df = self.client.file('data://.my/nonexistant/nonreal', cleanup=True)
        with self.assertRaises(Exception):
            print(df.getString())

    def test_putJson_getJson(self):
        remote_path = '.my/empty/test.json'
        df = AdvancedDataFile(self.client, 'data://' + remote_path, cleanup=True)
        # The payload uses unicode literals when running under Python 2.
        if sys.version_info[0] >= 3:
            payload = {"hello": "world"}
        else:
            payload = {u"hello": u"world"}
        response = df.putJson(payload)
        self.assertEqual(response.path, remote_path)
        # Read back through the file-like interface rather than getJson().
        result = json.loads(df.read())
        self.assertDictEqual(result, payload)
        self.assertEqual(str(result), str(payload))
153+
116154
if __name__ == '__main__':
117155
unittest.main()

0 commit comments

Comments
 (0)