
Commit 116897b (1 parent: 072496c)

Add JDBC's protocol tests as integration tests (#149)

* add JDBC's protocol tests as integration tests

  This commit adds the tests used by the JDBC driver as protocol tests,
  alongside ODBC's integration tests. JDBC's tests are lifted from the
  source, the queries executed through the ODBC driver and the results
  converted and compared to the values in the tests.

* remove deprecated code - addressing PR review note

(cherry picked from commit 1e214be)

2 files changed (+167, -3):

  test/integration/data.py
  test/integration/testing.py

test/integration/data.py

Lines changed: 40 additions & 2 deletions

@@ -208,11 +208,14 @@
 }
 
 
-ES_DATASET_BASE_URL = "https://raw.githubusercontent.com/elastic/elasticsearch/6857d305270be3d987689fda37cc84b7bc18fbb3/x-pack/plugin/sql/qa/src/main/resources/"
+ES_DATASET_BASE_URL = "https://raw.githubusercontent.com/elastic/elasticsearch/eda31b0ac00c952a52885902be59ac429b0ca81a/x-pack/plugin/sql/qa/src/main/resources/"
 
-KIBANA_SAMPLES_BASE_URL = "https://raw.githubusercontent.com/elastic/kibana/54e498200b8b1a265becf1b27a6958e613acc3d1/src/legacy/server/sample_data/data_sets"
+ES_PROTO_CASE_BASE_URL = "https://raw.githubusercontent.com/elastic/elasticsearch/eda31b0ac00c952a52885902be59ac429b0ca81a/x-pack/plugin/sql/qa/src/main/java/org/elasticsearch/xpack/sql/qa/"
+
+KIBANA_SAMPLES_BASE_URL = "https://raw.githubusercontent.com/elastic/kibana/3c3c9b2a154656f25e980ba3fa03d7325561c526/src/legacy/server/sample_data/data_sets"
 KIBANA_INDEX_PREFIX = "kibana_sample_data_"
 
+
 # python seems to slow down when operating on multiple long strings?
 BATCH_SIZE = 500
 

@@ -266,6 +269,8 @@ class TestData(object):
     LIBRARY_INDEX = "library"
     EMPLOYEES_FILE = "employees.csv"
     EMPLOYEES_INDEX = "employees"
+    PROTO_CASE_FILE = "SqlProtocolTestCase.java"
+
 
     ECOMMERCE_INDEX = KIBANA_INDEX_PREFIX + "ecommerce"
     FLIGHTS_INDEX = KIBANA_INDEX_PREFIX + "flights"

@@ -500,6 +505,34 @@ def _load_kibana_sample(self, index_name):
         self._put_sample_template(sample_name, index_name)
         self._index_sample_data(sample_name, index_name)
 
+    def _load_proto_tests(self):
+        print("Loading SQL proto tests")
+        if self._offline_dir:
+            path = os.path.join(self._offline_dir, self.PROTO_CASE_FILE)
+            with open(path) as f:
+                case_src = f.read()
+        else:
+            url = ES_PROTO_CASE_BASE_URL + "/" + self.PROTO_CASE_FILE
+            req = requests.get(url, timeout=Elasticsearch.REQ_TIMEOUT)
+            if req.status_code != 200:
+                raise Exception("failed to fetch %s with code %s" % (url, req.status_code))
+            case_src = req.text
+
+        tests = re.findall("^\s+assertQuery\((\"[^;]*)\);", case_src, re.ASCII|re.DOTALL|re.MULTILINE)
+        tests = [re.sub("\n\s*", "", x) for x in tests]
+        # use a CSV reader to deal with commas within SQL statements
+        creader = csv.reader(tests)
+        self._proto_tests = []
+        for t in creader:
+            t = [x.strip('" ') for x in t]
+
+            assert(5 <= len(t) <= 6)
+            if len(t) == 5:
+                (query, col_name, data_type, data_val, disp_size) = t
+                cli_val = data_val
+            else:
+                (query, col_name, data_type, data_val, cli_val, disp_size) = t
+            self._proto_tests.append((query, col_name, data_type, data_val, cli_val, disp_size))
 
     def load(self):
         self._load_tableau_sample(self.CALCS_FILE, self.CALCS_INDEX, CALCS_TEMPLATE, CALCS_PIPELINE)

@@ -513,10 +546,15 @@ def load(self):
         self._load_kibana_sample(self.FLIGHTS_INDEX)
         self._load_kibana_sample(self.LOGS_INDEX)
 
+        self._load_proto_tests()
+
         print("Data %s." % ("meta-processed" if self._mode == self.MODE_NOINDEX else "reindexed" if self._mode == \
                 self.MODE_REINDEX else "indexed"))
 
     def csv_attributes(self, csv_name):
         return (self._csv_md5[csv_name], self._csv_header[csv_name], self._csv_lines[csv_name])
 
+    def proto_tests(self):
+        return self._proto_tests
+
 # vim: set noet fenc=utf-8 ff=dos sts=0 sw=4 ts=4 tw=118 :
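
For reference (an illustration, not part of the commit): _load_proto_tests() scrapes the assertQuery(...) invocations out of the JDBC test source and splits their arguments with a CSV reader, since the SQL literals themselves may contain commas. A self-contained sketch of that parsing, using an invented Java line of the shape the regex expects:

import csv
import re

case_src = """
    public void testTextualType() throws IOException {
        assertQuery("SELECT 'abc'", "'abc'", "keyword", "abc", 32766);
    }
"""

tests = re.findall(r'^\s+assertQuery\(("[^;]*)\);', case_src, re.ASCII | re.DOTALL | re.MULTILINE)
tests = [re.sub(r"\n\s*", "", x) for x in tests]  # re-join arguments split over lines
for t in csv.reader(tests):  # the CSV reader respects the double-quoted SQL text
    print([x.strip('" ') for x in t])
    # -> ["SELECT 'abc'", "'abc'", 'keyword', 'abc', '32766']

This invented line has only five arguments, so in the commit's 5-vs-6 tuple handling cli_val would default to data_val; tests that supply a separate CLI representation carry a sixth argument.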

test/integration/testing.py

Lines changed: 127 additions & 1 deletion

@@ -8,13 +8,17 @@
 import datetime
 import hashlib
 import unittest
+import re
+import struct
+import ctypes
 
 from elasticsearch import Elasticsearch
 from data import TestData, BATTERS_TEMPLATE
 
 UID = "elastic"
 CONNECT_STRING = 'Driver={Elasticsearch Driver};UID=%s;PWD=%s;Secure=0;' % (UID, Elasticsearch.AUTH_PASSWORD)
-CATALOG = "elasticsearch"
+CATALOG = "elasticsearch" # nightly built
+#CATALOG = "distribution_run" # source built
 
 class Testing(unittest.TestCase):
 

@@ -152,6 +156,126 @@ def _catalog_columns(self, use_catalog=False, use_surrogate=True):
         cols_expect.sort()
         self.assertEqual(cols_have, cols_expect)
 
+
+    # pyodbc doesn't support INTERVAL types; when installing an "output converter", it asks the ODBC driver for the
+    # binary format and currently, this is the same as a wchar_t string for INTERVALs.
+    # Also, just return None for data type 0 -- NULL
+    def _install_output_converters(self, cnxn):
+        wchar_sz = ctypes.sizeof(ctypes.c_wchar)
+        if wchar_sz == ctypes.sizeof(ctypes.c_ushort):
+            unit = "H"
+        elif wchar_sz == ctypes.sizeof(ctypes.c_uint32):
+            unit = "I"
+        else:
+            raise Exception("unsupported wchar_t size")
+
+        # wchar_t to python string
+        def _convert_interval(value):
+            cnt = len(value)
+            assert(cnt % wchar_sz == 0)
+            cnt //= wchar_sz
+            ret = ""
+            fmt = "=" + str(cnt) + unit
+            for c in struct.unpack(fmt, value):
+                ret += chr(c)
+            return ret
+
+        for x in range(101, 114): # INTERVAL types IDs
+            cnxn.add_output_converter(x, _convert_interval)
+
+        def _convert_null(value):
+            return None
+        cnxn.add_output_converter(0, _convert_null) # NULL type ID
+
+    # produce an instance of the 'data_type' out of the 'data_val' string
+    def _type_to_instance(self, data_type, data_val):
+        # Change the value read in the tests to the type and format of the result expected to be
+        # returned by the driver.
+        if data_type == "null":
+            instance = None
+        elif data_type.startswith("bool"):
+            instance = data_val.lower() == "true"
+        elif data_type in ["byte", "short", "integer"]:
+            instance = int(data_val)
+        elif data_type == "long":
+            instance = int(data_val.strip("lL"))
+        elif data_type == "double":
+            instance = float(data_val)
+        elif data_type == "float":
+            instance = float(data_val.strip("fF"))
+        elif data_type in ["datetime", "date", "time"]:
+            fmt = "%H:%M:%S"
+            fmt = "%Y-%m-%dT" + fmt
+            # no explicit second-with-microseconds directive??
+            if "." in data_val:
+                fmt += ".%f"
+            # always specify the timezone so that local-to-UTC conversion can take place
+            fmt += "%z"
+            val = data_val
+            if data_type == "time":
+                # parse Time as a Datetime, since some tests use the ES/SQL-specific
+                # Time-with-timezone, which then needs converting to UTC (as the driver does);
+                # this conversion won't work for strptime()'ed Time values, as these use
+                # year 1900, which is not UTC convertible.
+                val = "1970-02-02T" + val
+            # strptime() won't recognize Z as Zulu/UTC
+            val = val.replace("Z", "+00:00")
+            instance = datetime.datetime.strptime(val, fmt)
+            # if local time is provided, change it to UTC (as the driver does)
+            try:
+                timestamp = instance.timestamp()
+                if data_type != "datetime":
+                    # The microsecond component only makes sense with Timestamp/Datetime with
+                    # ODBC (the TIME_STRUCT lacks a fractional second field).
+                    timestamp = int(timestamp)
+                instance = instance.utcfromtimestamp(timestamp)
+            except OSError:
+                # The value can't be UTC converted, since the test uses Datetime years before
+                # 1970 => convert it to a timestamp w/o timezone.
+                instance = datetime.datetime(instance.year, instance.month, instance.day,
+                        instance.hour, instance.minute, instance.second, instance.microsecond)
+
+            if data_type == "date":
+                instance = instance.date()
+            elif data_type == "time":
+                instance = instance.time()
+        else:
+            instance = data_val
+
+        return instance
+
+    def _proto_tests(self):
+        tests = self._data.proto_tests()
+        with pyodbc.connect(self._dsn) as cnxn:
+            cnxn.autocommit = True
+            self._install_output_converters(cnxn)
+            try:
+                for t in tests:
+                    (query, col_name, data_type, data_val, cli_val, disp_size) = t
+                    # print("T: %s, %s, %s, %s, %s, %s" % (query, col_name, data_type, data_val, cli_val, disp_size))
+                    with cnxn.execute(query) as curs:
+                        self.assertEqual(curs.rowcount, 1)
+                        res = curs.fetchone()[0]
+
+                        if data_val != cli_val: # INTERVAL tests
+                            assert(query.lower().startswith("select interval"))
+                            # extract the literal value (`INTERVAL -'1 1'` -> `-1 1`)
+                            expect = re.match("[^-]*(-?\s*'[^']*').*", query).groups()[0]
+                            expect = expect.replace("'", "")
+                            # filter out tests with fractional seconds:
+                            # https://github.com/elastic/elasticsearch/issues/41635
+                            if re.search("\d*\.\d+", expect):
+                                continue
+                        else: # non-INTERVAL tests
+                            assert(data_type.lower() == data_type)
+                            # Change the value read in the tests to the type and format of the
+                            # result expected to be returned by the driver.
+                            expect = self._type_to_instance(data_type, data_val)
+
+                        self.assertEqual(res, expect)
+            finally:
+                cnxn.clear_output_converters()
+
     def perform(self):
         self._check_info(pyodbc.SQL_USER_NAME, UID)
         self._check_info(pyodbc.SQL_DATABASE_NAME, CATALOG)

@@ -175,6 +299,8 @@ def perform(self):
         self._select_columns(TestData.ECOMMERCE_INDEX, "*")
         self._select_columns(TestData.LOGS_INDEX, "*")
 
+        self._proto_tests()
+
         print("Tests successful.")
 
 # vim: set noet fenc=utf-8 ff=dos sts=0 sw=4 ts=4 tw=118 :
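
Two notes on the additions above, with illustrative sketches that are not part of the commit. First, the output converters: pyodbc hands each converter the raw bytes of the column, and for INTERVAL columns the driver currently returns a wchar_t string, that is, UTF-16 or UTF-32 code units depending on the platform's wchar_t width. _convert_interval() decodes such a buffer roughly like this:

import ctypes
import struct

wchar_sz = ctypes.sizeof(ctypes.c_wchar)  # 2 on Windows, 4 on most Unixes
unit = "H" if wchar_sz == 2 else "I"      # matching unsigned integer format for struct
# a driver-returned INTERVAL value such as "-1 1", as a wchar_t buffer (simulated here):
raw = "-1 1".encode("utf-16-le" if wchar_sz == 2 else "utf-32-le")

cnt = len(raw) // wchar_sz
print("".join(chr(c) for c in struct.unpack("=%d%s" % (cnt, unit), raw)))  # -> -1 1

Second, the time handling in _type_to_instance(): the ES/SQL time-with-timezone literal is pinned to a real date (1970-02-02) before parsing, so the local-to-UTC conversion the driver performs can be reproduced; with strptime()'s default year of 1900 the timestamp would not be UTC convertible. A minimal sketch, assuming a +01:00 literal:

import datetime

val = "12:34:56+01:00"  # a hypothetical "time with timezone" test literal
inst = datetime.datetime.strptime("1970-02-02T" + val, "%Y-%m-%dT%H:%M:%S%z")
# drop fractional seconds (as for non-datetime types) and shift to UTC:
utc = datetime.datetime.utcfromtimestamp(int(inst.timestamp()))
print(utc.time())  # -> 11:34:56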
