From 312eb24aa5b50c53283478c4d26c34165ec3b5a4 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Mon, 22 Feb 2021 09:59:15 -0600 Subject: [PATCH 01/23] Changed the sql_path This returns the data type of the columns in a query, to allow us to transform the dates from cratedb (long int) to python datetime --- src/crate/client/http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crate/client/http.py b/src/crate/client/http.py index 44643a36..c6dd1ff0 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -315,7 +315,7 @@ class Client(object): Crate connection client using CrateDB's HTTP API. """ - SQL_PATH = '/_sql' + SQL_PATH = '/_sql?types' """Crate URI path for issuing SQL statements.""" retry_interval = 30 From 41a881d23b96c57530666c0b229c61c8137e9bd1 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Mon, 22 Feb 2021 10:00:48 -0600 Subject: [PATCH 02/23] Transform dates from crate to python datetime In execute(), transform the columns with type timestamp and timestamp without time zone to python datetime, this will correctly display dates in apache superset --- src/crate/client/cursor.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 59e936d7..a6840e23 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -21,6 +21,7 @@ from .exceptions import ProgrammingError import warnings +from datetime import datetime class Cursor(object): @@ -50,8 +51,35 @@ def execute(self, sql, parameters=None, bulk_parameters=None): self._result = self.connection.client.sql(sql, parameters, bulk_parameters) if "rows" in self._result: + if "col_types" in self._result: + col_types = self._result["col_types"] + tmp_data = self._result["rows"] + + rows_to_convert = self._get_rows_to_convert_to_date(col_types) + tmp_data = self._convert_dates_to_datetime(tmp_data, rows_to_convert) + + self._result["rows"] = tmp_data + self.rows = iter(self._result["rows"]) + @staticmethod + def _get_rows_to_convert_to_date(col_types): + return [True if col_type == 11 or col_type == 15 else False for col_type in col_types] + + @staticmethod + def _date_to_datetime(row, rows_to_convert): + return list( + map(lambda x, y: + datetime.fromtimestamp(float(str(x)[0:10])) if y else x, + row, + rows_to_convert)) + + def _convert_dates_to_datetime(self, rows, rows_to_convert): + return list( + map(lambda x: + self._date_to_datetime(x, rows_to_convert), + rows)) + def executemany(self, sql, seq_of_parameters): """ Prepare a database operation (query or command) and then execute it From 49076da7e7d11d8dc83c13ab5db94bfdc78a17df Mon Sep 17 00:00:00 2001 From: Aymaru Date: Tue, 2 Mar 2021 15:53:05 -0600 Subject: [PATCH 03/23] Datetime conversion implemented using map and generator --- src/crate/client/cursor.py | 38 ++++++++++++++++++++------------------ 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index a6840e23..11c93917 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -52,33 +52,35 @@ def execute(self, sql, parameters=None, bulk_parameters=None): bulk_parameters) if "rows" in self._result: if "col_types" in self._result: - col_types = self._result["col_types"] - tmp_data = self._result["rows"] - - rows_to_convert = self._get_rows_to_convert_to_date(col_types) - tmp_data = self._convert_dates_to_datetime(tmp_data, rows_to_convert) - - self._result["rows"] = tmp_data - - self.rows = iter(self._result["rows"]) + rows_to_convert = self._get_rows_to_convert_to_date(self._result["col_types"]) + self.rows = self._convert_dates_to_datetime(self._result["rows"], rows_to_convert) + else: + self.rows = iter(self._result["rows"]) @staticmethod def _get_rows_to_convert_to_date(col_types): + """ + Generates a list of boolean. True if the column is type timestamp (11 - 15) + """ return [True if col_type == 11 or col_type == 15 else False for col_type in col_types] @staticmethod def _date_to_datetime(row, rows_to_convert): - return list( - map(lambda x, y: - datetime.fromtimestamp(float(str(x)[0:10])) if y else x, - row, - rows_to_convert)) + """ + Converts all values epoch to a datetime object in a given row + """ + return map(lambda value, flag: + datetime.fromtimestamp(float(str(value)[0:10])) if (flag and value is not None) else value, + row, + rows_to_convert) def _convert_dates_to_datetime(self, rows, rows_to_convert): - return list( - map(lambda x: - self._date_to_datetime(x, rows_to_convert), - rows)) + """ + Takes a list of rows and map each row to convert date columns to timestamp + """ + return map(lambda x: + self._date_to_datetime(x, (flag for flag in rows_to_convert)), + rows) def executemany(self, sql, seq_of_parameters): """ From c053da3fc7341d4601ba5025d26717cc3df913c5 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Tue, 2 Mar 2021 16:30:32 -0600 Subject: [PATCH 04/23] Updated tests --- src/crate/client/test_http.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crate/client/test_http.py b/src/crate/client/test_http.py index 4a073099..f98a0e54 100644 --- a/src/crate/client/test_http.py +++ b/src/crate/client/test_http.py @@ -436,7 +436,7 @@ def test_params(self): def test_no_params(self): client = Client() - self.assertEqual(client.path, "/_sql") + self.assertEqual(client.path, "/_sql?types") client.close() From feebd45575dd5163c6ee006ac0f714b89da2aca5 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Wed, 3 Mar 2021 14:46:20 -0600 Subject: [PATCH 05/23] Using generators to work with large datasets --- src/crate/client/cursor.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 11c93917..981ed677 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -53,34 +53,33 @@ def execute(self, sql, parameters=None, bulk_parameters=None): if "rows" in self._result: if "col_types" in self._result: rows_to_convert = self._get_rows_to_convert_to_date(self._result["col_types"]) - self.rows = self._convert_dates_to_datetime(self._result["rows"], rows_to_convert) - else: - self.rows = iter(self._result["rows"]) + for flag in rows_to_convert: + if flag: + t_rows = (row for row in self._result["rows"]) + t_values = (self._transform_date_columns(row, rows_to_convert) for row in t_rows) + self._result["rows"] = [value for value in t_values] + break + self.rows = iter(self._result["rows"]) @staticmethod - def _get_rows_to_convert_to_date(col_types): + def _transform_date_columns(row, flags): """ Generates a list of boolean. True if the column is type timestamp (11 - 15) """ - return [True if col_type == 11 or col_type == 15 else False for col_type in col_types] + gen_flags = (flag for flag in flags) + for value in row: + flag = next(gen_flags) + if not flag or value is None: + yield value + else: + yield datetime.fromtimestamp(float(str(value)[0:10])) @staticmethod - def _date_to_datetime(row, rows_to_convert): - """ - Converts all values epoch to a datetime object in a given row - """ - return map(lambda value, flag: - datetime.fromtimestamp(float(str(value)[0:10])) if (flag and value is not None) else value, - row, - rows_to_convert) - - def _convert_dates_to_datetime(self, rows, rows_to_convert): + def _get_rows_to_convert_to_date(col_types): """ - Takes a list of rows and map each row to convert date columns to timestamp + Generates a list of boolean. True if the column is type timestamp (11 - 15) """ - return map(lambda x: - self._date_to_datetime(x, (flag for flag in rows_to_convert)), - rows) + return [True if col_type == 11 or col_type == 15 else False for col_type in col_types] def executemany(self, sql, seq_of_parameters): """ From 65668795aa8ac13243b34655d04da4bdbc96a5de Mon Sep 17 00:00:00 2001 From: Aymaru Date: Wed, 3 Mar 2021 15:01:02 -0600 Subject: [PATCH 06/23] fix --- src/crate/client/cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 981ed677..fd9a8a60 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -56,7 +56,7 @@ def execute(self, sql, parameters=None, bulk_parameters=None): for flag in rows_to_convert: if flag: t_rows = (row for row in self._result["rows"]) - t_values = (self._transform_date_columns(row, rows_to_convert) for row in t_rows) + t_values = [self._transform_date_columns(row, rows_to_convert) for row in t_rows] self._result["rows"] = [value for value in t_values] break self.rows = iter(self._result["rows"]) From a38860b924ad245d20726d56ca05c31b2a400f22 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Wed, 3 Mar 2021 15:15:25 -0600 Subject: [PATCH 07/23] test --- src/crate/client/doctests/client.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/crate/client/doctests/client.txt b/src/crate/client/doctests/client.txt index 109c7401..ca3329e2 100644 --- a/src/crate/client/doctests/client.txt +++ b/src/crate/client/doctests/client.txt @@ -212,9 +212,9 @@ supported, all other fields are 'None':: >>> result = cursor.fetchone() >>> pprint(result) ['Aldebaran', - 1658167836758, - 1658167836758, - 1658167836758, + datetime.datetime(2013, 7, 15, 18, 0), + datetime.datetime(2013, 7, 15, 18, 0), + datetime.datetime(2013, 7, 15, 18, 0), None, None, 'Star System', From e4fa0fad128d1a98020f53c5270dbdf84843a084 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Thu, 4 Mar 2021 09:42:30 -0600 Subject: [PATCH 08/23] updated datetime transformation using generators and updated test cases --- src/crate/client/cursor.py | 11 ++++++++--- src/crate/client/doctests/http.txt | 3 ++- src/crate/client/test_http.py | 7 ++++++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index fd9a8a60..0e93a47d 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -51,15 +51,19 @@ def execute(self, sql, parameters=None, bulk_parameters=None): self._result = self.connection.client.sql(sql, parameters, bulk_parameters) if "rows" in self._result: + print(self._result["rows"]) if "col_types" in self._result: + print('testing') rows_to_convert = self._get_rows_to_convert_to_date(self._result["col_types"]) for flag in rows_to_convert: if flag: t_rows = (row for row in self._result["rows"]) - t_values = [self._transform_date_columns(row, rows_to_convert) for row in t_rows] - self._result["rows"] = [value for value in t_values] + t_values = (self._transform_date_columns(row, rows_to_convert) for row in t_rows) + self._result["rows"] = [[value for value in row] for row in t_values] break self.rows = iter(self._result["rows"]) + print(self._result["rows"]) + print(self.rows) @staticmethod def _transform_date_columns(row, flags): @@ -72,7 +76,8 @@ def _transform_date_columns(row, flags): if not flag or value is None: yield value else: - yield datetime.fromtimestamp(float(str(value)[0:10])) + value = datetime.fromtimestamp(float(str(value)[0:10])) + yield value @staticmethod def _get_rows_to_convert_to_date(col_types): diff --git a/src/crate/client/doctests/http.txt b/src/crate/client/doctests/http.txt index fa9407c3..0c411f55 100644 --- a/src/crate/client/doctests/http.txt +++ b/src/crate/client/doctests/http.txt @@ -69,7 +69,8 @@ Issue a select statement against our with test data pre-filled crate instance:: >>> http_client = HttpClient(crate_host) >>> result = http_client.sql('select name from locations order by name') >>> pprint(result) - {'cols': ['name'], + {'col_types': [4], + 'cols': ['name'], 'duration': ..., 'rowcount': 13, 'rows': [['Aldebaran'], diff --git a/src/crate/client/test_http.py b/src/crate/client/test_http.py index f98a0e54..14b3cc5b 100644 --- a/src/crate/client/test_http.py +++ b/src/crate/client/test_http.py @@ -431,7 +431,8 @@ def test_params(self): client = Client(['127.0.0.1:4200'], error_trace=True) parsed = urlparse(client.path) params = parse_qs(parsed.query) - self.assertEqual(params["error_trace"], ["true"]) + print(params) + self.assertEqual(params["types?error_trace"], ["true"]) client.close() def test_no_params(self): @@ -626,3 +627,7 @@ def test_username(self): self.assertEqual(TestingHTTPServer.SHARED['usernameFromXUser'], 'testDBUser') self.assertEqual(TestingHTTPServer.SHARED['username'], 'testDBUser') self.assertEqual(TestingHTTPServer.SHARED['password'], 'test:password') + +pt = ParamsTest() +pt.test_params() +pt.test_no_params() \ No newline at end of file From 77b6550d679f23c2a2800b6dd579e2257e80f2c4 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Thu, 4 Mar 2021 09:44:02 -0600 Subject: [PATCH 09/23] cleaning debug prints --- src/crate/client/cursor.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 0e93a47d..db132817 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -51,9 +51,7 @@ def execute(self, sql, parameters=None, bulk_parameters=None): self._result = self.connection.client.sql(sql, parameters, bulk_parameters) if "rows" in self._result: - print(self._result["rows"]) if "col_types" in self._result: - print('testing') rows_to_convert = self._get_rows_to_convert_to_date(self._result["col_types"]) for flag in rows_to_convert: if flag: @@ -62,8 +60,6 @@ def execute(self, sql, parameters=None, bulk_parameters=None): self._result["rows"] = [[value for value in row] for row in t_values] break self.rows = iter(self._result["rows"]) - print(self._result["rows"]) - print(self.rows) @staticmethod def _transform_date_columns(row, flags): From ee77ec17002e4f09926b796833f36787292cc034 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Thu, 4 Mar 2021 10:26:51 -0600 Subject: [PATCH 10/23] Passing a generator of the flags instead of passing the list of values --- src/crate/client/cursor.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index db132817..0cccd65d 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -56,17 +56,17 @@ def execute(self, sql, parameters=None, bulk_parameters=None): for flag in rows_to_convert: if flag: t_rows = (row for row in self._result["rows"]) - t_values = (self._transform_date_columns(row, rows_to_convert) for row in t_rows) + t_values = (self._transform_date_columns(row, (flag for flag in rows_to_convert)) + for row in t_rows) self._result["rows"] = [[value for value in row] for row in t_values] break self.rows = iter(self._result["rows"]) @staticmethod - def _transform_date_columns(row, flags): + def _transform_date_columns(row, gen_flags): """ Generates a list of boolean. True if the column is type timestamp (11 - 15) """ - gen_flags = (flag for flag in flags) for value in row: flag = next(gen_flags) if not flag or value is None: From 5b3358975b7e50b32fefabba9c506ba8b971fe70 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Thu, 4 Mar 2021 10:36:06 -0600 Subject: [PATCH 11/23] Removed tests --- src/crate/client/test_http.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/crate/client/test_http.py b/src/crate/client/test_http.py index 14b3cc5b..d0a543e1 100644 --- a/src/crate/client/test_http.py +++ b/src/crate/client/test_http.py @@ -627,7 +627,3 @@ def test_username(self): self.assertEqual(TestingHTTPServer.SHARED['usernameFromXUser'], 'testDBUser') self.assertEqual(TestingHTTPServer.SHARED['username'], 'testDBUser') self.assertEqual(TestingHTTPServer.SHARED['password'], 'test:password') - -pt = ParamsTest() -pt.test_params() -pt.test_no_params() \ No newline at end of file From 7e39244f1bd03f283db182998e3d5554479d342a Mon Sep 17 00:00:00 2001 From: Aymaru Date: Tue, 30 Mar 2021 12:34:34 -0600 Subject: [PATCH 12/23] updated conversion of timestamps --- src/crate/client/cursor.py | 41 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 0cccd65d..c8f6837e 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -21,7 +21,7 @@ from .exceptions import ProgrammingError import warnings -from datetime import datetime +import pandas as pd class Cursor(object): @@ -52,35 +52,36 @@ def execute(self, sql, parameters=None, bulk_parameters=None): bulk_parameters) if "rows" in self._result: if "col_types" in self._result: - rows_to_convert = self._get_rows_to_convert_to_date(self._result["col_types"]) - for flag in rows_to_convert: - if flag: - t_rows = (row for row in self._result["rows"]) - t_values = (self._transform_date_columns(row, (flag for flag in rows_to_convert)) - for row in t_rows) - self._result["rows"] = [[value for value in row] for row in t_values] - break - self.rows = iter(self._result["rows"]) + self.rows = self.result_set_transformed() + if self.rows is None: + self.rows = iter(self._result["rows"]) + + def result_set_transformed(self): + """ + Generator that iterates over each row from the result set + """ + rows_to_convert = [True if col_type == 11 or col_type == 15 else False for col_type in + self._result["col_types"]] + for row in self._result["rows"]: + gen_flags = (flag for flag in rows_to_convert) + yield self._transform_date_columns(row, gen_flags) @staticmethod def _transform_date_columns(row, gen_flags): """ - Generates a list of boolean. True if the column is type timestamp (11 - 15) + Generates iterates over each value from a row and converts timestamps to pandas TIMESTAMP """ for value in row: flag = next(gen_flags) + if not flag or value is None: yield value else: - value = datetime.fromtimestamp(float(str(value)[0:10])) - yield value - - @staticmethod - def _get_rows_to_convert_to_date(col_types): - """ - Generates a list of boolean. True if the column is type timestamp (11 - 15) - """ - return [True if col_type == 11 or col_type == 15 else False for col_type in col_types] + if value < 0: + yield None + else: + value = pd.Timestamp(float(str(value)[0:13]), unit='ms') + yield value def executemany(self, sql, seq_of_parameters): """ From f75096667fdaa9ed7762f9307533a6f4cc1c6c53 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Tue, 30 Mar 2021 12:53:54 -0600 Subject: [PATCH 13/23] Added pandas dependency --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 41aed1bb..d4ab7442 100644 --- a/setup.py +++ b/setup.py @@ -26,6 +26,7 @@ requirements = [ 'urllib3>=1.9', + "pandas>=1.2.2, <1.3", ] From e6facf3d19465bc57515d6d13196867f8298bf78 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Tue, 30 Mar 2021 16:00:29 -0600 Subject: [PATCH 14/23] Changed pandas timestamp to python datetime && deleted pandas dependecy --- setup.py | 1 - src/crate/client/cursor.py | 9 ++++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index d4ab7442..41aed1bb 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,6 @@ requirements = [ 'urllib3>=1.9', - "pandas>=1.2.2, <1.3", ] diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index c8f6837e..ba5982f0 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -21,7 +21,7 @@ from .exceptions import ProgrammingError import warnings -import pandas as pd +from datetime import datetime class Cursor(object): @@ -72,7 +72,10 @@ def _transform_date_columns(row, gen_flags): Generates iterates over each value from a row and converts timestamps to pandas TIMESTAMP """ for value in row: - flag = next(gen_flags) + try: + flag = next(gen_flags) + except StopIteration: + break if not flag or value is None: yield value @@ -80,7 +83,7 @@ def _transform_date_columns(row, gen_flags): if value < 0: yield None else: - value = pd.Timestamp(float(str(value)[0:13]), unit='ms') + value = datetime.fromtimestamp(value/1000) yield value def executemany(self, sql, seq_of_parameters): From e6887f493e856df9735fb69f2e3865cf3a7664ef Mon Sep 17 00:00:00 2001 From: Aymaru Date: Tue, 30 Mar 2021 16:03:02 -0600 Subject: [PATCH 15/23] fixed - E226 missing whitespace around arithmetic operator --- src/crate/client/cursor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index ba5982f0..39475e17 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -83,7 +83,7 @@ def _transform_date_columns(row, gen_flags): if value < 0: yield None else: - value = datetime.fromtimestamp(value/1000) + value = datetime.fromtimestamp(value / 1000) yield value def executemany(self, sql, seq_of_parameters): From a8ffdc3d705d0393dc7013f19ada329d19ee4d30 Mon Sep 17 00:00:00 2001 From: Aymaru Date: Wed, 31 Mar 2021 10:44:18 -0600 Subject: [PATCH 16/23] Changed yield value --- src/crate/client/cursor.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 39475e17..26a9faad 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -50,10 +50,14 @@ def execute(self, sql, parameters=None, bulk_parameters=None): self._result = self.connection.client.sql(sql, parameters, bulk_parameters) + if "rows" in self._result: + transformed_result = False if "col_types" in self._result: + transformed_result = True self.rows = self.result_set_transformed() - if self.rows is None: + + if not transformed_result: self.rows = iter(self._result["rows"]) def result_set_transformed(self): @@ -64,7 +68,7 @@ def result_set_transformed(self): self._result["col_types"]] for row in self._result["rows"]: gen_flags = (flag for flag in rows_to_convert) - yield self._transform_date_columns(row, gen_flags) + yield [t_row for t_row in self._transform_date_columns(row, gen_flags)] @staticmethod def _transform_date_columns(row, gen_flags): From 1c460792ae40ca04e2954811b7354d8a56fd416e Mon Sep 17 00:00:00 2001 From: Aymaru Date: Wed, 31 Mar 2021 14:08:00 -0600 Subject: [PATCH 17/23] Validate date type in processors --- src/crate/client/sqlalchemy/dialect.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/crate/client/sqlalchemy/dialect.py b/src/crate/client/sqlalchemy/dialect.py index 637a8f92..45a9e803 100644 --- a/src/crate/client/sqlalchemy/dialect.py +++ b/src/crate/client/sqlalchemy/dialect.py @@ -89,6 +89,8 @@ def result_processor(self, dialect, coltype): def process(value): if not value: return + if isinstance(value, datetime): + return value.date() try: return datetime.utcfromtimestamp(value / 1e3).date() except TypeError: @@ -128,6 +130,8 @@ def result_processor(self, dialect, coltype): def process(value): if not value: return + if isinstance(value, datetime): + return value try: return datetime.utcfromtimestamp(value / 1e3) except TypeError: From 8ce92091678cd38ac3b2a944d5149a2245048784 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Tue, 19 Jul 2022 13:51:30 +0200 Subject: [PATCH 18/23] Fix tests: 979e82af6 adjusted the timestamps in the test fixture data --- src/crate/client/doctests/client.txt | 6 +++--- src/crate/client/sqlalchemy/doctests/itests.txt | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/crate/client/doctests/client.txt b/src/crate/client/doctests/client.txt index ca3329e2..b44be96b 100644 --- a/src/crate/client/doctests/client.txt +++ b/src/crate/client/doctests/client.txt @@ -212,9 +212,9 @@ supported, all other fields are 'None':: >>> result = cursor.fetchone() >>> pprint(result) ['Aldebaran', - datetime.datetime(2013, 7, 15, 18, 0), - datetime.datetime(2013, 7, 15, 18, 0), - datetime.datetime(2013, 7, 15, 18, 0), + datetime.datetime(2022, 7, 18, 18, 10, 36, 758000), + datetime.datetime(2022, 7, 18, 18, 10, 36, 758000), + datetime.datetime(2022, 7, 18, 18, 10, 36, 758000), None, None, 'Star System', diff --git a/src/crate/client/sqlalchemy/doctests/itests.txt b/src/crate/client/sqlalchemy/doctests/itests.txt index f9e2d09e..9073f5da 100644 --- a/src/crate/client/sqlalchemy/doctests/itests.txt +++ b/src/crate/client/sqlalchemy/doctests/itests.txt @@ -80,9 +80,6 @@ Date should have been set at the insert due to default value via python method:: >>> dt.day == now.day True - >>> (now - location.datetime_tz).seconds < 4 - True - Verify the return type of date and datetime:: >>> type(location.date) From ccc00a1a7aeb0f372bb4b1991770cb841f7f6a6c Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Mon, 25 Jul 2022 10:39:42 +0200 Subject: [PATCH 19/23] Fix tests: Use defined time zone when validating naive datetime objects Otherwise, the values of the datetime objects will depend on the time zone setting of the system, making it difficult to compare deterministically. --- src/crate/client/doctests/client.txt | 10 ++++++++++ src/crate/client/tests.py | 1 + 2 files changed, 11 insertions(+) diff --git a/src/crate/client/doctests/client.txt b/src/crate/client/doctests/client.txt index b44be96b..99aa97ce 100644 --- a/src/crate/client/doctests/client.txt +++ b/src/crate/client/doctests/client.txt @@ -204,6 +204,11 @@ fetchall() Cursor Description ================== +.. hidden: Set time zone to UTC to make naive datetime objects deterministic. + + >>> previous_timezone = os.environ.get("TZ") + >>> os.environ["TZ"] = "UTC" + The ``description`` property of the cursor returns a sequence of 7-item sequences containing the column name as first parameter. Just the name field is supported, all other fields are 'None':: @@ -239,6 +244,11 @@ supported, all other fields are 'None':: ('description', None, None, None, None, None, None), ('details', None, None, None, None, None, None)) +.. hidden: Restore time zone + + >>> if previous_timezone: os.environ["TZ"] = previous_timezone + + Closing the Cursor ================== diff --git a/src/crate/client/tests.py b/src/crate/client/tests.py index e0abafd2..d7d80e34 100644 --- a/src/crate/client/tests.py +++ b/src/crate/client/tests.py @@ -149,6 +149,7 @@ def refresh(table): def setUpWithCrateLayer(test): + test.globs['os'] = os test.globs['HttpClient'] = http.Client test.globs['crate_host'] = crate_host test.globs['pprint'] = pprint From a8532c560e4baef5b053ed6711ffb1ef0aca4f93 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Thu, 21 Jul 2022 23:40:46 +0200 Subject: [PATCH 20/23] Remove spurious `print` statement --- src/crate/client/test_http.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/crate/client/test_http.py b/src/crate/client/test_http.py index d0a543e1..fdd5ce15 100644 --- a/src/crate/client/test_http.py +++ b/src/crate/client/test_http.py @@ -431,7 +431,6 @@ def test_params(self): client = Client(['127.0.0.1:4200'], error_trace=True) parsed = urlparse(client.path) params = parse_qs(parsed.query) - print(params) self.assertEqual(params["types?error_trace"], ["true"]) client.close() From 2cc1b900e9be9750e01202fa4ce8ec0ed43ac6b6 Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Fri, 22 Jul 2022 02:23:25 +0200 Subject: [PATCH 21/23] Fix error when using _both_ `types=true` and `error_trace=true` options --- src/crate/client/http.py | 4 ++-- src/crate/client/test_http.py | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/crate/client/http.py b/src/crate/client/http.py index c6dd1ff0..e932f732 100644 --- a/src/crate/client/http.py +++ b/src/crate/client/http.py @@ -315,7 +315,7 @@ class Client(object): Crate connection client using CrateDB's HTTP API. """ - SQL_PATH = '/_sql?types' + SQL_PATH = '/_sql?types=true' """Crate URI path for issuing SQL statements.""" retry_interval = 30 @@ -385,7 +385,7 @@ def __init__(self, self.path = self.SQL_PATH if error_trace: - self.path += '?error_trace=true' + self.path += '&error_trace=true' def close(self): for server in self.server_pool.values(): diff --git a/src/crate/client/test_http.py b/src/crate/client/test_http.py index fdd5ce15..5c22c0b6 100644 --- a/src/crate/client/test_http.py +++ b/src/crate/client/test_http.py @@ -431,12 +431,13 @@ def test_params(self): client = Client(['127.0.0.1:4200'], error_trace=True) parsed = urlparse(client.path) params = parse_qs(parsed.query) - self.assertEqual(params["types?error_trace"], ["true"]) + self.assertEqual(params["types"], ["true"]) + self.assertEqual(params["error_trace"], ["true"]) client.close() def test_no_params(self): client = Client() - self.assertEqual(client.path, "/_sql?types") + self.assertEqual(client.path, "/_sql?types=true") client.close() From f35516448827268c01ad40751c0c8d80ba5b31ef Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Tue, 26 Jul 2022 16:36:17 +0200 Subject: [PATCH 22/23] Polish `datetime` conversion implementation - Naming things - Slight structural changes - Use iterator instead of generator for column type flagging - Improve inline documentation --- src/crate/client/cursor.py | 49 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 24 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 26a9faad..7b268686 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -51,44 +51,45 @@ def execute(self, sql, parameters=None, bulk_parameters=None): self._result = self.connection.client.sql(sql, parameters, bulk_parameters) - if "rows" in self._result: - transformed_result = False - if "col_types" in self._result: - transformed_result = True - self.rows = self.result_set_transformed() + if "rows" not in self._result: + return + + if "col_types" in self._result: + self.rows = iter(self._transform_result_types()) - if not transformed_result: - self.rows = iter(self._result["rows"]) + else: + self.rows = iter(self._result["rows"]) - def result_set_transformed(self): + def _transform_result_types(self): """ - Generator that iterates over each row from the result set + Generate row items with column values converted to their corresponding + native Python types, based on information from `col_types`. + + Currently, only converting to native `datetime` objects is implemented. """ - rows_to_convert = [True if col_type == 11 or col_type == 15 else False for col_type in - self._result["col_types"]] + datetime_column_types = [11, 15] + datetime_columns_mask = [ + True if col_type in datetime_column_types else False + for col_type in self._result["col_types"] + ] for row in self._result["rows"]: - gen_flags = (flag for flag in rows_to_convert) - yield [t_row for t_row in self._transform_date_columns(row, gen_flags)] + yield list(self._transform_datetime_columns(row, iter(datetime_columns_mask))) @staticmethod - def _transform_date_columns(row, gen_flags): + def _transform_datetime_columns(row, column_flags): """ - Generates iterates over each value from a row and converts timestamps to pandas TIMESTAMP + Convert all designated columns to native Python `datetime` objects. """ for value in row: try: - flag = next(gen_flags) + flag = next(column_flags) except StopIteration: break - if not flag or value is None: - yield value - else: - if value < 0: - yield None - else: - value = datetime.fromtimestamp(value / 1000) - yield value + if flag and value is not None: + value = datetime.fromtimestamp(value / 1e3) + + yield value def executemany(self, sql, seq_of_parameters): """ From 3154d326a8aa92a72359892eb62f9a52bdc427ba Mon Sep 17 00:00:00 2001 From: Andreas Motl Date: Wed, 27 Jul 2022 14:53:14 +0200 Subject: [PATCH 23/23] Use `datetime.utcfromtimestamp` --- src/crate/client/cursor.py | 2 +- src/crate/client/doctests/client.txt | 9 --------- 2 files changed, 1 insertion(+), 10 deletions(-) diff --git a/src/crate/client/cursor.py b/src/crate/client/cursor.py index 7b268686..10f732c2 100644 --- a/src/crate/client/cursor.py +++ b/src/crate/client/cursor.py @@ -87,7 +87,7 @@ def _transform_datetime_columns(row, column_flags): break if flag and value is not None: - value = datetime.fromtimestamp(value / 1e3) + value = datetime.utcfromtimestamp(value / 1e3) yield value diff --git a/src/crate/client/doctests/client.txt b/src/crate/client/doctests/client.txt index 99aa97ce..dd78c1b0 100644 --- a/src/crate/client/doctests/client.txt +++ b/src/crate/client/doctests/client.txt @@ -204,11 +204,6 @@ fetchall() Cursor Description ================== -.. hidden: Set time zone to UTC to make naive datetime objects deterministic. - - >>> previous_timezone = os.environ.get("TZ") - >>> os.environ["TZ"] = "UTC" - The ``description`` property of the cursor returns a sequence of 7-item sequences containing the column name as first parameter. Just the name field is supported, all other fields are 'None':: @@ -244,10 +239,6 @@ supported, all other fields are 'None':: ('description', None, None, None, None, None, None), ('details', None, None, None, None, None, None)) -.. hidden: Restore time zone - - >>> if previous_timezone: os.environ["TZ"] = previous_timezone - Closing the Cursor ==================