Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Cater for ExtractEpochSeconds overflowing integer #1397

Merged
Merged
Changes from all commits
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions tests/resources/bigquery_test_tables.sql
Original file line number Diff line number Diff line change
@@ -646,3 +646,16 @@ INSERT INTO `pso_data_validator`.`dvt_group_by_timestamp` VALUES
(4,2,DATE'2022-02-02',DATETIME'2022-02-02 12:00:00'),
(5,2,DATE'2022-02-02',DATETIME'2022-02-02 13:00:00'),
(6,3,DATE'2023-03-03',DATETIME'2023-03-03 12:00:00');

-- Boundary-value test table: each date/datetime column holds the low extreme,
-- the Unix epoch, or the high extreme of the supported range. Exercises
-- epoch-seconds extraction at the extremes (PR #1397: ExtractEpochSeconds overflow).
DROP TABLE `pso_data_validator`.`dvt_tricky_dates`;
CREATE TABLE `pso_data_validator`.`dvt_tricky_dates` (
id INT64 NOT NULL
, col_dt_low DATE      -- low extreme date (1000-01-01)
, col_dt_epoch DATE    -- Unix epoch date (1970-01-01)
, col_dt_high DATE     -- high extreme date (9999-12-31)
, col_ts_low DATETIME
, col_ts_epoch DATETIME
, col_ts_high DATETIME);
INSERT INTO `pso_data_validator`.`dvt_tricky_dates` VALUES
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
,DATETIME'1000-01-01 00:00:00',DATETIME'1970-01-01 00:00:00',DATETIME'9999-12-31 23:59:59');
14 changes: 14 additions & 0 deletions tests/resources/mysql_test_tables.sql
Original file line number Diff line number Diff line change
@@ -553,3 +553,17 @@ CREATE TABLE `pso_data_validator`.`dvt_many_cols`
, col_399 decimal(1)
) COMMENT 'Integration test table used to test validating many columns.';
INSERT INTO `pso_data_validator`.`dvt_many_cols` (id) values (1);

-- Boundary-value test table: each date/datetime column holds the low extreme,
-- the Unix epoch, or the high extreme of the supported range. Exercises
-- epoch-seconds extraction at the extremes (PR #1397: ExtractEpochSeconds overflow).
DROP TABLE `pso_data_validator`.`dvt_tricky_dates`;
CREATE TABLE `pso_data_validator`.`dvt_tricky_dates` (
id integer NOT NULL PRIMARY KEY
, col_dt_low date      -- low extreme date (1000-01-01)
, col_dt_epoch date    -- Unix epoch date (1970-01-01)
, col_dt_high date     -- high extreme date (9999-12-31)
, col_ts_low datetime(0)
, col_ts_epoch datetime(0)
, col_ts_high datetime(0));
-- NOTE(review): presumably pins the session to UTC so epoch conversions of the
-- inserted literals are deterministic across test hosts -- confirm.
SET time_zone = '+00:00';
INSERT INTO `pso_data_validator`.`dvt_tricky_dates` VALUES
(1,'1000-01-01','1970-01-01','9999-12-31'
,'1000-01-01 00:00:00','1970-01-01 00:00:00','9999-12-31 23:59:59');
14 changes: 14 additions & 0 deletions tests/resources/oracle_test_tables.sql
Original file line number Diff line number Diff line change
@@ -754,3 +754,17 @@ INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (4,2,DATE'2022-02-0
INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (5,2,DATE'2022-02-02',TIMESTAMP'2022-02-02 13:00:00');
INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES (6,3,DATE'2023-03-03',TIMESTAMP'2023-03-03 12:00:00');
COMMIT;

-- Boundary-value test table: each date/timestamp column holds the low extreme,
-- the Unix epoch, or the high extreme of the supported range. Exercises
-- epoch-seconds extraction at the extremes (PR #1397: ExtractEpochSeconds overflow).
DROP TABLE pso_data_validator.dvt_tricky_dates;
CREATE TABLE pso_data_validator.dvt_tricky_dates (
id NUMBER(5) NOT NULL PRIMARY KEY
, col_dt_low DATE      -- low extreme date (1000-01-01)
, col_dt_epoch DATE    -- Unix epoch date (1970-01-01)
, col_dt_high DATE     -- high extreme date (9999-12-31)
, col_ts_low TIMESTAMP(0)
, col_ts_epoch TIMESTAMP(0)
, col_ts_high TIMESTAMP(0));
INSERT INTO pso_data_validator.dvt_tricky_dates VALUES
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59');
COMMIT;
13 changes: 13 additions & 0 deletions tests/resources/postgresql_test_tables.sql
Original file line number Diff line number Diff line change
@@ -804,3 +804,16 @@ INSERT INTO pso_data_validator.dvt_group_by_timestamp VALUES
(4,2,DATE'2022-02-02',TIMESTAMP'2022-02-02 12:00:00'),
(5,2,DATE'2022-02-02',TIMESTAMP'2022-02-02 13:00:00'),
(6,3,DATE'2023-03-03',TIMESTAMP'2023-03-03 12:00:00');

-- Boundary-value test table: each date/timestamp column holds the low extreme,
-- the Unix epoch, or the high extreme of the supported range. Exercises
-- epoch-seconds extraction at the extremes (PR #1397: ExtractEpochSeconds overflow).
DROP TABLE pso_data_validator.dvt_tricky_dates;
CREATE TABLE pso_data_validator.dvt_tricky_dates (
id integer NOT NULL PRIMARY KEY
, col_dt_low date      -- low extreme date (1000-01-01)
, col_dt_epoch date    -- Unix epoch date (1970-01-01)
, col_dt_high date     -- high extreme date (9999-12-31)
, col_ts_low timestamp(0)
, col_ts_epoch timestamp(0)
, col_ts_high timestamp(0));
INSERT INTO pso_data_validator.dvt_tricky_dates VALUES
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59');
13 changes: 13 additions & 0 deletions tests/resources/snowflake_test_tables.sql
Original file line number Diff line number Diff line change
@@ -617,3 +617,16 @@ VALUES
(3,'#','$','-','@','Row 3'),
(4,'#','$','-','@','Row 4'),
(5,'#','$','-','@','Row 5');

-- Boundary-value test table: each date/timestamp column holds the low extreme,
-- the Unix epoch, or the high extreme of the supported range. Exercises
-- epoch-seconds extraction at the extremes (PR #1397: ExtractEpochSeconds overflow).
DROP TABLE PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_DATES;
CREATE TABLE PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_DATES (
id NUMBER(5) NOT NULL PRIMARY KEY
, col_dt_low DATE      -- low extreme date (1000-01-01)
, col_dt_epoch DATE    -- Unix epoch date (1970-01-01)
, col_dt_high DATE     -- high extreme date (9999-12-31)
, col_ts_low TIMESTAMP(0)
, col_ts_epoch TIMESTAMP(0)
, col_ts_high TIMESTAMP(0));
INSERT INTO PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_DATES VALUES
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59');
13 changes: 13 additions & 0 deletions tests/resources/sqlserver_test_tables.sql
Original file line number Diff line number Diff line change
@@ -575,3 +575,16 @@ INSERT INTO pso_data_validator.dvt_uuid_id VALUES
('387bdc3b-2184-43b2-8ec2-3ac791c5b0f1','387bdc3b-2184-43b2-8ec2-3ac791c5b0f1','A');
INSERT INTO pso_data_validator.dvt_uuid_id VALUES
('397bdc3b-2184-43b2-8ec2-3ac791c5b0f1','397bdc3b-2184-43b2-8ec2-3ac791c5b0f1','B');

-- Boundary-value test table: each date/datetime column holds the low extreme,
-- the Unix epoch, or the high extreme of the supported range. Exercises
-- epoch-seconds extraction at the extremes (PR #1397: ExtractEpochSeconds overflow).
DROP TABLE pso_data_validator.dvt_tricky_dates;
CREATE TABLE pso_data_validator.dvt_tricky_dates (
id integer NOT NULL PRIMARY KEY
, col_dt_low date      -- low extreme date (1000-01-01)
, col_dt_epoch date    -- Unix epoch date (1970-01-01)
, col_dt_high date     -- high extreme date (9999-12-31)
, col_ts_low datetime2
, col_ts_epoch datetime2
, col_ts_high datetime2);
INSERT INTO pso_data_validator.dvt_tricky_dates VALUES
(1,'1000-01-01','1970-01-01','9999-12-31'
,'1000-01-01 00:00:00','1970-01-01 00:00:00','9999-12-31 23:59:59');
15 changes: 15 additions & 0 deletions tests/resources/teradata_test_tables.sql
Original file line number Diff line number Diff line change
@@ -680,3 +680,18 @@ CREATE TABLE udf.dvt_bool
COMMENT ON TABLE udf.dvt_bool AS 'Integration test table used to test boolean data type, especially in non-boolean columns.';
INSERT INTO udf.dvt_bool VALUES (1,1,1,'1','Y');
INSERT INTO udf.dvt_bool VALUES (2,0,0,'0','N');

-- Boundary-value test table: each date/timestamp column holds the low extreme,
-- the Unix epoch, or the high extreme of the supported range. Exercises
-- epoch-seconds extraction at the extremes (PR #1397: ExtractEpochSeconds overflow).
DROP TABLE udf.dvt_tricky_dates;
CREATE TABLE udf.dvt_tricky_dates (
id NUMBER(5) NOT NULL PRIMARY KEY
, col_dt_low DATE      -- low extreme date (1000-01-01)
, col_dt_epoch DATE    -- Unix epoch date (1970-01-01)
, col_dt_high DATE     -- high extreme date (9999-12-31)
, col_ts_low TIMESTAMP(0)
, col_ts_epoch TIMESTAMP(0)
, col_ts_high TIMESTAMP(0));
INSERT INTO udf.dvt_tricky_dates VALUES
(1,DATE'1000-01-01',DATE'1970-01-01',DATE'9999-12-31'
,TIMESTAMP'1000-01-01 00:00:00',TIMESTAMP'1970-01-01 00:00:00',TIMESTAMP'9999-12-31 23:59:59+00:00');
-- col_ts_high value above forced to UTC based on article below, but we still get wrong answer from the test:
-- https://support.teradata.com/knowledge?id=kb_article_view&sys_kb_id=0e81918ac36da9103eb2d88f05013138
4 changes: 4 additions & 0 deletions tests/system/data_sources/common_functions.py
Original file line number Diff line number Diff line change
@@ -281,6 +281,7 @@ def column_validation_test_args(
filters: str = None,
grouped_columns: str = None,
filter_status: str = "fail",
wildcard_include_timestamp: bool = False,
):
parser = cli_tools.configure_arg_parser()
cli_arg_list = [
@@ -296,6 +297,7 @@ def column_validation_test_args(
f"--max={max_cols}" if max_cols else None,
f"--filters={filters}" if filters else None,
f"--grouped-columns={grouped_columns}" if grouped_columns else None,
"--wildcard-include-timestamp" if wildcard_include_timestamp else None,
]
cli_arg_list = [_ for _ in cli_arg_list if _]
return parser.parse_args(cli_arg_list)
@@ -310,6 +312,7 @@ def column_validation_test(
max_cols=None,
filters=None,
grouped_columns=None,
wildcard_include_timestamp: bool = False,
expected_rows=0,
):
"""Generic column validation test.
@@ -325,6 +328,7 @@ def column_validation_test(
max_cols=max_cols,
filters=filters,
grouped_columns=grouped_columns,
wildcard_include_timestamp=wildcard_include_timestamp,
)
df = run_test_from_cli_args(args)
assert len(df) == expected_rows
40 changes: 40 additions & 0 deletions tests/system/data_sources/test_mysql.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would have loved to have some test data with the max MySQL timestamp — i.e. 3001-01-19 03:14:07.999999. One way I suggest is to put it in its own row and filter it out using --filter for those engines where it does not make sense. I am wondering — might we have an issue like 1396, where the combiner has a fit?

Thank you.

Sundar Mudupalli

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks, you were absolutely right that MySQL falls into the same issue as other engines for min/max. I made a mistake when excluding the low/high columns; the limitation only applies to SUM.

I've rectified this mistake. Good spot.

Original file line number Diff line number Diff line change
@@ -285,6 +285,33 @@ def test_column_validation_view_core_types_vw():
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_column_validation_tricky_dates_to_bigquery():
    """Test with date values that are at the extremes, e.g. 9999-12-31.

    Excluded low/high columns from SUM due to MySQL UNIX_TIMESTAMP() limiting input values
    to the range of TIMESTAMP (not DATE or DATETIME):
    https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_unix-timestamp
    "The valid range of argument values is the same as for the TIMESTAMP data type:
    '1970-01-01 00:00:01.000000' UTC to '2038-01-19 03:14:07.999999' UTC for 32-bit
    platforms; for MySQL running on 64-bit platforms, the valid range of argument
    values for UNIX_TIMESTAMP() is '1970-01-01 00:00:01.000000' UTC to
    '3001-01-19 03:14:07.999999' UTC (corresponding to 32536771199.999999 seconds)."
    """
    # TODO We can uncomment the min/max lines below once issue-1396 has been resolved.
    column_validation_test(
        tc="bq-conn",
        tables="pso_data_validator.dvt_tricky_dates",
        # min_cols="*",
        # max_cols="*",
        sum_cols="col_dt_epoch,col_ts_epoch",
        wildcard_include_timestamp=True,
    )


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
@@ -412,6 +439,19 @@ def test_row_validation_pangrams_to_bigquery():
id_type_test_assertions(df)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_dates_to_bigquery():
    """Row-level hash validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    source_table = "pso_data_validator.dvt_tricky_dates"
    row_validation_test(tc="bq-conn", tables=source_table, hash="*")


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
31 changes: 31 additions & 0 deletions tests/system/data_sources/test_oracle.py
Original file line number Diff line number Diff line change
@@ -390,6 +390,24 @@ def test_column_validation_view_core_types_vw():
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_column_validation_tricky_dates_to_bigquery():
    """Column validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    source_table = "pso_data_validator.dvt_tricky_dates"
    # TODO Restore the min/max aggregations once issue-1396 has been resolved.
    # TODO Restore the sum aggregation once issue-1391 has been resolved.
    column_validation_test(
        tables=source_table,
        tc="bq-conn",
        wildcard_include_timestamp=True,
        # min_cols="*",
        # max_cols="*",
        # sum_cols="*",
    )


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
@@ -913,6 +931,19 @@ def test_row_validation_uuid_rr_oracle_to_postgres():
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_dates_to_bigquery():
    """Row-level hash validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    source_table = "pso_data_validator.dvt_tricky_dates"
    row_validation_test(tc="bq-conn", tables=source_table, hash="*")


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
30 changes: 30 additions & 0 deletions tests/system/data_sources/test_postgres.py
Original file line number Diff line number Diff line change
@@ -981,6 +981,23 @@ def test_column_validation_group_by_timestamp():
), "Not all records are marked as success"


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_column_validation_tricky_dates_to_bigquery():
    """Column validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    source_table = "pso_data_validator.dvt_tricky_dates"
    # TODO Restore the min/max aggregations once issue-1396 has been resolved.
    column_validation_test(
        tables=source_table,
        tc="bq-conn",
        sum_cols="*",
        wildcard_include_timestamp=True,
        # min_cols="*",
        # max_cols="*",
    )


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
@@ -994,6 +1011,19 @@ def test_row_validation_identifiers():
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_dates_to_bigquery():
    """Row-level hash validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    source_table = "pso_data_validator.dvt_tricky_dates"
    row_validation_test(tc="bq-conn", tables=source_table, hash="*")


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
30 changes: 30 additions & 0 deletions tests/system/data_sources/test_snowflake.py
Original file line number Diff line number Diff line change
@@ -251,6 +251,23 @@ def test_column_validation_core_types_to_bigquery():
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_column_validation_tricky_dates_to_bigquery():
    """Column validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    # Maps the Snowflake source table onto the BigQuery target table.
    table_mapping = "PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_DATES=pso_data_validator.dvt_tricky_dates"
    # TODO Restore the min/max aggregations once issue-1396 has been resolved.
    column_validation_test(
        tables=table_mapping,
        tc="bq-conn",
        sum_cols="*",
        wildcard_include_timestamp=True,
        # min_cols="*",
        # max_cols="*",
    )


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
@@ -435,6 +452,19 @@ def test_row_validation_pangrams_to_bigquery():
id_type_test_assertions(df)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_dates_to_bigquery():
    """Row-level hash validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    # Maps the Snowflake source table onto the BigQuery target table.
    table_mapping = "PSO_DATA_VALIDATOR.PUBLIC.DVT_TRICKY_DATES=pso_data_validator.dvt_tricky_dates"
    row_validation_test(tc="bq-conn", tables=table_mapping, hash="*")


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
30 changes: 30 additions & 0 deletions tests/system/data_sources/test_sql_server.py
Original file line number Diff line number Diff line change
@@ -338,6 +338,23 @@ def test_column_validation_core_types_to_bigquery():
)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_column_validation_tricky_dates_to_bigquery():
    """Column validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    source_table = "pso_data_validator.dvt_tricky_dates"
    # TODO Restore the min/max aggregations once issue-1396 has been resolved.
    column_validation_test(
        tables=source_table,
        tc="bq-conn",
        sum_cols="*",
        wildcard_include_timestamp=True,
        # min_cols="*",
        # max_cols="*",
    )


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
@@ -491,6 +508,19 @@ def test_row_validation_pangrams_to_bigquery():
id_type_test_assertions(df)


@mock.patch(
    "data_validation.state_manager.StateManager.get_connection_config",
    new=mock_get_connection_config,
)
def test_row_validation_tricky_dates_to_bigquery():
    """Row-level hash validation of extreme date values (e.g. 9999-12-31) against BigQuery."""
    source_table = "pso_data_validator.dvt_tricky_dates"
    row_validation_test(tc="bq-conn", tables=source_table, hash="*")


@mock.patch(
"data_validation.state_manager.StateManager.get_connection_config",
new=mock_get_connection_config,
Loading