From 2067931caffd1a9b9fea348a9e33c7128dffee0a Mon Sep 17 00:00:00 2001 From: Edward Gao Date: Tue, 9 Apr 2024 10:28:02 -0700 Subject: [PATCH] allow destinations to opt out of safe cast support --- .../BaseSqlGeneratorIntegrationTest.kt | 305 +++++++++++++----- .../dat/sync1_cursorchange_messages.jsonl | 2 +- .../resources/dat/sync1_messages.jsonl | 4 +- .../all_types_v1_inputrecords.jsonl | 3 - .../sqlgenerator/alltypes_inputrecords.jsonl | 5 +- .../alltypes_unsafe_inputrecords.jsonl | 3 - .../cdcupdate_inputrecords_raw.jsonl | 2 - .../incrementaldedup_inputrecords.jsonl | 2 +- .../mixedcasecolumnname_inputrecords.jsonl | 1 + .../safe_cast/all_types_v1_inputrecords.jsonl | 2 + .../safe_cast/alltypes_inputrecords.jsonl | 2 + .../cdcupdate_inputrecords_raw.jsonl | 2 + .../incrementaldedup_inputrecords.jsonl | 2 + 13 files changed, 232 insertions(+), 103 deletions(-) delete mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl create mode 100644 airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt index 93976bbe1d54..1b3919bf7a1b 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/kotlin/io/airbyte/integrations/base/destination/typing_deduping/BaseSqlGeneratorIntegrationTest.kt @@ -21,8 +21,12 @@ import java.util.function.Consumer import java.util.function.Function import java.util.stream.Collectors import java.util.stream.Stream +import kotlin.test.assertFails import org.junit.jupiter.api.AfterEach import org.junit.jupiter.api.Assertions +import org.junit.jupiter.api.Assertions.assertFalse +import org.junit.jupiter.api.Assumptions.assumeFalse +import org.junit.jupiter.api.Assumptions.assumeTrue import org.junit.jupiter.api.BeforeEach import org.junit.jupiter.api.Test import org.junit.jupiter.api.assertAll @@ -74,7 +78,7 @@ abstract class BaseSqlGeneratorIntegrationTest = mock() protected abstract val sqlGenerator: SqlGenerator - get + protected abstract val supportsSafeCast: Boolean /** * Subclasses should override this method if they need to make changes to the stream ID. For @@ -287,8 +291,8 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords("sqlgenerator/alltypes_inputrecords.jsonl") + ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/alltypes_inputrecords.jsonl" + ) + ) + } + insertRawTableRecords(streamId, inputRecords) executeTypeAndDedupe( generator, @@ -856,12 +952,20 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/incrementaldedup_inputrecords.jsonl" + ) ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl" + ) + ) + } + insertRawTableRecords(streamId, inputRecords) executeTypeAndDedupe( generator, @@ -947,12 +1051,20 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/incrementaldedup_inputrecords.jsonl" + ) ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl" + ) + ) + } + insertRawTableRecords(streamId, inputRecords) executeTypeAndDedupe( generator, @@ -962,7 +1074,12 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords("sqlgenerator/cdcupdate_inputrecords_raw.jsonl") ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl" + ) + ) + } + insertRawTableRecords(streamId, inputRecords) insertFinalTableRecords( true, streamId, @@ -1109,7 +1232,7 @@ abstract class BaseSqlGeneratorIntegrationTest = + ArrayList( + BaseTypingDedupingTest.readRecords("sqlgenerator/all_types_v1_inputrecords.jsonl") ) - ) + if (supportsSafeCast) { + inputRecords.addAll( + BaseTypingDedupingTest.readRecords( + "sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl" + ) + ) + } + insertV1RawTableRecords(v1RawTableStreamId, inputRecords) val migration = generator.migrateFromV1toV2( streamId, @@ -1536,9 +1660,16 @@ abstract class BaseSqlGeneratorIntegrationTest @@ -1606,8 +1737,8 @@ abstract class BaseSqlGeneratorIntegrationTest?"}}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl index e2cde49ad980..d54194eadac2 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/all_types_v1_inputrecords.jsonl @@ -1,7 +1,4 @@ {"_airbyte_ab_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} {"_airbyte_ab_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} {"_airbyte_ab_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}} -// Note that array and struct have invalid values ({} and [] respectively). -{"_airbyte_ab_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} {"_airbyte_ab_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "number": 67.174118, "struct": {"nested_number": 67.174118}, "array": [67.174118], "unknown": 67.174118}} -{"_airbyte_ab_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fce", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "IamACaseSensitiveColumnName": "Case senstive value"}} \ No newline at end of file diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl index 8f8ced8a26a1..b965f22e8bad 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_inputrecords.jsonl @@ -1,7 +1,4 @@ {"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} {"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} -{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}} -// Note that array and struct have invalid values ({} and [] respectively). -{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}, "_airbyte_meta": {"changes": [{"field": "string", "change": "NULLED", "reason": "SOURCE_SERIALIZATION_ERROR"}]}} +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fbe", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 3, "id2": 100, "updated_at": "2023-01-01T01:00:00Z"}, "_airbyte_meta": {"changes": [{"field": "string", "change": "NULLED", "reason": "SOURCE_SERIALIZATION_ERROR"}]}} {"_airbyte_raw_id": "a4a783b5-7729-4d0b-b659-48ceb08713f1", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "number": 67.174118, "struct": {"nested_number": 67.174118}, "array": [67.174118], "unknown": 67.174118}} -{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fce", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "IamACaseSensitiveColumnName": "Case senstive value"}} \ No newline at end of file diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl deleted file mode 100644 index 55a509408d14..000000000000 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/alltypes_unsafe_inputrecords.jsonl +++ /dev/null @@ -1,3 +0,0 @@ -// this is a strict subset of the alltypes_inputrecords file. All these records have valid values, i.e. can be processed with unsafe casting. -{"_airbyte_raw_id": "14ba7c7f-e398-4e69-ac22-28d578400dbc", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": ["foo"], "struct": {"foo": "bar"}, "string": "foo", "number": 42.1, "integer": 42, "boolean": true, "timestamp_with_timezone": "2023-01-23T12:34:56Z", "timestamp_without_timezone": "2023-01-23T12:34:56", "time_with_timezone": "12:34:56Z", "time_without_timezone": "12:34:56", "date": "2023-01-23", "unknown": {}}} -{"_airbyte_raw_id": "53ce75a5-5bcc-47a3-b45c-96c2015cfe35", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": null, "struct": null, "string": null, "number": null, "integer": null, "boolean": null, "timestamp_with_timezone": null, "timestamp_without_timezone": null, "time_with_timezone": null, "time_without_timezone": null, "date": null, "unknown": null}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl index e5752b06c025..2de80d327906 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/cdcupdate_inputrecords_raw.jsonl @@ -12,5 +12,3 @@ {"_airbyte_raw_id": "4d8674a5-eb6e-41ca-a310-69c64c88d101", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 0, "id2": 100, "updated_at": "2023-01-01T05:00:00Z", "_ab_cdc_deleted_at": null, "string": "zombie_returned"}} // CDC generally outputs an explicit null for deleted_at, but verify that we can also handle the case where deleted_at is unset. {"_airbyte_raw_id": "f0b59e49-8c74-4101-9f14-cb4d1193fd5a", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T06:00:00Z", "string": "charlie"}} -// Invalid values in _ab_cdc_deleted_at result in the record NOT being deleted. This behavior is up for debate, but it's an extreme edge case so not a high priority. -{"_airbyte_raw_id": "d4e1d989-c115-403c-9e68-5d320e6376bb", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T07:00:00Z", "_ab_cdc_deleted_at": {}, "string": "david1"}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl index 1d850d9dc74b..b2e0d7925650 100644 --- a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/incrementaldedup_inputrecords.jsonl @@ -1,3 +1,3 @@ {"_airbyte_raw_id": "d7b81af0-01da-4846-a650-cc398986bc99", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "string": "Alice", "struct": {"city": "San Francisco", "state": "CA"}, "integer": 42}} {"_airbyte_raw_id": "80c99b54-54b4-43bd-b51b-1f67dafa2c52", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 1, "id2": 100, "updated_at": "2023-01-01T02:00:00Z", "string": "Alice", "struct": {"city": "San Diego", "state": "CA"}, "integer": 84}} -{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": "oops"}} +{"_airbyte_raw_id": "ad690bfb-c2c2-4172-bd73-a16c86ccbb67", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:00Z", "string": "Bob", "integer": 126}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl new file mode 100644 index 000000000000..c3ba7ea1519e --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/mixedcasecolumnname_inputrecords.jsonl @@ -0,0 +1 @@ +{"_airbyte_raw_id": "7e1fac0c-017e-4ad6-bc78-334a34d64fce", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 6, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "IamACaseSensitiveColumnName": "Case senstive value"}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl new file mode 100644 index 000000000000..b2f0357034ae --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/all_types_v1_inputrecords.jsonl @@ -0,0 +1,2 @@ +// Note that array and struct have invalid values ({} and [] respectively). +{"_airbyte_ab_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_emitted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl new file mode 100644 index 000000000000..2eb8fae2374d --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/alltypes_inputrecords.jsonl @@ -0,0 +1,2 @@ +// Note that array and struct have invalid values ({} and [] respectively). +{"_airbyte_raw_id": "84242b60-3a34-4531-ad75-a26702960a9a", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 4, "id2": 100, "updated_at": "2023-01-01T01:00:00Z", "array": {}, "struct": [], "string": null, "number": "foo", "integer": "bar", "boolean": "fizz", "timestamp_with_timezone": {}, "timestamp_without_timezone": {}, "time_with_timezone": {}, "time_without_timezone": {}, "date": "airbyte", "unknown": null}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl new file mode 100644 index 000000000000..31439993f8e1 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/cdcupdate_inputrecords_raw.jsonl @@ -0,0 +1,2 @@ +// Invalid values in _ab_cdc_deleted_at result in the record NOT being deleted. This behavior is up for debate, but it's an extreme edge case so not a high priority. +{"_airbyte_raw_id": "d4e1d989-c115-403c-9e68-5d320e6376bb", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 5, "id2": 100, "updated_at": "2023-01-01T07:00:00Z", "_ab_cdc_deleted_at": {}, "string": "david1"}} diff --git a/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl new file mode 100644 index 000000000000..b67035c060f2 --- /dev/null +++ b/airbyte-cdk/java/airbyte-cdk/typing-deduping/src/testFixtures/resources/sqlgenerator/safe_cast/incrementaldedup_inputrecords.jsonl @@ -0,0 +1,2 @@ +// This record verifies that we can update an existing record, with a new record where one column has an invalid value. +{"_airbyte_raw_id": "b9ac9f01-abc1-4e7c-89e5-eac9223d5726", "_airbyte_extracted_at": "2023-01-01T00:00:00Z", "_airbyte_data": {"id1": 2, "id2": 100, "updated_at": "2023-01-01T03:00:01Z", "string": "Bob", "integer": "oops"}}