From 98fe851568d3496f603305406a0481e851b4f82c Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 16 Feb 2024 08:39:48 -0800 Subject: [PATCH 1/8] redshift/cdk-td-changes --- .../destination-redshift/build.gradle | 2 +- .../RedshiftDestinationHandler.java | 69 ++++++++++--------- .../typing_deduping/RedshiftSqlGenerator.java | 36 ---------- .../AbstractRedshiftTypingDedupingTest.java | 2 +- .../RedshiftSqlGeneratorIntegrationTest.java | 8 ++- 5 files changed, 45 insertions(+), 72 deletions(-) diff --git a/airbyte-integrations/connectors/destination-redshift/build.gradle b/airbyte-integrations/connectors/destination-redshift/build.gradle index de1540f7811a..48c1abf89ff0 100644 --- a/airbyte-integrations/connectors/destination-redshift/build.gradle +++ b/airbyte-integrations/connectors/destination-redshift/build.gradle @@ -6,7 +6,7 @@ plugins { airbyteJavaConnector { cdkVersionRequired = '0.20.0' features = ['db-destinations', 's3-destinations', 'typing-deduping'] - useLocalCdk = false + useLocalCdk = true } java { diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftDestinationHandler.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftDestinationHandler.java index a9b0ec0330e0..5a47c2436d00 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftDestinationHandler.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftDestinationHandler.java @@ -4,11 +4,17 @@ package io.airbyte.integrations.destination.redshift.typing_deduping; -import com.fasterxml.jackson.databind.JsonNode; +import static io.airbyte.cdk.integrations.base.JavaBaseConstants.*; + import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcDestinationHandler; +import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; +import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; +import io.airbyte.integrations.base.destination.typing_deduping.Array; import io.airbyte.integrations.base.destination.typing_deduping.Sql; -import io.airbyte.integrations.base.destination.typing_deduping.StreamId; +import io.airbyte.integrations.base.destination.typing_deduping.Struct; +import io.airbyte.integrations.base.destination.typing_deduping.Union; +import io.airbyte.integrations.base.destination.typing_deduping.UnsupportedOneOf; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; @@ -49,37 +55,36 @@ public void execute(final Sql sql) throws Exception { } } - /** - * Issuing a select 1 limit 1 query can be expensive, so relying on SVV_TABLE_INFO system table. - * EXPLAIN of the select 1 from table limit 1 query: (seq scan and then limit is applied, read from - * bottom to top) XN Lim it (co st=0. 0 .0.01 rows=1 width=0) -> XN Seq Scan on _airbyte_raw_ users - * (cost=0.00..1000.00 rows=100000 width=0) - * - * @param id - * @return - * @throws Exception - */ @Override - public boolean isFinalTableEmpty(final StreamId id) throws Exception { - // Redshift doesn't have an information_schema.tables table, so we have to use SVV_TABLE_INFO. - // From https://docs.aws.amazon.com/redshift/latest/dg/r_SVV_TABLE_INFO.html: - // > The SVV_TABLE_INFO view doesn't return any information for empty tables. - // So we just query for our specific table, and if we get no rows back, - // then we assume the table is empty. - // Note that because the column names are reserved words (table, schema, database), - // we need to enquote them. - final List query = jdbcDatabase.queryJsons( - """ - SELECT 1 - FROM SVV_TABLE_INFO - WHERE "database" = ? - AND "schema" = ? - AND "table" = ? - """, - databaseName, - id.finalNamespace(), - id.finalName()); - return query.isEmpty(); + protected String toJdbcTypeName(AirbyteType airbyteType) { + // This is mostly identical to the postgres implementation, but swaps jsonb to super + if (airbyteType instanceof final AirbyteProtocolType airbyteProtocolType) { + return toJdbcTypeName(airbyteProtocolType); + } + return switch (airbyteType.getTypeName()) { + case Struct.TYPE, UnsupportedOneOf.TYPE, Array.TYPE -> "super"; + // No nested Unions supported so this will definitely not result in infinite recursion. + case Union.TYPE -> toJdbcTypeName(((Union) airbyteType).chooseType()); + default -> throw new IllegalArgumentException("Unsupported AirbyteType: " + airbyteType); + }; } + private String toJdbcTypeName(final AirbyteProtocolType airbyteProtocolType) { + return switch (airbyteProtocolType) { + case STRING -> "varchar"; + case NUMBER -> "numeric"; + case INTEGER -> "int8"; + case BOOLEAN -> "bool"; + case TIMESTAMP_WITH_TIMEZONE -> "timestamptz"; + case TIMESTAMP_WITHOUT_TIMEZONE -> "timestamp"; + case TIME_WITH_TIMEZONE -> "timetz"; + case TIME_WITHOUT_TIMEZONE -> "time"; + case DATE -> "date"; + case UNKNOWN -> "super"; + }; + } + + // Do not use SVV_TABLE_INFO to get isFinalTableEmpty. + // See https://github.com/airbytehq/airbyte/issues/34357 + } diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java index 4c110d7a20cd..4562e35307dc 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java @@ -17,15 +17,12 @@ import static org.jooq.impl.DSL.val; import com.google.common.collect.ImmutableMap; -import io.airbyte.cdk.integrations.base.JavaBaseConstants; import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; -import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition; import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteType; import io.airbyte.integrations.base.destination.typing_deduping.Array; import io.airbyte.integrations.base.destination.typing_deduping.ColumnId; -import io.airbyte.integrations.base.destination.typing_deduping.StreamConfig; import io.airbyte.integrations.base.destination.typing_deduping.Struct; import io.airbyte.integrations.base.destination.typing_deduping.Union; import io.airbyte.integrations.base.destination.typing_deduping.UnsupportedOneOf; @@ -47,12 +44,6 @@ public class RedshiftSqlGenerator extends JdbcSqlGenerator { public static final String CASE_STATEMENT_SQL_TEMPLATE = "CASE WHEN {0} THEN {1} ELSE {2} END "; public static final String CASE_STATEMENT_NO_ELSE_SQL_TEMPLATE = "CASE WHEN {0} THEN {1} END "; - private static final Map REDSHIFT_TYPE_NAME_TO_JDBC_TYPE = ImmutableMap.of( - "numeric", "decimal", - "int8", "bigint", - "bool", "boolean", - "timestamptz", "timestamp with time zone", - "timetz", "time with time zone"); private static final String COLUMN_ERROR_MESSAGE_FORMAT = "Problem with `%s`"; private static final String AIRBYTE_META_COLUMN_ERRORS_KEY = "errors"; @@ -199,29 +190,6 @@ protected Field buildAirbyteMetaColumn(final LinkedHashMap intendedColumns = stream.columns().entrySet().stream() - .collect(LinkedHashMap::new, - (map, column) -> map.put(column.getKey().name(), toDialectType(column.getValue()).getTypeName()), - LinkedHashMap::putAll); - final LinkedHashMap actualColumns = existingTable.columns().entrySet().stream() - .filter(column -> JavaBaseConstants.V2_FINAL_TABLE_METADATA_COLUMNS.stream() - .noneMatch(airbyteColumnName -> airbyteColumnName.equals(column.getKey()))) - .collect(LinkedHashMap::new, - (map, column) -> map.put(column.getKey(), jdbcTypeNameFromRedshiftTypeName(column.getValue().type())), - LinkedHashMap::putAll); - - final boolean sameColumns = actualColumns.equals(intendedColumns) - && "varchar".equals(existingTable.columns().get(JavaBaseConstants.COLUMN_NAME_AB_RAW_ID).type()) - && "timestamptz".equals(existingTable.columns().get(JavaBaseConstants.COLUMN_NAME_AB_EXTRACTED_AT).type()) - && "super".equals(existingTable.columns().get(JavaBaseConstants.COLUMN_NAME_AB_META).type()); - - return sameColumns; - } - /** * Return ROW_NUMBER() OVER (PARTITION BY primaryKeys ORDER BY cursor DESC NULLS LAST, * _airbyte_extracted_at DESC) @@ -265,8 +233,4 @@ public boolean shouldRetry(final Exception e) { return false; } - private static String jdbcTypeNameFromRedshiftTypeName(final String redshiftType) { - return REDSHIFT_TYPE_NAME_TO_JDBC_TYPE.getOrDefault(redshiftType, redshiftType); - } - } diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/AbstractRedshiftTypingDedupingTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/AbstractRedshiftTypingDedupingTest.java index c5928ab534d6..514fd14363a8 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/AbstractRedshiftTypingDedupingTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/AbstractRedshiftTypingDedupingTest.java @@ -34,7 +34,7 @@ protected JdbcCompatibleSourceOperations getSourceOperations() { } @Override - protected SqlGenerator getSqlGenerator() { + protected SqlGenerator getSqlGenerator() { return new RedshiftSqlGenerator(new RedshiftSQLNameTransformer()) { // Override only for tests to print formatted SQL. The actual implementation should use unformatted diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java index 1ddca62a2bcd..119c84ec5a82 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java @@ -22,6 +22,7 @@ import io.airbyte.cdk.integrations.standardtest.destination.typing_deduping.JdbcSqlGeneratorIntegrationTest; import io.airbyte.commons.json.Jsons; import io.airbyte.integrations.base.destination.typing_deduping.DestinationHandler; +import io.airbyte.integrations.base.destination.typing_deduping.DestinationInitialState; import io.airbyte.integrations.base.destination.typing_deduping.Sql; import io.airbyte.integrations.destination.redshift.RedshiftInsertDestination; import io.airbyte.integrations.destination.redshift.RedshiftSQLNameTransformer; @@ -33,7 +34,9 @@ import java.time.LocalDateTime; import java.time.OffsetTime; import java.time.ZoneOffset; +import java.util.List; import java.util.Optional; +import java.util.concurrent.CompletableFuture; import javax.sql.DataSource; import org.jooq.DSLContext; import org.jooq.DataType; @@ -180,8 +183,9 @@ protected Field toJsonValue(final String valueAsString) { public void testCreateTableIncremental() throws Exception { final Sql sql = generator.createTable(incrementalDedupStream, "", false); destinationHandler.execute(sql); - - final Optional existingTable = destinationHandler.findExistingTable(incrementalDedupStream.id()); + List>> initialStates = destinationHandler.gatherInitialState(List.of(incrementalDedupStream)); + assertEquals(1, initialStates.size()); + final Optional existingTable = initialStates.getFirst().get().finalTableDefinition(); assertTrue(existingTable.isPresent()); assertAll( From cd7f08b30dc219aac634a50b4755b7002d28095b Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 16 Feb 2024 16:39:44 -0800 Subject: [PATCH 2/8] minor changes --- .../RedshiftStagingS3Destination.java | 2 +- .../RedshiftSqlGeneratorIntegrationTest.java | 57 ++++++++++--------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java index f1855447cf3c..159146545095 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java @@ -230,7 +230,7 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); final int defaultThreadCount = 8; if (disableTypeDedupe) { - typerDeduper = new NoOpTyperDeduperWithV1V2Migrations<>(sqlGenerator, redshiftDestinationHandler, parsedCatalog, migrator, v2TableMigrator, + typerDeduper = new NoOpTyperDeduperWithV1V2Migrations(sqlGenerator, redshiftDestinationHandler, parsedCatalog, migrator, v2TableMigrator, defaultThreadCount); } else { typerDeduper = diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java index 119c84ec5a82..0d5705c3c5af 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java @@ -6,8 +6,8 @@ import static io.airbyte.cdk.db.jdbc.DateTimeConverter.putJavaSQLTime; import static io.airbyte.integrations.destination.redshift.operations.RedshiftSqlOperations.escapeStringLiteral; -import static org.junit.jupiter.api.Assertions.assertAll; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertTrue; import com.fasterxml.jackson.databind.JsonNode; @@ -17,7 +17,6 @@ import io.airbyte.cdk.db.jdbc.JdbcDatabase; import io.airbyte.cdk.db.jdbc.JdbcSourceOperations; import io.airbyte.cdk.db.jdbc.JdbcUtils; -import io.airbyte.cdk.integrations.destination.jdbc.TableDefinition; import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; import io.airbyte.cdk.integrations.standardtest.destination.typing_deduping.JdbcSqlGeneratorIntegrationTest; import io.airbyte.commons.json.Jsons; @@ -35,8 +34,6 @@ import java.time.OffsetTime; import java.time.ZoneOffset; import java.util.List; -import java.util.Optional; -import java.util.concurrent.CompletableFuture; import javax.sql.DataSource; import org.jooq.DSLContext; import org.jooq.DataType; @@ -154,7 +151,7 @@ protected DSLContext getDslContext() { } @Override - protected DestinationHandler getDestinationHandler() { + protected DestinationHandler getDestinationHandler() { return new RedshiftDestinationHandler(databaseName, database); } @@ -183,30 +180,34 @@ protected Field toJsonValue(final String valueAsString) { public void testCreateTableIncremental() throws Exception { final Sql sql = generator.createTable(incrementalDedupStream, "", false); destinationHandler.execute(sql); - List>> initialStates = destinationHandler.gatherInitialState(List.of(incrementalDedupStream)); + List initialStates = destinationHandler.gatherInitialState(List.of(incrementalDedupStream)); assertEquals(1, initialStates.size()); - final Optional existingTable = initialStates.getFirst().get().finalTableDefinition(); - - assertTrue(existingTable.isPresent()); - assertAll( - () -> assertEquals("varchar", existingTable.get().columns().get("_airbyte_raw_id").type()), - () -> assertEquals("timestamptz", existingTable.get().columns().get("_airbyte_extracted_at").type()), - () -> assertEquals("super", existingTable.get().columns().get("_airbyte_meta").type()), - () -> assertEquals("int8", existingTable.get().columns().get("id1").type()), - () -> assertEquals("int8", existingTable.get().columns().get("id2").type()), - () -> assertEquals("timestamptz", existingTable.get().columns().get("updated_at").type()), - () -> assertEquals("super", existingTable.get().columns().get("struct").type()), - () -> assertEquals("super", existingTable.get().columns().get("array").type()), - () -> assertEquals("varchar", existingTable.get().columns().get("string").type()), - () -> assertEquals("numeric", existingTable.get().columns().get("number").type()), - () -> assertEquals("int8", existingTable.get().columns().get("integer").type()), - () -> assertEquals("bool", existingTable.get().columns().get("boolean").type()), - () -> assertEquals("timestamptz", existingTable.get().columns().get("timestamp_with_timezone").type()), - () -> assertEquals("timestamp", existingTable.get().columns().get("timestamp_without_timezone").type()), - () -> assertEquals("timetz", existingTable.get().columns().get("time_with_timezone").type()), - () -> assertEquals("time", existingTable.get().columns().get("time_without_timezone").type()), - () -> assertEquals("date", existingTable.get().columns().get("date").type()), - () -> assertEquals("super", existingTable.get().columns().get("unknown").type())); + final DestinationInitialState initialState = initialStates.getFirst(); + assertTrue(initialState.isFinalTablePresent()); + assertFalse(initialState.isSchemaMismatch()); + + // final Optional existingTable = initialStates.getFirst().get().finalTableDefinition(); + +// assertTrue(existingTable.isPresent()); +// assertAll( +// () -> assertEquals("varchar", existingTable.get().columns().get("_airbyte_raw_id").type()), +// () -> assertEquals("timestamptz", existingTable.get().columns().get("_airbyte_extracted_at").type()), +// () -> assertEquals("super", existingTable.get().columns().get("_airbyte_meta").type()), +// () -> assertEquals("int8", existingTable.get().columns().get("id1").type()), +// () -> assertEquals("int8", existingTable.get().columns().get("id2").type()), +// () -> assertEquals("timestamptz", existingTable.get().columns().get("updated_at").type()), +// () -> assertEquals("super", existingTable.get().columns().get("struct").type()), +// () -> assertEquals("super", existingTable.get().columns().get("array").type()), +// () -> assertEquals("varchar", existingTable.get().columns().get("string").type()), +// () -> assertEquals("numeric", existingTable.get().columns().get("number").type()), +// () -> assertEquals("int8", existingTable.get().columns().get("integer").type()), +// () -> assertEquals("bool", existingTable.get().columns().get("boolean").type()), +// () -> assertEquals("timestamptz", existingTable.get().columns().get("timestamp_with_timezone").type()), +// () -> assertEquals("timestamp", existingTable.get().columns().get("timestamp_without_timezone").type()), +// () -> assertEquals("timetz", existingTable.get().columns().get("time_with_timezone").type()), +// () -> assertEquals("time", existingTable.get().columns().get("time_without_timezone").type()), +// () -> assertEquals("date", existingTable.get().columns().get("date").type()), +// () -> assertEquals("super", existingTable.get().columns().get("unknown").type())); // TODO assert on table clustering, etc. } From 7b8d69d4a6691ab2daf0f2880dee3499b7e9a816 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 16 Feb 2024 16:58:34 -0800 Subject: [PATCH 3/8] nit unused Signed-off-by: Gireesh Sreepathi --- .../redshift/typing_deduping/RedshiftSqlGenerator.java | 1 - 1 file changed, 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java index 4562e35307dc..203431be5fed 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java @@ -159,7 +159,6 @@ Field arrayConcatStmt(final List> arrays) { } Field result = arrays.get(0); - String renderedSql = getDslContext().render(result); for (int i = 1; i < arrays.size(); i++) { // We lose some nice indentation but thats ok. Queryparts // are intentionally rendered here to avoid deep stack for function sql rendering. From f5bd5365d26bd1244377ba449029a1a7cac47d16 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Tue, 20 Feb 2024 20:17:40 -0800 Subject: [PATCH 4/8] fmt --- .../typing_deduping/RedshiftSqlGenerator.java | 2 -- .../RedshiftSqlGeneratorIntegrationTest.java | 23 ------------------- 2 files changed, 25 deletions(-) diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java index 203431be5fed..37f72c21c9f1 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGenerator.java @@ -16,7 +16,6 @@ import static org.jooq.impl.DSL.rowNumber; import static org.jooq.impl.DSL.val; -import com.google.common.collect.ImmutableMap; import io.airbyte.cdk.integrations.destination.NamingConventionTransformer; import io.airbyte.cdk.integrations.destination.jdbc.typing_deduping.JdbcSqlGenerator; import io.airbyte.integrations.base.destination.typing_deduping.AirbyteProtocolType; @@ -30,7 +29,6 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.stream.Collectors; import org.jooq.Condition; diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java index 0d5705c3c5af..854fe35cfff6 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/typing_deduping/RedshiftSqlGeneratorIntegrationTest.java @@ -185,29 +185,6 @@ public void testCreateTableIncremental() throws Exception { final DestinationInitialState initialState = initialStates.getFirst(); assertTrue(initialState.isFinalTablePresent()); assertFalse(initialState.isSchemaMismatch()); - - // final Optional existingTable = initialStates.getFirst().get().finalTableDefinition(); - -// assertTrue(existingTable.isPresent()); -// assertAll( -// () -> assertEquals("varchar", existingTable.get().columns().get("_airbyte_raw_id").type()), -// () -> assertEquals("timestamptz", existingTable.get().columns().get("_airbyte_extracted_at").type()), -// () -> assertEquals("super", existingTable.get().columns().get("_airbyte_meta").type()), -// () -> assertEquals("int8", existingTable.get().columns().get("id1").type()), -// () -> assertEquals("int8", existingTable.get().columns().get("id2").type()), -// () -> assertEquals("timestamptz", existingTable.get().columns().get("updated_at").type()), -// () -> assertEquals("super", existingTable.get().columns().get("struct").type()), -// () -> assertEquals("super", existingTable.get().columns().get("array").type()), -// () -> assertEquals("varchar", existingTable.get().columns().get("string").type()), -// () -> assertEquals("numeric", existingTable.get().columns().get("number").type()), -// () -> assertEquals("int8", existingTable.get().columns().get("integer").type()), -// () -> assertEquals("bool", existingTable.get().columns().get("boolean").type()), -// () -> assertEquals("timestamptz", existingTable.get().columns().get("timestamp_with_timezone").type()), -// () -> assertEquals("timestamp", existingTable.get().columns().get("timestamp_without_timezone").type()), -// () -> assertEquals("timetz", existingTable.get().columns().get("time_with_timezone").type()), -// () -> assertEquals("time", existingTable.get().columns().get("time_without_timezone").type()), -// () -> assertEquals("date", existingTable.get().columns().get("date").type()), -// () -> assertEquals("super", existingTable.get().columns().get("unknown").type())); // TODO assert on table clustering, etc. } From a2490cf39a6eac9ab3a70fb300404ae0cc269c8d Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Thu, 22 Feb 2024 10:09:26 -0800 Subject: [PATCH 5/8] fix compilation --- .../destination/redshift/RedshiftStagingS3Destination.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java index 159146545095..16189ce2004b 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java +++ b/airbyte-integrations/connectors/destination-redshift/src/main/java/io/airbyte/integrations/destination/redshift/RedshiftStagingS3Destination.java @@ -228,13 +228,11 @@ public SerializedAirbyteMessageConsumer getSerializedMessageConsumer(final JsonN final JdbcV1V2Migrator migrator = new JdbcV1V2Migrator(getNamingResolver(), database, databaseName); final NoopV2TableMigrator v2TableMigrator = new NoopV2TableMigrator(); final boolean disableTypeDedupe = config.has(DISABLE_TYPE_DEDUPE) && config.get(DISABLE_TYPE_DEDUPE).asBoolean(false); - final int defaultThreadCount = 8; if (disableTypeDedupe) { - typerDeduper = new NoOpTyperDeduperWithV1V2Migrations(sqlGenerator, redshiftDestinationHandler, parsedCatalog, migrator, v2TableMigrator, - defaultThreadCount); + typerDeduper = new NoOpTyperDeduperWithV1V2Migrations(sqlGenerator, redshiftDestinationHandler, parsedCatalog, migrator, v2TableMigrator); } else { typerDeduper = - new DefaultTyperDeduper<>(sqlGenerator, redshiftDestinationHandler, parsedCatalog, migrator, v2TableMigrator, defaultThreadCount); + new DefaultTyperDeduper(sqlGenerator, redshiftDestinationHandler, parsedCatalog, migrator, v2TableMigrator); } return StagingConsumerFactory.builder( outputRecordCollector, From 9d1472f9d33b698bc86abc1cce4ed7276eace5ad Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Thu, 22 Feb 2024 16:08:09 -0800 Subject: [PATCH 6/8] dat compilation failure --- .../redshift/RedshiftDestinationAcceptanceTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftDestinationAcceptanceTest.java b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftDestinationAcceptanceTest.java index b398c0b9e597..de31216ed222 100644 --- a/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftDestinationAcceptanceTest.java +++ b/airbyte-integrations/connectors/destination-redshift/src/test-integration/java/io/airbyte/integrations/destination/redshift/RedshiftDestinationAcceptanceTest.java @@ -241,9 +241,4 @@ protected int getMaxRecordValueLimit() { return RedshiftSqlOperations.REDSHIFT_VARCHAR_MAX_BYTE_SIZE; } - @Override - protected int getGenerateBigStringAddExtraCharacters() { - return 1; - } - } From 57d3eaf3884ad7367493452e12c40c40a109b445 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Thu, 22 Feb 2024 16:14:40 -0800 Subject: [PATCH 7/8] logistics --- .../destination-redshift/build.gradle | 4 +- .../destination-redshift/metadata.yaml | 2 +- docs/integrations/destinations/redshift.md | 183 +++++++++--------- .../core-concepts/typing-deduping.md | 4 +- 4 files changed, 97 insertions(+), 96 deletions(-) diff --git a/airbyte-integrations/connectors/destination-redshift/build.gradle b/airbyte-integrations/connectors/destination-redshift/build.gradle index 48c1abf89ff0..83cdbd0b451b 100644 --- a/airbyte-integrations/connectors/destination-redshift/build.gradle +++ b/airbyte-integrations/connectors/destination-redshift/build.gradle @@ -4,9 +4,9 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.20.0' + cdkVersionRequired = '0.23.0' features = ['db-destinations', 's3-destinations', 'typing-deduping'] - useLocalCdk = true + useLocalCdk = false } java { diff --git a/airbyte-integrations/connectors/destination-redshift/metadata.yaml b/airbyte-integrations/connectors/destination-redshift/metadata.yaml index e32903ada77d..368369cfe5d2 100644 --- a/airbyte-integrations/connectors/destination-redshift/metadata.yaml +++ b/airbyte-integrations/connectors/destination-redshift/metadata.yaml @@ -5,7 +5,7 @@ data: connectorSubtype: database connectorType: destination definitionId: f7a7d195-377f-cf5b-70a5-be6b819019dc - dockerImageTag: 2.1.7 + dockerImageTag: 2.1.8 dockerRepository: airbyte/destination-redshift documentationUrl: https://docs.airbyte.com/integrations/destinations/redshift githubIssueLabel: destination-redshift diff --git a/docs/integrations/destinations/redshift.md b/docs/integrations/destinations/redshift.md index ba51ae50ba1f..c37e01dcbb67 100644 --- a/docs/integrations/destinations/redshift.md +++ b/docs/integrations/destinations/redshift.md @@ -235,94 +235,95 @@ Each stream will be output into its own raw table in Redshift. Each table will c ## Changelog -| Version | Date | Pull Request | Subject | -|:--------|:-----------|:------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| 2.1.7 | 2024-02-09 | [\#34562](https://github.com/airbytehq/airbyte/pull/34562) | Switch back to jooq-based sql execution for standard insert | -| 2.1.6 | 2024-02-08 | [\#34502](https://github.com/airbytehq/airbyte/pull/34502) | Update to CDK version 0.17.0 | -| 2.1.5 | 2024-01-30 | [\#34680](https://github.com/airbytehq/airbyte/pull/34680) | Update to CDK version 0.16.3 | -| 2.1.4 | 2024-01-29 | [\#34634](https://github.com/airbytehq/airbyte/pull/34634) | Use lowercase raw schema and table in T+D [CDK changes](https://github.com/airbytehq/airbyte/pull/34533) | -| 2.1.3 | 2024-01-26 | [\#34544](https://github.com/airbytehq/airbyte/pull/34544) | Proper string-escaping in raw tables | -| 2.1.2 | 2024-01-24 | [\#34451](https://github.com/airbytehq/airbyte/pull/34451) | Improve logging for unparseable input | -| 2.1.1 | 2024-01-24 | [\#34458](https://github.com/airbytehq/airbyte/pull/34458) | Improve error reporting | -| 2.1.0 | 2024-01-24 | [\#34467](https://github.com/airbytehq/airbyte/pull/34467) | Upgrade CDK to 0.14.0 | -| 2.0.0 | 2024-01-23 | [\#34077](https://github.com/airbytehq/airbyte/pull/34077) | Destinations V2 | -| 0.8.0 | 2024-01-18 | [\#34236](https://github.com/airbytehq/airbyte/pull/34236) | Upgrade CDK to 0.13.0 | -| 0.7.15 | 2024-01-11 | [\#34186](https://github.com/airbytehq/airbyte/pull/34186) | Update check method with svv_table_info permission check, fix bug where s3 staging files were not being deleted. | -| 0.7.14 | 2024-01-08 | [\#34014](https://github.com/airbytehq/airbyte/pull/34014) | Update order of options in spec | -| 0.7.13 | 2024-01-05 | [\#33948](https://github.com/airbytehq/airbyte/pull/33948) | Fix NPE when prepare tables fail; Add case sensitive session for super; Bastion heartbeats added | -| 0.7.12 | 2024-01-03 | [\#33924](https://github.com/airbytehq/airbyte/pull/33924) | Add new ap-southeast-3 AWS region | -| 0.7.11 | 2024-01-04 | [\#33730](https://github.com/airbytehq/airbyte/pull/33730) | Internal code structure changes | -| 0.7.10 | 2024-01-04 | [\#33728](https://github.com/airbytehq/airbyte/pull/33728) | Allow users to disable final table creation | -| 0.7.9 | 2024-01-03 | [\#33877](https://github.com/airbytehq/airbyte/pull/33877) | Fix Jooq StackOverflowError | -| 0.7.8 | 2023-12-28 | [\#33788](https://github.com/airbytehq/airbyte/pull/33788) | Thread-safe fix for file part names (s3 staging files) | -| 0.7.7 | 2024-01-04 | [\#33728](https://github.com/airbytehq/airbyte/pull/33728) | Add option to only type and dedupe at the end of the sync | -| 0.7.6 | 2023-12-20 | [\#33704](https://github.com/airbytehq/airbyte/pull/33704) | Only run T+D on a stream if it had any records during the sync | -| 0.7.5 | 2023-12-18 | [\#33124](https://github.com/airbytehq/airbyte/pull/33124) | Make Schema Creation Separate from Table Creation | -| 0.7.4 | 2023-12-13 | [\#33369](https://github.com/airbytehq/airbyte/pull/33369) | Use jdbc common sql implementation | -| 0.7.3 | 2023-12-12 | [\#33367](https://github.com/airbytehq/airbyte/pull/33367) | DV2: fix migration logic | -| 0.7.2 | 2023-12-11 | [\#33335](https://github.com/airbytehq/airbyte/pull/33335) | DV2: improve data type mapping | -| 0.7.1 | 2023-12-11 | [\#33307](https://github.com/airbytehq/airbyte/pull/33307) | ~DV2: improve data type mapping~ No changes | -| 0.7.0 | 2023-12-05 | [\#32326](https://github.com/airbytehq/airbyte/pull/32326) | Opt in beta for v2 destination | -| 0.6.11 | 2023-11-29 | [\#32888](https://github.com/airbytehq/airbyte/pull/32888) | Use the new async framework. | -| 0.6.10 | 2023-11-06 | [\#32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | -| 0.6.9 | 2023-10-10 | [\#31083](https://github.com/airbytehq/airbyte/pull/31083) | Fix precision of numeric values in async destinations | -| 0.6.8 | 2023-10-10 | [\#31218](https://github.com/airbytehq/airbyte/pull/31218) | Clarify configuration groups | -| 0.6.7 | 2023-10-06 | [\#31153](https://github.com/airbytehq/airbyte/pull/31153) | Increase jvm GC retries | -| 0.6.6 | 2023-10-06 | [\#31129](https://github.com/airbytehq/airbyte/pull/31129) | Reduce async buffer size | -| 0.6.5 | 2023-08-18 | [\#28619](https://github.com/airbytehq/airbyte/pull/29640) | Fix duplicate staging object names in concurrent environment (e.g. async) | -| 0.6.4 | 2023-08-10 | [\#28619](https://github.com/airbytehq/airbyte/pull/28619) | Use async method for staging | -| 0.6.3 | 2023-08-07 | [\#29188](https://github.com/airbytehq/airbyte/pull/29188) | Internal code refactoring | -| 0.6.2 | 2023-07-24 | [\#28618](https://github.com/airbytehq/airbyte/pull/28618) | Add hooks in preparation for destinations v2 implementation | -| 0.6.1 | 2023-07-14 | [\#28345](https://github.com/airbytehq/airbyte/pull/28345) | Increment patch to trigger a rebuild | -| 0.6.0 | 2023-06-27 | [\#27993](https://github.com/airbytehq/airbyte/pull/27993) | destination-redshift will fail syncs if records or properties are too large, rather than silently skipping records and succeeding | -| 0.5.0 | 2023-06-27 | [\#27781](https://github.com/airbytehq/airbyte/pull/27781) | License Update: Elv2 | -| 0.4.9 | 2023-06-21 | [\#27555](https://github.com/airbytehq/airbyte/pull/27555) | Reduce image size | -| 0.4.8 | 2023-05-17 | [\#26165](https://github.com/airbytehq/airbyte/pull/26165) | Internal code change for future development (install normalization packages inside connector) | -| 0.4.7 | 2023-05-01 | [\#25698](https://github.com/airbytehq/airbyte/pull/25698) | Remove old VARCHAR to SUPER migration Java functionality | -| 0.4.6 | 2023-04-27 | [\#25346](https://github.com/airbytehq/airbyte/pull/25346) | Internal code cleanup | -| 0.4.5 | 2023-03-30 | [\#24736](https://github.com/airbytehq/airbyte/pull/24736) | Improve behavior when throttled by AWS API | -| 0.4.4 | 2023-03-29 | [\#24671](https://github.com/airbytehq/airbyte/pull/24671) | Fail faster in certain error cases | -| 0.4.3 | 2023-03-17 | [\#23788](https://github.com/airbytehq/airbyte/pull/23788) | S3-Parquet: added handler to process null values in arrays | -| 0.4.2 | 2023-03-10 | [\#23931](https://github.com/airbytehq/airbyte/pull/23931) | Added support for periodic buffer flush | -| 0.4.1 | 2023-03-10 | [\#23466](https://github.com/airbytehq/airbyte/pull/23466) | Changed S3 Avro type from Int to Long | -| 0.4.0 | 2023-02-28 | [\#23523](https://github.com/airbytehq/airbyte/pull/23523) | Add SSH Bastion Host configuration options | -| 0.3.56 | 2023-01-26 | [\#21890](https://github.com/airbytehq/airbyte/pull/21890) | Fixed configurable parameter for number of file buffers | -| 0.3.55 | 2023-01-26 | [\#20631](https://github.com/airbytehq/airbyte/pull/20631) | Added support for destination checkpointing with staging | -| 0.3.54 | 2023-01-18 | [\#21087](https://github.com/airbytehq/airbyte/pull/21087) | Wrap Authentication Errors as Config Exceptions | -| 0.3.53 | 2023-01-03 | [\#17273](https://github.com/airbytehq/airbyte/pull/17273) | Flatten JSON arrays to fix maximum size check for SUPER field | -| 0.3.52 | 2022-12-30 | [\#20879](https://github.com/airbytehq/airbyte/pull/20879) | Added configurable parameter for number of file buffers (⛔ this version has a bug and will not work; use `0.3.56` instead) | -| 0.3.51 | 2022-10-26 | [\#18434](https://github.com/airbytehq/airbyte/pull/18434) | Fix empty S3 bucket path handling | -| 0.3.50 | 2022-09-14 | [\#15668](https://github.com/airbytehq/airbyte/pull/15668) | Wrap logs in AirbyteLogMessage | -| 0.3.49 | 2022-09-01 | [\#16243](https://github.com/airbytehq/airbyte/pull/16243) | Fix Json to Avro conversion when there is field name clash from combined restrictions (`anyOf`, `oneOf`, `allOf` fields) | -| 0.3.48 | 2022-09-01 | | Added JDBC URL params | -| 0.3.47 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. | -| 0.3.46 | 2022-06-27 | [\#14190](https://github.com/airbytehq/airbyte/pull/13916) | Correctly cleanup S3 bucket when using a configured bucket path for S3 staging operations. | -| 0.3.45 | 2022-06-25 | [\#13916](https://github.com/airbytehq/airbyte/pull/13916) | Use the configured bucket path for S3 staging operations. | -| 0.3.44 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | -| 0.3.43 | 2022-06-24 | [\#13690](https://github.com/airbytehq/airbyte/pull/13690) | Improved discovery for NOT SUPER column | -| 0.3.42 | 2022-06-21 | [\#14013](https://github.com/airbytehq/airbyte/pull/14013) | Add an option to use encryption with staging in Redshift Destination | -| 0.3.40 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART\*SIZE_MB fields from connectors based on StreamTransferManager | -| 0.3.39 | 2022-06-02 | [\#13415](https://github.com/airbytehq/airbyte/pull/13415) | Add dropdown to select Uploading Method.
**PLEASE NOTICE**: After this update your **uploading method** will be set to **Standard**, you will need to reconfigure the method to use **S3 Staging** again. | -| 0.3.37 | 2022-05-23 | [\#13090](https://github.com/airbytehq/airbyte/pull/13090) | Removed redshiftDataTmpTableMode. Some refactoring. | -| 0.3.36 | 2022-05-23 | [\#12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | -| 0.3.35 | 2022-05-18 | [\#12940](https://github.com/airbytehq/airbyte/pull/12940) | Fixed maximum record size for SUPER type | -| 0.3.34 | 2022-05-16 | [\#12869](https://github.com/airbytehq/airbyte/pull/12869) | Fixed NPE in S3 staging check | -| 0.3.33 | 2022-05-04 | [\#12601](https://github.com/airbytehq/airbyte/pull/12601) | Apply buffering strategy for S3 staging | -| 0.3.32 | 2022-04-20 | [\#12085](https://github.com/airbytehq/airbyte/pull/12085) | Fixed bug with switching between INSERT and COPY config | -| 0.3.31 | 2022-04-19 | [\#12064](https://github.com/airbytehq/airbyte/pull/12064) | Added option to support SUPER datatype in \_airbyte_raw\*\*\* table | -| 0.3.29 | 2022-04-05 | [\#11729](https://github.com/airbytehq/airbyte/pull/11729) | Fixed bug with dashes in schema name | -| 0.3.28 | 2022-03-18 | [\#11254](https://github.com/airbytehq/airbyte/pull/11254) | Fixed missing records during S3 staging | -| 0.3.27 | 2022-02-25 | [\#10421](https://github.com/airbytehq/airbyte/pull/10421) | Refactor JDBC parameters handling | -| 0.3.25 | 2022-02-14 | [\#9920](https://github.com/airbytehq/airbyte/pull/9920) | Updated the size of staging files for S3 staging. Also, added closure of S3 writers to staging files when data has been written to an staging file. | -| 0.3.24 | 2022-02-14 | [\#10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | -| 0.3.23 | 2021-12-16 | [\#8855](https://github.com/airbytehq/airbyte/pull/8855) | Add `purgeStagingData` option to enable/disable deleting the staging data | -| 0.3.22 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Accept a path for the staging data | -| 0.3.21 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Moving classes around for better dependency management | -| 0.3.20 | 2021-11-08 | [\#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | -| 0.3.19 | 2021-10-21 | [\#7234](https://github.com/airbytehq/airbyte/pull/7234) | Allow SSL traffic only | -| 0.3.17 | 2021-10-12 | [\#6965](https://github.com/airbytehq/airbyte/pull/6965) | Added SSL Support | -| 0.3.16 | 2021-10-11 | [\#6949](https://github.com/airbytehq/airbyte/pull/6949) | Each stream was split into files of 10,000 records each for copying using S3 or GCS | -| 0.3.14 | 2021-10-08 | [\#5924](https://github.com/airbytehq/airbyte/pull/5924) | Fixed AWS S3 Staging COPY is writing records from different table in the same raw table | -| 0.3.13 | 2021-09-02 | [\#5745](https://github.com/airbytehq/airbyte/pull/5745) | Disable STATUPDATE flag when using S3 staging to speed up performance | -| 0.3.12 | 2021-07-21 | [\#3555](https://github.com/airbytehq/airbyte/pull/3555) | Enable partial checkpointing for halfway syncs | -| 0.3.11 | 2021-07-20 | [\#4874](https://github.com/airbytehq/airbyte/pull/4874) | allow `additionalProperties` in connector spec | +| Version | Date | Pull Request | Subject | +|:--------|:-----------|:-----------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| 2.1.8 | 2024-02-09 | [\#35354](https://github.com/airbytehq/airbyte/pull/35354) | Update to CDK 0.23.0; Gather required initial state upfront, remove dependency on svv_table_info for table empty check | +| 2.1.7 | 2024-02-09 | [\#34562](https://github.com/airbytehq/airbyte/pull/34562) | Switch back to jooq-based sql execution for standard insert | +| 2.1.6 | 2024-02-08 | [\#34502](https://github.com/airbytehq/airbyte/pull/34502) | Update to CDK version 0.17.0 | +| 2.1.5 | 2024-01-30 | [\#34680](https://github.com/airbytehq/airbyte/pull/34680) | Update to CDK version 0.16.3 | +| 2.1.4 | 2024-01-29 | [\#34634](https://github.com/airbytehq/airbyte/pull/34634) | Use lowercase raw schema and table in T+D [CDK changes](https://github.com/airbytehq/airbyte/pull/34533) | +| 2.1.3 | 2024-01-26 | [\#34544](https://github.com/airbytehq/airbyte/pull/34544) | Proper string-escaping in raw tables | +| 2.1.2 | 2024-01-24 | [\#34451](https://github.com/airbytehq/airbyte/pull/34451) | Improve logging for unparseable input | +| 2.1.1 | 2024-01-24 | [\#34458](https://github.com/airbytehq/airbyte/pull/34458) | Improve error reporting | +| 2.1.0 | 2024-01-24 | [\#34467](https://github.com/airbytehq/airbyte/pull/34467) | Upgrade CDK to 0.14.0 | +| 2.0.0 | 2024-01-23 | [\#34077](https://github.com/airbytehq/airbyte/pull/34077) | Destinations V2 | +| 0.8.0 | 2024-01-18 | [\#34236](https://github.com/airbytehq/airbyte/pull/34236) | Upgrade CDK to 0.13.0 | +| 0.7.15 | 2024-01-11 | [\#34186](https://github.com/airbytehq/airbyte/pull/34186) | Update check method with svv_table_info permission check, fix bug where s3 staging files were not being deleted. | +| 0.7.14 | 2024-01-08 | [\#34014](https://github.com/airbytehq/airbyte/pull/34014) | Update order of options in spec | +| 0.7.13 | 2024-01-05 | [\#33948](https://github.com/airbytehq/airbyte/pull/33948) | Fix NPE when prepare tables fail; Add case sensitive session for super; Bastion heartbeats added | +| 0.7.12 | 2024-01-03 | [\#33924](https://github.com/airbytehq/airbyte/pull/33924) | Add new ap-southeast-3 AWS region | +| 0.7.11 | 2024-01-04 | [\#33730](https://github.com/airbytehq/airbyte/pull/33730) | Internal code structure changes | +| 0.7.10 | 2024-01-04 | [\#33728](https://github.com/airbytehq/airbyte/pull/33728) | Allow users to disable final table creation | +| 0.7.9 | 2024-01-03 | [\#33877](https://github.com/airbytehq/airbyte/pull/33877) | Fix Jooq StackOverflowError | +| 0.7.8 | 2023-12-28 | [\#33788](https://github.com/airbytehq/airbyte/pull/33788) | Thread-safe fix for file part names (s3 staging files) | +| 0.7.7 | 2024-01-04 | [\#33728](https://github.com/airbytehq/airbyte/pull/33728) | Add option to only type and dedupe at the end of the sync | +| 0.7.6 | 2023-12-20 | [\#33704](https://github.com/airbytehq/airbyte/pull/33704) | Only run T+D on a stream if it had any records during the sync | +| 0.7.5 | 2023-12-18 | [\#33124](https://github.com/airbytehq/airbyte/pull/33124) | Make Schema Creation Separate from Table Creation | +| 0.7.4 | 2023-12-13 | [\#33369](https://github.com/airbytehq/airbyte/pull/33369) | Use jdbc common sql implementation | +| 0.7.3 | 2023-12-12 | [\#33367](https://github.com/airbytehq/airbyte/pull/33367) | DV2: fix migration logic | +| 0.7.2 | 2023-12-11 | [\#33335](https://github.com/airbytehq/airbyte/pull/33335) | DV2: improve data type mapping | +| 0.7.1 | 2023-12-11 | [\#33307](https://github.com/airbytehq/airbyte/pull/33307) | ~DV2: improve data type mapping~ No changes | +| 0.7.0 | 2023-12-05 | [\#32326](https://github.com/airbytehq/airbyte/pull/32326) | Opt in beta for v2 destination | +| 0.6.11 | 2023-11-29 | [\#32888](https://github.com/airbytehq/airbyte/pull/32888) | Use the new async framework. | +| 0.6.10 | 2023-11-06 | [\#32193](https://github.com/airbytehq/airbyte/pull/32193) | Adopt java CDK version 0.4.1. | +| 0.6.9 | 2023-10-10 | [\#31083](https://github.com/airbytehq/airbyte/pull/31083) | Fix precision of numeric values in async destinations | +| 0.6.8 | 2023-10-10 | [\#31218](https://github.com/airbytehq/airbyte/pull/31218) | Clarify configuration groups | +| 0.6.7 | 2023-10-06 | [\#31153](https://github.com/airbytehq/airbyte/pull/31153) | Increase jvm GC retries | +| 0.6.6 | 2023-10-06 | [\#31129](https://github.com/airbytehq/airbyte/pull/31129) | Reduce async buffer size | +| 0.6.5 | 2023-08-18 | [\#28619](https://github.com/airbytehq/airbyte/pull/29640) | Fix duplicate staging object names in concurrent environment (e.g. async) | +| 0.6.4 | 2023-08-10 | [\#28619](https://github.com/airbytehq/airbyte/pull/28619) | Use async method for staging | +| 0.6.3 | 2023-08-07 | [\#29188](https://github.com/airbytehq/airbyte/pull/29188) | Internal code refactoring | +| 0.6.2 | 2023-07-24 | [\#28618](https://github.com/airbytehq/airbyte/pull/28618) | Add hooks in preparation for destinations v2 implementation | +| 0.6.1 | 2023-07-14 | [\#28345](https://github.com/airbytehq/airbyte/pull/28345) | Increment patch to trigger a rebuild | +| 0.6.0 | 2023-06-27 | [\#27993](https://github.com/airbytehq/airbyte/pull/27993) | destination-redshift will fail syncs if records or properties are too large, rather than silently skipping records and succeeding | +| 0.5.0 | 2023-06-27 | [\#27781](https://github.com/airbytehq/airbyte/pull/27781) | License Update: Elv2 | +| 0.4.9 | 2023-06-21 | [\#27555](https://github.com/airbytehq/airbyte/pull/27555) | Reduce image size | +| 0.4.8 | 2023-05-17 | [\#26165](https://github.com/airbytehq/airbyte/pull/26165) | Internal code change for future development (install normalization packages inside connector) | +| 0.4.7 | 2023-05-01 | [\#25698](https://github.com/airbytehq/airbyte/pull/25698) | Remove old VARCHAR to SUPER migration Java functionality | +| 0.4.6 | 2023-04-27 | [\#25346](https://github.com/airbytehq/airbyte/pull/25346) | Internal code cleanup | +| 0.4.5 | 2023-03-30 | [\#24736](https://github.com/airbytehq/airbyte/pull/24736) | Improve behavior when throttled by AWS API | +| 0.4.4 | 2023-03-29 | [\#24671](https://github.com/airbytehq/airbyte/pull/24671) | Fail faster in certain error cases | +| 0.4.3 | 2023-03-17 | [\#23788](https://github.com/airbytehq/airbyte/pull/23788) | S3-Parquet: added handler to process null values in arrays | +| 0.4.2 | 2023-03-10 | [\#23931](https://github.com/airbytehq/airbyte/pull/23931) | Added support for periodic buffer flush | +| 0.4.1 | 2023-03-10 | [\#23466](https://github.com/airbytehq/airbyte/pull/23466) | Changed S3 Avro type from Int to Long | +| 0.4.0 | 2023-02-28 | [\#23523](https://github.com/airbytehq/airbyte/pull/23523) | Add SSH Bastion Host configuration options | +| 0.3.56 | 2023-01-26 | [\#21890](https://github.com/airbytehq/airbyte/pull/21890) | Fixed configurable parameter for number of file buffers | +| 0.3.55 | 2023-01-26 | [\#20631](https://github.com/airbytehq/airbyte/pull/20631) | Added support for destination checkpointing with staging | +| 0.3.54 | 2023-01-18 | [\#21087](https://github.com/airbytehq/airbyte/pull/21087) | Wrap Authentication Errors as Config Exceptions | +| 0.3.53 | 2023-01-03 | [\#17273](https://github.com/airbytehq/airbyte/pull/17273) | Flatten JSON arrays to fix maximum size check for SUPER field | +| 0.3.52 | 2022-12-30 | [\#20879](https://github.com/airbytehq/airbyte/pull/20879) | Added configurable parameter for number of file buffers (⛔ this version has a bug and will not work; use `0.3.56` instead) | +| 0.3.51 | 2022-10-26 | [\#18434](https://github.com/airbytehq/airbyte/pull/18434) | Fix empty S3 bucket path handling | +| 0.3.50 | 2022-09-14 | [\#15668](https://github.com/airbytehq/airbyte/pull/15668) | Wrap logs in AirbyteLogMessage | +| 0.3.49 | 2022-09-01 | [\#16243](https://github.com/airbytehq/airbyte/pull/16243) | Fix Json to Avro conversion when there is field name clash from combined restrictions (`anyOf`, `oneOf`, `allOf` fields) | +| 0.3.48 | 2022-09-01 | | Added JDBC URL params | +| 0.3.47 | 2022-07-15 | [\#14494](https://github.com/airbytehq/airbyte/pull/14494) | Make S3 output filename configurable. | +| 0.3.46 | 2022-06-27 | [\#14190](https://github.com/airbytehq/airbyte/pull/13916) | Correctly cleanup S3 bucket when using a configured bucket path for S3 staging operations. | +| 0.3.45 | 2022-06-25 | [\#13916](https://github.com/airbytehq/airbyte/pull/13916) | Use the configured bucket path for S3 staging operations. | +| 0.3.44 | 2022-06-24 | [\#14114](https://github.com/airbytehq/airbyte/pull/14114) | Remove "additionalProperties": false from specs for connectors with staging | +| 0.3.43 | 2022-06-24 | [\#13690](https://github.com/airbytehq/airbyte/pull/13690) | Improved discovery for NOT SUPER column | +| 0.3.42 | 2022-06-21 | [\#14013](https://github.com/airbytehq/airbyte/pull/14013) | Add an option to use encryption with staging in Redshift Destination | +| 0.3.40 | 2022-06-17 | [\#13753](https://github.com/airbytehq/airbyte/pull/13753) | Deprecate and remove PART\*SIZE_MB fields from connectors based on StreamTransferManager | +| 0.3.39 | 2022-06-02 | [\#13415](https://github.com/airbytehq/airbyte/pull/13415) | Add dropdown to select Uploading Method.
**PLEASE NOTICE**: After this update your **uploading method** will be set to **Standard**, you will need to reconfigure the method to use **S3 Staging** again. | +| 0.3.37 | 2022-05-23 | [\#13090](https://github.com/airbytehq/airbyte/pull/13090) | Removed redshiftDataTmpTableMode. Some refactoring. | +| 0.3.36 | 2022-05-23 | [\#12820](https://github.com/airbytehq/airbyte/pull/12820) | Improved 'check' operation performance | +| 0.3.35 | 2022-05-18 | [\#12940](https://github.com/airbytehq/airbyte/pull/12940) | Fixed maximum record size for SUPER type | +| 0.3.34 | 2022-05-16 | [\#12869](https://github.com/airbytehq/airbyte/pull/12869) | Fixed NPE in S3 staging check | +| 0.3.33 | 2022-05-04 | [\#12601](https://github.com/airbytehq/airbyte/pull/12601) | Apply buffering strategy for S3 staging | +| 0.3.32 | 2022-04-20 | [\#12085](https://github.com/airbytehq/airbyte/pull/12085) | Fixed bug with switching between INSERT and COPY config | +| 0.3.31 | 2022-04-19 | [\#12064](https://github.com/airbytehq/airbyte/pull/12064) | Added option to support SUPER datatype in \_airbyte_raw\*\*\* table | +| 0.3.29 | 2022-04-05 | [\#11729](https://github.com/airbytehq/airbyte/pull/11729) | Fixed bug with dashes in schema name | +| 0.3.28 | 2022-03-18 | [\#11254](https://github.com/airbytehq/airbyte/pull/11254) | Fixed missing records during S3 staging | +| 0.3.27 | 2022-02-25 | [\#10421](https://github.com/airbytehq/airbyte/pull/10421) | Refactor JDBC parameters handling | +| 0.3.25 | 2022-02-14 | [\#9920](https://github.com/airbytehq/airbyte/pull/9920) | Updated the size of staging files for S3 staging. Also, added closure of S3 writers to staging files when data has been written to an staging file. | +| 0.3.24 | 2022-02-14 | [\#10256](https://github.com/airbytehq/airbyte/pull/10256) | Add `-XX:+ExitOnOutOfMemoryError` JVM option | +| 0.3.23 | 2021-12-16 | [\#8855](https://github.com/airbytehq/airbyte/pull/8855) | Add `purgeStagingData` option to enable/disable deleting the staging data | +| 0.3.22 | 2021-12-15 | [\#8607](https://github.com/airbytehq/airbyte/pull/8607) | Accept a path for the staging data | +| 0.3.21 | 2021-12-10 | [\#8562](https://github.com/airbytehq/airbyte/pull/8562) | Moving classes around for better dependency management | +| 0.3.20 | 2021-11-08 | [\#7719](https://github.com/airbytehq/airbyte/pull/7719) | Improve handling of wide rows by buffering records based on their byte size rather than their count | +| 0.3.19 | 2021-10-21 | [\#7234](https://github.com/airbytehq/airbyte/pull/7234) | Allow SSL traffic only | +| 0.3.17 | 2021-10-12 | [\#6965](https://github.com/airbytehq/airbyte/pull/6965) | Added SSL Support | +| 0.3.16 | 2021-10-11 | [\#6949](https://github.com/airbytehq/airbyte/pull/6949) | Each stream was split into files of 10,000 records each for copying using S3 or GCS | +| 0.3.14 | 2021-10-08 | [\#5924](https://github.com/airbytehq/airbyte/pull/5924) | Fixed AWS S3 Staging COPY is writing records from different table in the same raw table | +| 0.3.13 | 2021-09-02 | [\#5745](https://github.com/airbytehq/airbyte/pull/5745) | Disable STATUPDATE flag when using S3 staging to speed up performance | +| 0.3.12 | 2021-07-21 | [\#3555](https://github.com/airbytehq/airbyte/pull/3555) | Enable partial checkpointing for halfway syncs | +| 0.3.11 | 2021-07-20 | [\#4874](https://github.com/airbytehq/airbyte/pull/4874) | allow `additionalProperties` in connector spec | diff --git a/docs/using-airbyte/core-concepts/typing-deduping.md b/docs/using-airbyte/core-concepts/typing-deduping.md index eb606fc3e096..c0c6c57906bd 100644 --- a/docs/using-airbyte/core-concepts/typing-deduping.md +++ b/docs/using-airbyte/core-concepts/typing-deduping.md @@ -114,8 +114,8 @@ observers. This is done for a number of reasons, including: column order and data types of each column. - **Transactional Full Refresh** - In order to keep your final tables consistently useful, when a refresh or reset occurs, airbyte will erase the raw tables, and then build a new tmp final table - first. Only at hte last second will Airbyte swap the old and tmp final tables, usually via a - rename. Otherwise, there would be a period of time where the final table is empty, which could + first. Airbyte attempts to do an atomic swap of old and tmp final tables, usually via a + rename at the last second. Otherwise, there would be a period of time where the final table is empty, which could cause downstream issues. This means that additional permissions, constraints, views, or other rules you apply to the final From 0eae7e686d5a8de5f0d1c58124a7409ce736a6d7 Mon Sep 17 00:00:00 2001 From: Gireesh Sreepathi Date: Fri, 23 Feb 2024 12:37:57 -0800 Subject: [PATCH 8/8] use new cdk --- .../connectors/destination-redshift/build.gradle | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte-integrations/connectors/destination-redshift/build.gradle b/airbyte-integrations/connectors/destination-redshift/build.gradle index 83cdbd0b451b..298b24ec4012 100644 --- a/airbyte-integrations/connectors/destination-redshift/build.gradle +++ b/airbyte-integrations/connectors/destination-redshift/build.gradle @@ -4,7 +4,7 @@ plugins { } airbyteJavaConnector { - cdkVersionRequired = '0.23.0' + cdkVersionRequired = '0.23.2' features = ['db-destinations', 's3-destinations', 'typing-deduping'] useLocalCdk = false }