-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
DV2 TyperDeduper: Extract migrations to separate method #35376
Changes from all commits
d6199c6
f946fb9
f5e878e
7a35038
189e32e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
version=0.23.6 | ||
version=0.23.7 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,7 +8,6 @@ | |
import static io.airbyte.cdk.integrations.util.ConnectorExceptionUtil.getResultsOrLogAndThrowFirst; | ||
import static io.airbyte.integrations.base.destination.typing_deduping.FutureUtils.*; | ||
import static io.airbyte.integrations.base.destination.typing_deduping.FutureUtils.reduceExceptions; | ||
import static io.airbyte.integrations.base.destination.typing_deduping.TyperDeduperUtilKt.prepareAllSchemas; | ||
import static java.util.Collections.singleton; | ||
|
||
import io.airbyte.cdk.integrations.destination.StreamSyncSummary; | ||
|
@@ -43,7 +42,7 @@ | |
* <p> | ||
* In a typical sync, destinations should call the methods: | ||
* <ol> | ||
* <li>{@link #prepareTables()} once at the start of the sync</li> | ||
* <li>{@link #prepareFinalTables()} once at the start of the sync</li> | ||
* <li>{@link #typeAndDedupe(String, String, boolean)} as needed throughout the sync</li> | ||
* <li>{@link #commitFinalTables()} once at the end of the sync</li> | ||
* </ol> | ||
|
@@ -104,27 +103,23 @@ public DefaultTyperDeduper(final SqlGenerator sqlGenerator, | |
this(sqlGenerator, destinationHandler, parsedCatalog, v1V2Migrator, new NoopV2TableMigrator()); | ||
} | ||
|
||
private void prepareSchemas(final ParsedCatalog parsedCatalog) throws Exception { | ||
prepareAllSchemas(parsedCatalog, sqlGenerator, destinationHandler); | ||
/**
 * Prepares the destination schemas and then runs the raw-table migrations.
 * <p>
 * Both steps are delegated to {@code TyperDeduperUtil}: first {@code prepareSchemas}, then
 * {@code executeRawTableMigrations}, each using the {@code sqlGenerator}, {@code destinationHandler}
 * and {@code parsedCatalog} in scope. The migration step additionally receives the
 * {@code executorService} and both migrators.
 */
@Override | ||
public void prepareSchemasAndRunMigrations() { | ||
// Technically kind of weird to call this here, but it's the best place we have. | ||
// Ideally, we'd create just airbyte_internal here, and defer creating the final table schemas | ||
// until prepareFinalTables... but it doesn't really matter. | ||
TyperDeduperUtil.prepareSchemas(sqlGenerator, destinationHandler, parsedCatalog); | ||
TyperDeduperUtil.executeRawTableMigrations(executorService, sqlGenerator, destinationHandler, v1V2Migrator, v2TableMigrator, parsedCatalog); | ||
} | ||
|
||
@Override | ||
public void prepareTables() throws Exception { | ||
public void prepareFinalTables() throws Exception { | ||
if (overwriteStreamsWithTmpTable != null) { | ||
throw new IllegalStateException("Tables were already prepared."); | ||
} | ||
overwriteStreamsWithTmpTable = ConcurrentHashMap.newKeySet(); | ||
LOGGER.info("Preparing tables"); | ||
|
||
// This is intentionally not done in parallel to avoid rate limits in some destinations. | ||
prepareSchemas(parsedCatalog); | ||
|
||
// TODO: Either the migrations run the soft reset and create v2 tables or the actual prepare tables. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This logic now happens in `TyperDeduperUtil.executeRawTableMigrations`. |
||
// unify the logic with current state of raw tables & final tables. This is done first before gather | ||
// initial state to avoid recreating final tables later again. | ||
final List<Either<? extends Exception, Void>> runMigrationsResult = | ||
CompletableFutures.allOf(parsedCatalog.streams().stream().map(this::runMigrationsAsync).toList()).toCompletableFuture().join(); | ||
getResultsOrLogAndThrowFirst("The following exceptions were thrown attempting to run migrations:\n", runMigrationsResult); | ||
final List<DestinationInitialState> initialStates = destinationHandler.gatherInitialState(parsedCatalog.streams()); | ||
final List<Either<? extends Exception, Void>> prepareTablesFutureResult = CompletableFutures.allOf( | ||
initialStates.stream().map(this::prepareTablesFuture).toList()).toCompletableFuture().join(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,15 +1,72 @@ | ||
package io.airbyte.integrations.base.destination.typing_deduping | ||
|
||
import com.google.common.collect.Streams | ||
import io.airbyte.cdk.integrations.util.ConnectorExceptionUtil.getResultsOrLogAndThrowFirst | ||
import io.airbyte.commons.concurrency.CompletableFutures | ||
import java.util.* | ||
import java.util.concurrent.CompletableFuture | ||
import java.util.concurrent.CompletionStage | ||
import java.util.concurrent.ExecutorService | ||
|
||
/** | ||
* Extracts all the "raw" and "final" schemas identified in the [parsedCatalog] and ensures they | ||
* exist in the Destination Database. | ||
*/ | ||
fun prepareAllSchemas(parsedCatalog: ParsedCatalog, sqlGenerator: SqlGenerator, destinationHandler: DestinationHandler) { | ||
val rawSchema = parsedCatalog.streams.mapNotNull { it.id.rawNamespace } | ||
val finalSchema = parsedCatalog.streams.mapNotNull { it.id.finalNamespace } | ||
val createAllSchemasSql = rawSchema.union(finalSchema) | ||
.map { sqlGenerator.createSchema(it) } | ||
.toList() | ||
destinationHandler.execute(Sql.concat(createAllSchemasSql)) | ||
|
||
class TyperDeduperUtil { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This class is partly taken from @jbfbell's https://github.com/airbytehq/airbyte/pull/34637/files#diff-c9e7bf1074e965f63b135422dd2a7ac6ba196046b226a0ffe3a855a4eb82ed3b; it'll have some merge conflicts because I reordered the arguments in |
||
companion object { | ||
|
||
/**
 * Runs the raw-table migrations for every stream in [parsedCatalog].
 *
 * One asynchronous migration task per stream is created via `runMigrationsAsync` and submitted
 * to [executorService]. The calling thread then blocks (`join`) on the combined stage returned
 * by `CompletableFutures.allOf`, and the collected results are passed to
 * `getResultsOrLogAndThrowFirst`.
 */
@JvmStatic | ||
fun executeRawTableMigrations( | ||
executorService: ExecutorService, | ||
sqlGenerator: SqlGenerator, | ||
destinationHandler: DestinationHandler, | ||
v1V2Migrator: DestinationV1V2Migrator, | ||
v2TableMigrator: V2TableMigrator, | ||
parsedCatalog: ParsedCatalog | ||
) { | ||
// TODO: Either the migrations run the soft reset and create v2 tables or the actual prepare tables. | ||
// unify the logic | ||
// with current state of raw tables & final tables. This is done first before gather initial state | ||
// to avoid recreating | ||
// final tables later again. | ||
val runMigrationsResult = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. as noted above - this code originally lived in DefaultTyperDeduper, I just extracted it to a separate class. |
||
CompletableFutures.allOf(parsedCatalog.streams().stream() | ||
.map { streamConfig -> runMigrationsAsync(executorService, sqlGenerator, destinationHandler, v1V2Migrator, v2TableMigrator, streamConfig) } | ||
.toList()).toCompletableFuture().join() | ||
// NOTE(review): judging by its name, this logs every failure and rethrows the first one;
// confirm against ConnectorExceptionUtil.
getResultsOrLogAndThrowFirst("The following exceptions were thrown attempting to run migrations:\n", runMigrationsResult) | ||
} | ||
|
||
/**
 * Makes sure every namespace referenced by [parsedCatalog] exists in the destination.
 *
 * Raw namespaces are gathered first, followed by final namespaces. Null entries are dropped
 * and duplicates removed (first occurrence wins) before each remaining namespace is passed to
 * [SqlGenerator.createSchema]. The resulting statements are concatenated and executed through
 * [destinationHandler] in a single call.
 */
@JvmStatic
fun prepareSchemas(
    sqlGenerator: SqlGenerator,
    destinationHandler: DestinationHandler,
    parsedCatalog: ParsedCatalog
) {
    val streamConfigs = parsedCatalog.streams
    val rawNamespaces = streamConfigs.map { it.id.rawNamespace }
    val finalNamespaces = streamConfigs.map { it.id.finalNamespace }

    // Same ordering as before: all raw namespaces, then all final namespaces.
    val uniqueNamespaces = (rawNamespaces + finalNamespaces)
        .filterNotNull()
        .distinct()

    val schemaStatements = uniqueNamespaces.map { namespace -> sqlGenerator.createSchema(namespace) }
    destinationHandler.execute(Sql.concat(schemaStatements))
}
|
||
/**
 * Submits the migrations for a single [streamConfig] to [executorService].
 *
 * Inside one task, [v1V2Migrator] runs first and [v2TableMigrator] second. Any exception
 * raised by either is rethrown wrapped in a [RuntimeException], which completes the returned
 * stage exceptionally.
 */
private fun runMigrationsAsync(
    executorService: ExecutorService,
    sqlGenerator: SqlGenerator,
    destinationHandler: DestinationHandler,
    v1V2Migrator: DestinationV1V2Migrator,
    v2TableMigrator: V2TableMigrator,
    streamConfig: StreamConfig
): CompletionStage<Void> {
    val migrationTask = Runnable {
        try {
            v1V2Migrator.migrateIfNecessary(sqlGenerator, destinationHandler, streamConfig)
            v2TableMigrator.migrateIfNecessary(streamConfig)
        } catch (cause: Exception) {
            throw RuntimeException(cause)
        }
    }
    return CompletableFuture.runAsync(migrationTask, executorService)
}
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This call now happens in `prepareSchemasAndRunMigrations`.