-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' into tope/adjust/migrate-manifest-only
- Loading branch information
Showing
156 changed files
with
26,776 additions
and
1,907 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/data/AirbyteSchemaMapper.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* | ||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.load.data | ||
|
||
/**
 * Transformer over [AirbyteType] schemas with one hook per concrete schema type.
 *
 * [map] looks at the runtime type of the schema it receives and forwards it to the matching
 * `mapXxx` hook. Implementations provide the hooks; the dispatch itself is shared.
 */
interface AirbyteSchemaMapper {
    /**
     * Dispatches [schema] to the hook that corresponds to its concrete type.
     *
     * The `when` has no `else` branch on purpose: it must stay exhaustive over [AirbyteType], so
     * adding a new schema type forces a new hook to be added here.
     *
     * @param schema the schema node to transform
     * @return whatever the selected hook returns
     */
    fun map(schema: AirbyteType): AirbyteType {
        return when (schema) {
            is NullType -> mapNull(schema)
            is StringType -> mapString(schema)
            is BooleanType -> mapBoolean(schema)
            is IntegerType -> mapInteger(schema)
            is NumberType -> mapNumber(schema)
            is ArrayType -> mapArray(schema)
            is ArrayTypeWithoutSchema -> mapArrayWithoutSchema(schema)
            is ObjectType -> mapObject(schema)
            is ObjectTypeWithoutSchema -> mapObjectWithoutSchema(schema)
            is ObjectTypeWithEmptySchema -> mapObjectWithEmptySchema(schema)
            is UnionType -> mapUnion(schema)
            is DateType -> mapDate(schema)
            is TimeTypeWithTimezone -> mapTimeTypeWithTimezone(schema)
            is TimeTypeWithoutTimezone -> mapTimeTypeWithoutTimezone(schema)
            is TimestampTypeWithTimezone -> mapTimestampTypeWithTimezone(schema)
            is TimestampTypeWithoutTimezone -> mapTimestampTypeWithoutTimezone(schema)
            is UnknownType -> mapUnknown(schema)
        }
    }

    /**
     * Hook for a single [FieldType]. [map] never calls this directly; presumably composite hooks
     * such as [mapObject] and [mapArray] use it for their children (confirm in implementations).
     */
    fun mapField(field: FieldType): FieldType

    // Scalar schema hooks.
    fun mapNull(schema: NullType): AirbyteType
    fun mapString(schema: StringType): AirbyteType
    fun mapBoolean(schema: BooleanType): AirbyteType
    fun mapInteger(schema: IntegerType): AirbyteType
    fun mapNumber(schema: NumberType): AirbyteType

    // Composite schema hooks.
    fun mapArray(schema: ArrayType): AirbyteType
    fun mapArrayWithoutSchema(schema: ArrayTypeWithoutSchema): AirbyteType
    fun mapObject(schema: ObjectType): AirbyteType
    fun mapObjectWithoutSchema(schema: ObjectTypeWithoutSchema): AirbyteType
    fun mapObjectWithEmptySchema(schema: ObjectTypeWithEmptySchema): AirbyteType
    fun mapUnion(schema: UnionType): AirbyteType

    // Temporal schema hooks.
    fun mapDate(schema: DateType): AirbyteType
    fun mapTimeTypeWithTimezone(schema: TimeTypeWithTimezone): AirbyteType
    fun mapTimeTypeWithoutTimezone(schema: TimeTypeWithoutTimezone): AirbyteType
    fun mapTimestampTypeWithTimezone(schema: TimestampTypeWithTimezone): AirbyteType
    fun mapTimestampTypeWithoutTimezone(schema: TimestampTypeWithoutTimezone): AirbyteType

    /** Hook for schemas represented as [UnknownType]. */
    fun mapUnknown(schema: UnknownType): AirbyteType
}
116 changes: 116 additions & 0 deletions
116
...cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/data/AirbyteValueIdentityMapper.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
/* | ||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.load.data | ||
|
||
import io.airbyte.cdk.load.message.DestinationRecord | ||
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Change | ||
import io.airbyte.protocol.models.v0.AirbyteRecordMessageMetaChange.Reason | ||
|
||
/**
 * Walks an [AirbyteValue] alongside its [AirbyteType] schema and returns an equivalent value.
 *
 * Every `mapXxx` hook is `open` and returns its input unchanged (composite hooks rebuild the
 * container while recursing), so subclasses override only the hooks they need to alter.
 *
 * @property meta record metadata; an entry is appended to `meta.changes` for every value whose
 * mapping throws and is therefore replaced by the result of [mapNull].
 */
open class AirbyteValueIdentityMapper(
    open val meta: DestinationRecord.Meta,
) {
    /**
     * Records that the value at [path] was nulled.
     *
     * @param path path segments of the offending value; they are joined with `.`
     * @param reason reason attached to the recorded change
     */
    private fun collectFailure(
        path: List<String>,
        reason: Reason = Reason.DESTINATION_SERIALIZATION_ERROR
    ) {
        meta.changes.add(DestinationRecord.Change(path.joinToString("."), Change.NULLED, reason))
    }

    /**
     * Maps [value] according to [schema] by dispatching to the hook for the schema's type.
     *
     * A [NullValue] is handed directly to [mapNull] and is not reported as a failure. Without this
     * guard the casts below would throw for every null or absent property (see [mapObject], which
     * substitutes [NullValue] for missing keys) and each one would be recorded as a serialization
     * error. [UnionType] is excluded from the guard because [mapUnion] accepts any value, so
     * overrides of it still get to see nulls.
     *
     * Any exception thrown while mapping a non-null value (including a cast failure when the value
     * does not match the schema) is caught: the failure is recorded against [path] and the result
     * of [mapNull] is returned instead.
     *
     * @param value the value to map
     * @param schema the schema describing [value]
     * @param path path segments from the record root to [value]; empty for the root
     * @return the mapped value, or the result of [mapNull] if [value] is null or mapping failed
     */
    fun map(
        value: AirbyteValue,
        schema: AirbyteType,
        path: List<String> = emptyList()
    ): AirbyteValue =
        if (value is NullValue && schema !is UnionType) {
            mapNull(path)
        } else {
            try {
                when (schema) {
                    is ObjectType -> mapObject(value as ObjectValue, schema, path)
                    is ObjectTypeWithoutSchema ->
                        mapObjectWithoutSchema(value as ObjectValue, schema, path)
                    is ObjectTypeWithEmptySchema ->
                        mapObjectWithEmptySchema(value as ObjectValue, schema, path)
                    is ArrayType -> mapArray(value as ArrayValue, schema, path)
                    is ArrayTypeWithoutSchema ->
                        mapArrayWithoutSchema(value as ArrayValue, schema, path)
                    is UnionType -> mapUnion(value, schema, path)
                    is BooleanType -> mapBoolean(value as BooleanValue, path)
                    is NumberType -> mapNumber(value as NumberValue, path)
                    is StringType -> mapString(value as StringValue, path)
                    is IntegerType -> mapInteger(value as IntegerValue, path)
                    is DateType -> mapDate(value as DateValue, path)
                    is TimeTypeWithTimezone -> mapTimeWithTimezone(value as TimeValue, path)
                    is TimeTypeWithoutTimezone -> mapTimeWithoutTimezone(value as TimeValue, path)
                    is TimestampTypeWithTimezone ->
                        mapTimestampWithTimezone(value as TimestampValue, path)
                    is TimestampTypeWithoutTimezone ->
                        mapTimestampWithoutTimezone(value as TimestampValue, path)
                    is NullType -> mapNull(path)
                    is UnknownType -> mapUnknown(value as UnknownValue, path)
                }
            } catch (e: Exception) {
                // Deliberate best-effort: a bad value is nulled and reported, not fatal.
                collectFailure(path)
                mapNull(path)
            }
        }

    /**
     * Maps each property declared in [schema], in schema order.
     *
     * Properties missing from [value] are mapped as [NullValue]; keys present in [value] but not
     * declared in [schema] are not copied to the result.
     */
    open fun mapObject(value: ObjectValue, schema: ObjectType, path: List<String>): AirbyteValue {
        val values = LinkedHashMap<String, AirbyteValue>()
        schema.properties.forEach { (name, field) ->
            values[name] = map(value.values[name] ?: NullValue, field.type, path + name)
        }
        return ObjectValue(values)
    }

    /** Returns [value] unchanged. */
    open fun mapObjectWithoutSchema(
        value: ObjectValue,
        schema: ObjectTypeWithoutSchema,
        path: List<String>
    ): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapObjectWithEmptySchema(
        value: ObjectValue,
        schema: ObjectTypeWithEmptySchema,
        path: List<String>
    ): AirbyteValue = value

    /**
     * Maps every element against the array's item type.
     *
     * The element index is appended as its own path segment (`"[i]"`), so a failure inside an
     * array is reported as e.g. `items.[0]` once the segments are joined with `.`.
     */
    open fun mapArray(value: ArrayValue, schema: ArrayType, path: List<String>): AirbyteValue {
        return ArrayValue(
            value.values.mapIndexed { index, element ->
                map(element, schema.items.type, path + "[$index]")
            }
        )
    }

    /** Returns [value] unchanged. */
    open fun mapArrayWithoutSchema(
        value: ArrayValue,
        schema: ArrayTypeWithoutSchema,
        path: List<String>
    ): AirbyteValue = value

    /** Returns [value] unchanged; the union's options are not inspected. */
    open fun mapUnion(value: AirbyteValue, schema: UnionType, path: List<String>): AirbyteValue =
        value

    /** Returns [value] unchanged. */
    open fun mapBoolean(value: BooleanValue, path: List<String>): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapNumber(value: NumberValue, path: List<String>): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapString(value: StringValue, path: List<String>): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapInteger(value: IntegerValue, path: List<String>): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapDate(value: DateValue, path: List<String>): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapTimeWithTimezone(value: TimeValue, path: List<String>): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapTimeWithoutTimezone(value: TimeValue, path: List<String>): AirbyteValue = value

    /** Returns [value] unchanged. */
    open fun mapTimestampWithTimezone(value: TimestampValue, path: List<String>): AirbyteValue =
        value

    /** Returns [value] unchanged. */
    open fun mapTimestampWithoutTimezone(value: TimestampValue, path: List<String>): AirbyteValue =
        value

    /** Produces the value used for nulls and for values whose mapping failed: [NullValue]. */
    open fun mapNull(path: List<String>): AirbyteValue = NullValue

    /** Returns [value] unchanged. */
    open fun mapUnknown(value: UnknownValue, path: List<String>): AirbyteValue = value
}
73 changes: 73 additions & 0 deletions
73
airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/data/AirbyteValueMapper.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
/* | ||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.load.data | ||
|
||
/**
 * Transformer over [AirbyteValue]s that is driven by the value's [AirbyteType] schema.
 *
 * [map] supplies the dispatch and failure handling; implementations supply one hook per schema
 * type plus [collectFailure].
 */
interface AirbyteValueMapper {
    /** Called with the path of a value whose mapping threw and was replaced via [mapNull]. */
    fun collectFailure(path: List<String>)

    /**
     * Maps [value] according to [schema] by dispatching to the hook for the schema's type.
     *
     * A [NullValue] is handed directly to [mapNull] and is not reported through [collectFailure]:
     * the casts below would otherwise throw for every null value and turn each one into a reported
     * failure. [UnionType] is excluded from the guard because [mapUnion] accepts any value, so
     * implementations of it still get to see nulls.
     *
     * Any exception thrown while mapping a non-null value (including a cast failure when the value
     * does not match the schema) is caught: [collectFailure] is called with [path] and the result
     * of [mapNull] is returned instead.
     *
     * @param value the value to map
     * @param schema the schema describing [value]
     * @param path path segments from the record root to [value]; empty for the root
     * @return the mapped value, or the result of [mapNull] if [value] is null or mapping failed
     */
    fun map(
        value: AirbyteValue,
        schema: AirbyteType,
        path: List<String> = emptyList()
    ): AirbyteValue =
        if (value is NullValue && schema !is UnionType) {
            mapNull(path)
        } else {
            try {
                when (schema) {
                    is ObjectType -> mapObject(value as ObjectValue, schema, path)
                    is ObjectTypeWithoutSchema ->
                        mapObjectWithoutSchema(value as ObjectValue, schema, path)
                    is ObjectTypeWithEmptySchema ->
                        mapObjectWithEmptySchema(value as ObjectValue, schema, path)
                    is ArrayType -> mapArray(value as ArrayValue, schema, path)
                    is ArrayTypeWithoutSchema ->
                        mapArrayWithoutSchema(value as ArrayValue, schema, path)
                    is UnionType -> mapUnion(value, schema, path)
                    is BooleanType -> mapBoolean(value as BooleanValue, path)
                    is NumberType -> mapNumber(value as NumberValue, path)
                    is StringType -> mapString(value as StringValue, path)
                    is IntegerType -> mapInteger(value as IntegerValue, path)
                    is DateType -> mapDate(value as DateValue, path)
                    is TimeTypeWithTimezone -> mapTimeWithTimezone(value as TimeValue, path)
                    is TimeTypeWithoutTimezone -> mapTimeWithoutTimezone(value as TimeValue, path)
                    is TimestampTypeWithTimezone ->
                        mapTimestampWithTimezone(value as TimestampValue, path)
                    is TimestampTypeWithoutTimezone ->
                        mapTimestampWithoutTimezone(value as TimestampValue, path)
                    is NullType -> mapNull(path)
                    is UnknownType -> mapUnknown(value as UnknownValue, path)
                }
            } catch (e: Exception) {
                // Deliberate best-effort: a bad value is nulled and reported, not fatal.
                collectFailure(path)
                mapNull(path)
            }
        }

    // Composite value hooks.
    fun mapObject(value: ObjectValue, schema: ObjectType, path: List<String>): AirbyteValue
    fun mapObjectWithoutSchema(
        value: ObjectValue,
        schema: ObjectTypeWithoutSchema,
        path: List<String>
    ): AirbyteValue
    fun mapObjectWithEmptySchema(
        value: ObjectValue,
        schema: ObjectTypeWithEmptySchema,
        path: List<String>
    ): AirbyteValue
    fun mapArray(value: ArrayValue, schema: ArrayType, path: List<String>): AirbyteValue
    fun mapArrayWithoutSchema(
        value: ArrayValue,
        schema: ArrayTypeWithoutSchema,
        path: List<String>
    ): AirbyteValue

    /** Receives the value uncast, because a union does not fix a single value type. */
    fun mapUnion(value: AirbyteValue, schema: UnionType, path: List<String>): AirbyteValue

    // Scalar and temporal value hooks.
    fun mapBoolean(value: BooleanValue, path: List<String>): AirbyteValue
    fun mapNumber(value: NumberValue, path: List<String>): AirbyteValue
    fun mapString(value: StringValue, path: List<String>): AirbyteValue
    fun mapInteger(value: IntegerValue, path: List<String>): AirbyteValue
    fun mapDate(value: DateValue, path: List<String>): AirbyteValue
    fun mapTimeWithTimezone(value: TimeValue, path: List<String>): AirbyteValue
    fun mapTimeWithoutTimezone(value: TimeValue, path: List<String>): AirbyteValue
    fun mapTimestampWithTimezone(value: TimestampValue, path: List<String>): AirbyteValue
    fun mapTimestampWithoutTimezone(value: TimestampValue, path: List<String>): AirbyteValue

    /** Produces the replacement for null values and for values whose mapping failed. */
    fun mapNull(path: List<String>): AirbyteValue
    fun mapUnknown(value: UnknownValue, path: List<String>): AirbyteValue
}
61 changes: 61 additions & 0 deletions
61
airbyte-cdk/bulk/core/load/src/main/kotlin/io/airbyte/cdk/load/data/MergeUnions.kt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
/* | ||
* Copyright (c) 2024 Airbyte, Inc., all rights reserved. | ||
*/ | ||
|
||
package io.airbyte.cdk.load.data | ||
|
||
/**
 * Schema mapper that simplifies [UnionType]s.
 *
 * Nested unions are flattened into their parent, duplicate options collapse (options are
 * collected in a set), and all [ObjectType] options are merged into a single object carrying the
 * combined properties. A union left with exactly one option is replaced by that option.
 */
class MergeUnions : AirbyteSchemaIdentityMapper {
    /**
     * Maps each option of [schema], then merges the mapped options.
     *
     * @return the sole remaining option if merging leaves exactly one, otherwise a [UnionType] of
     * the merged options
     * @throws IllegalArgumentException if two object options declare the same property with
     * different field definitions
     */
    override fun mapUnion(schema: UnionType): AirbyteType {
        // Map the options first so they're in their final form
        val mappedOptions = schema.options.map { map(it) }
        val mergedOptions = mergeOptions(mappedOptions)
        return mergedOptions.singleOrNull() ?: UnionType(mergedOptions.toList())
    }

    /** Merges [options] into a fresh set; see the two-argument overload for the rules. */
    private fun mergeOptions(options: List<AirbyteType>): Set<AirbyteType> {
        val mergedOptions = mutableSetOf<AirbyteType>()
        mergeOptions(mergedOptions, options)
        return mergedOptions
    }

    /**
     * Adds every option of [from] to [into], flattening nested unions and folding object options
     * into the single [ObjectType] that [into] holds at most.
     */
    private fun mergeOptions(into: MutableSet<AirbyteType>, from: List<AirbyteType>) {
        for (option in from) {
            when (option) {
                // If this is a union of a union, recursively merge the other union's options in
                is UnionType -> mergeOptions(into, option.options)
                is ObjectType -> {
                    val existingObjOption = into.filterIsInstance<ObjectType>().firstOrNull()
                    if (existingObjOption == null) {
                        // No other object in the set, so just add this one
                        into.add(option)
                    } else {
                        into.remove(existingObjOption)
                        into.add(mergeObjects(existingObjOption, option))
                    }
                }
                else -> into.add(option)
            }
        }
    }

    /**
     * Returns a new [ObjectType] holding the properties of [existing] followed by those of
     * [incoming]. Neither argument is modified: the merge works on a copy so that schema objects
     * shared with the caller's input are left intact.
     */
    private fun mergeObjects(existing: ObjectType, incoming: ObjectType): ObjectType {
        val mergedProperties = LinkedHashMap(existing.properties)
        for ((name, field) in incoming.properties) {
            val existingField = mergedProperties[name]
            // Identical definitions are fine; differing ones cannot be reconciled here.
            if (existingField != null && existingField != field) {
                throw IllegalArgumentException(
                    "Cannot merge unions of objects with different types for the same field"
                )
            }
            mergedProperties[name] = field
        }
        return ObjectType(mergedProperties)
    }
}
Oops, something went wrong.