Skip to content

Commit

Permalink
CTK: Improve transformations for MongoDB Table Loader
Browse files Browse the repository at this point in the history
  • Loading branch information
amotl committed Oct 22, 2024
1 parent 398aad5 commit 4f9b4f9
Show file tree
Hide file tree
Showing 3 changed files with 82 additions and 8 deletions.
2 changes: 1 addition & 1 deletion application/cratedb-toolkit/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
cratedb-toolkit[influxdb,mongodb]==0.0.27
cratedb-toolkit[influxdb,mongodb]==0.0.29
6 changes: 6 additions & 0 deletions application/cratedb-toolkit/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,12 @@ def test_ctk_load_table_mongodb_json(drop_testing_tables):
progress=GitProgressPrinter(),
)

# The `countries-big.json` file contains U+FEFF (byte order mark) characters; strip them before loading.
countries_big_path = datasets_path / "countries-big.json"
payload = countries_big_path.read_text()
payload = payload.replace("\ufeff", "")
countries_big_path.write_text(payload)

# Invoke data transfer.
command = f"""
ctk load table \
Expand Down
82 changes: 75 additions & 7 deletions application/cratedb-toolkit/zyp-mongodb-json-files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,79 @@
meta:
type: zyp-project
version: 1

collections:
- address:
container: datasets
name: companies
pre:
rules:
- expression: .[] |= del(.image.available_sizes, .screenshots[].available_sizes)
type: jq

- address:
container: datasets
name: books
pre:
rules:
- expression: .[] |= (._id |= tostring)
type: jq

- address:
container: datasets
name: city_inspections
pre:
rules:
- expression: |
.[] |= (
select(true)
| .address.number |= numbers
| .address.zip |= numbers
| .certificate_number |= tostring
)
type: jq
- address:
container: datasets
name: companies
pre:
rules:
- expression: |
.[] |=
del(
.image.available_sizes,
.screenshots[].available_sizes,
.created_at
)
type: jq
- address:
container: datasets
name: countries-big
pre:
rules:
- expression: .[] |= (.ISO |= tostring)
type: jq

- address:
container: datasets
name: products
pre:
rules:
- expression: |
.[] |= (
select(true)
| if (.for) then .for |= to_array end
| if (.type) then .type |= to_array end
| if (.limits.data.n) then .limits.data.n |= tostring end
| if (.limits.sms.n) then .limits.sms.n |= tostring end
| if (.limits.voice.n) then .limits.voice.n |= tostring end
| del(.additional_tarriffs)
)
type: jq
- address:
container: datasets
name: restaurant
pre:
rules:
- expression: |
.[] |= (
select(true)
| .rating |= tostring
| .type |= to_array
)
type: jq

0 comments on commit 4f9b4f9

Please sign in to comment.