# Workflow: Update Knowledge Graph with data from dataset 'Wereldmuseum Thesaurus', based on IRIs in the Knowledge Graph
# (definition as shown for workflow run #645)

name: Update Knowledge Graph with data from dataset 'Wereldmuseum Thesaurus', based on IRIs in the Knowledge Graph

# Runs twice a day, at minute 0 past hours 6 and 18. GitHub evaluates cron
# schedules in UTC.
on:
  schedule:
    - cron: "0 6,18 * * *"

# Only one run in the "knowledge-graph" concurrency group executes at a time.
concurrency:
  group: knowledge-graph

jobs:
  run:
    runs-on: ubuntu-latest
    timeout-minutes: 120
    # The "Save changes" step pushes a commit back to this repository, which
    # needs write access to its contents.
    permissions:
      contents: write
    steps:
      # Only the 'wereldmuseum-thesaurus' directory of this repository is
      # needed, so the checkout is sparse.
      - name: Checkout data repository
        uses: actions/checkout@v4
        with:
          path: ./data
          sparse-checkout: wereldmuseum-thesaurus

      - name: Checkout code repository
        uses: actions/checkout@v4
        with:
          path: ./code
          repository: colonial-heritage/integration-layer

      - name: Install Node
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      # Installs the dependencies of the code repository and builds only the
      # 'graph-create' app that the next step runs.
      - name: Install app
        run: |
          cd ./code
          npm install --no-progress
          npx turbo run build --filter=@colonial-collections/graph-create

      - name: Create graph
        # Configuration and the API token are handed to the script through
        # environment variables instead of being interpolated into the script
        # text, so their values are never re-parsed by the shell.
        env:
          SPARQL_ENDPOINT_URL_KG: "${{ vars.SPARQL_ENDPOINT_URL_KG }}"
          TRIPLYDB_INSTANCE_URL: "${{ vars.TRIPLYDB_INSTANCE_URL }}"
          TRIPLYDB_API_TOKEN: "${{ secrets.TRIPLYDB_API_TOKEN }}"
          TRIPLYDB_ACCOUNT: "${{ vars.TRIPLYDB_ACCOUNT }}"
          TRIPLYDB_DATASET_KG: "${{ vars.TRIPLYDB_DATASET_KG }}"
        run: |
          cd ./data
          mkdir -p "$RUNNER_TEMP"
          # Unpack the archive written by the "Save changes" step of an
          # earlier run, if there is one; a missing archive is not an error.
          if [ -f ./wereldmuseum-thesaurus/data/data.tar.zst ]; then
            tar --zstd -xf ./wereldmuseum-thesaurus/data/data.tar.zst -C "$RUNNER_TEMP"
          fi
          ../code/apps/graph-create/dist/cli.js create \
            --resource-dir ./wereldmuseum-thesaurus/resources \
            --data-file "$RUNNER_TEMP/data.sqlite" \
            --iterate-endpoint-url "$SPARQL_ENDPOINT_URL_KG" \
            --iterate-query-file ./wereldmuseum-thesaurus/queries/iterate.rq \
            --generate-endpoint-url "https://api.colonialcollections.nl/datasets/nmvw/thesaurus/sparql" \
            --generate-query-file ./wereldmuseum-thesaurus/queries/generate.rq \
            --generate-batch-size 5000000 \
            --triplydb-instance-url "$TRIPLYDB_INSTANCE_URL" \
            --triplydb-api-token "$TRIPLYDB_API_TOKEN" \
            --triplydb-account "$TRIPLYDB_ACCOUNT" \
            --triplydb-dataset "$TRIPLYDB_DATASET_KG" \
            --triplydb-service "kg" \
            --graph-name "https://data.colonialcollections.nl/wereldmuseum-thesaurus" \
            --temp-dir "$RUNNER_TEMP"

      - name: Save changes
        run: |
          cd ./data
          mkdir -p ./wereldmuseum-thesaurus/data
          # Archive the data file so that the next run can unpack it again.
          if [ -f "$RUNNER_TEMP/data.sqlite" ]; then
            tar --zstd -cf ./wereldmuseum-thesaurus/data/data.tar.zst -C "$RUNNER_TEMP" data.sqlite
          fi
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git add .
          # With nothing staged there is nothing to save. Stop here so that
          # the force push below cannot rewind the remote branch to the
          # commit this job checked out when the job itself changed nothing.
          if git diff --cached --quiet; then
            echo "No changes to save"
            exit 0
          fi
          # A failing commit now fails the step instead of being ignored.
          git commit --quiet -m "Save changes"
          # NOTE(review): --force overwrites any commit that reached the
          # remote branch after this job's checkout. Confirm that this is
          # intended; --force-with-lease would refuse to push in that case.
          git push --force -u origin