-
Notifications
You must be signed in to change notification settings - Fork 0
65 lines (62 loc) · 2.55 KB
/
geonames-create-graph.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
name: >-
  Update Knowledge Graph with data from dataset 'GeoNames',
  based on IRIs in the Knowledge Graph

# Every 3 hours. Don't run it more often; otherwise you might hit
# GeoNames' hourly and daily request limits - http://www.geonames.org/export/
on:
  schedule:
    - cron: "0 */3 * * *"

# Run a single workflow at a time
concurrency:
  group: knowledge-graph

jobs:
  run:
    runs-on: ubuntu-latest
    timeout-minutes: 120
    # The "Save changes" step pushes a commit to the checked-out data
    # repository, so the job token needs write access to repository contents
    permissions:
      contents: write
    steps:
      # Only the 'geonames' directory of the data repository is checked out
      - name: Checkout data repository
        uses: actions/checkout@v4
        with:
          path: ./data
          sparse-checkout: geonames

      - name: Checkout code repository
        uses: actions/checkout@v4
        with:
          path: ./code
          repository: colonial-heritage/integration-layer

      - name: Install Node
        uses: actions/setup-node@v4
        with:
          node-version: "20"

      # Install dependencies, then build only the GeoNames graph app
      - name: Install app
        run: |
          cd ./code
          npm install --no-progress
          npx turbo run build --filter=@colonial-collections/graph-create-geonames

      - name: Create graph
        # Configuration is handed to the script through the environment rather
        # than expanded inside it, so the values are never parsed as shell code
        env:
          SPARQL_ENDPOINT_URL_KG: ${{ vars.SPARQL_ENDPOINT_URL_KG }}
          TRIPLYDB_INSTANCE_URL: ${{ vars.TRIPLYDB_INSTANCE_URL }}
          TRIPLYDB_API_TOKEN: ${{ secrets.TRIPLYDB_API_TOKEN }}
          TRIPLYDB_ACCOUNT: ${{ vars.TRIPLYDB_ACCOUNT }}
          TRIPLYDB_DATASET_KG: ${{ vars.TRIPLYDB_DATASET_KG }}
        run: |
          cd ./data
          mkdir -p "$RUNNER_TEMP"
          # Unpack the archived data file committed by the "Save changes" step
          # of an earlier run, if it exists
          if [ -f ./geonames/data/data.tar.zst ]; then
            tar --zstd -xf ./geonames/data/data.tar.zst -C "$RUNNER_TEMP"
          fi
          ../code/apps/graph-create-geonames/dist/cli.js create \
            --resource-dir ./geonames/resources \
            --data-file "$RUNNER_TEMP/data.sqlite" \
            --iterate-endpoint-url "$SPARQL_ENDPOINT_URL_KG" \
            --iterate-locations-query-file ./geonames/queries/iterate-locations.rq \
            --iterate-countries-query-file ./geonames/queries/iterate-countries.rq \
            --dereference-batch-size 500 \
            --triplydb-instance-url "$TRIPLYDB_INSTANCE_URL" \
            --triplydb-api-token "$TRIPLYDB_API_TOKEN" \
            --triplydb-account "$TRIPLYDB_ACCOUNT" \
            --triplydb-dataset "$TRIPLYDB_DATASET_KG" \
            --triplydb-service "kg" \
            --graph-name "https://data.colonialcollections.nl/geonames" \
            --temp-dir "$RUNNER_TEMP"

      - name: Save changes
        run: |
          cd ./data
          mkdir -p ./geonames/data
          # Archive the data file into the repository so it can be committed
          if [ -f "$RUNNER_TEMP/data.sqlite" ]; then
            tar --zstd -cf ./geonames/data/data.tar.zst -C "$RUNNER_TEMP" data.sqlite
          fi
          git config --global user.name "github-actions[bot]"
          git config --global user.email "github-actions[bot]@users.noreply.github.com"
          git add .
          # Nothing staged means nothing to save: skip the commit and the push
          # instead of masking every possible commit failure
          if git diff --cached --quiet; then
            echo "No changes to save"
            exit 0
          fi
          git commit --quiet -m "Save changes"
          # NOTE(review): a forced push overwrites any commits that reached the
          # remote branch while this job was running - confirm this is intended
          git push --force -u origin