# scraper.yml
# Workflow identity and trigger: runs on every push to the main branch.
name: Deploy to main & run scraper

on:
  push:
    branches:
      - main

jobs:
  # Calls the deployment hook and exposes the creation time reported in the
  # hook's JSON response (.job.createdAt) as the job output `timestamp`.
  trigger-deploy:
    runs-on: ubuntu-22.04
    # A value written to $GITHUB_OUTPUT is only visible to other jobs when it
    # is re-exported here; this requires the producing step to have an `id`.
    outputs:
      timestamp: ${{ steps.ping.outputs.timestamp }}
    steps:
      - name: Ping the deployment hook
        id: ping
        env:
          # Passed through the environment so the URL is never spliced into
          # the script text before the shell parses it.
          DEPLOY_HOOK: ${{ secrets.DEPLOY_HOOK }}
        run: |
          # --fail makes an HTTP error from the hook fail the step instead of
          # feeding an error page to jq.
          response="$(curl -sS --fail -X POST "$DEPLOY_HOOK")"
          timestamp="$(jq -r '.job.createdAt' <<< "$response")"
          # jq -r prints the literal string "null" when the field is absent.
          if [[ -z "$timestamp" || "$timestamp" == "null" ]]; then
            echo "Deploy hook response did not contain .job.createdAt" >&2
            exit 1
          fi
          echo "timestamp=$timestamp" >> "$GITHUB_OUTPUT"

  # Polls the deployments API until the first deployment returned for
  # `since=$TIMESTAMP` is READY. Fails on ERROR/CANCELED and on timeout.
  await-deploy:
    needs: trigger-deploy
    runs-on: ubuntu-22.04
    steps:
      - name: Wait for deployment
        env:
          TIMESTAMP: ${{ needs.trigger-deploy.outputs.timestamp }}
          TEAM_ID: team_okdUhHpKkohHXyqJWQOuloqC
          APP_NAME: website
          VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }}
        run: |
          # 25 attempts, 30 seconds apart (12.5 minutes in total).
          for attempt in {1..25}; do
            sleep 30
            response=$(curl -s -X GET \
              "https://api.vercel.com/v6/deployments?app=$APP_NAME&teamId=$TEAM_ID&since=$TIMESTAMP" \
              -H "Authorization: Bearer $VERCEL_TOKEN")
            # `// ""` turns a missing deployment or state into an empty string.
            status=$(echo "$response" | jq -r '.deployments[0] | .state // ""')
            if [[ $status == "READY" ]]; then
              ready_deployment_url=$(echo "$response" | jq -r '.deployments[0] | .url // ""')
              echo "Deployment is ready at $ready_deployment_url"
              exit 0
            elif [[ $status == "ERROR" || $status == "CANCELED" ]]; then
              echo "Deployment failed with status $status"
              exit 1 # exit with a failure status code
            fi
            echo "Deployment is not ready yet, retrying..."
          done
          # Reaching this point means no attempt saw READY; fail so that the
          # dependent scrape job does not run.
          echo "Timed out waiting for the deployment to become ready" >&2
          exit 1

  # Writes the embedder API key into the scraper config, then runs the
  # docs-scraper container with that config mounted.
  scrape-docs:
    needs: await-deploy
    runs-on: ubuntu-22.04
    name: scrape and push content on Meilisearch instance
    steps:
      - uses: actions/checkout@v4
      - name: Prepare config file
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          CONFIG_FILE_PATH: ${{ github.workspace }}/docs-scraper.config.json
          TEMP_CONFIG_PATH: ${{ github.workspace }}/temp-config.json
        run: |
          # --arg hands the key to jq as a string value, so quotes or
          # backslashes in it cannot alter the jq program.
          jq --arg api_key "$OPENAI_API_KEY" \
            '.custom_settings.embedders.default.apiKey = $api_key' \
            "$CONFIG_FILE_PATH" > "$TEMP_CONFIG_PATH"
          mv "$TEMP_CONFIG_PATH" "$CONFIG_FILE_PATH"
      - name: Run docs-scraper
        env:
          HOST_URL: ${{ secrets.MEILISEARCH_HOST_URL }}
          API_KEY: ${{ secrets.MEILISEARCH_API_KEY }}
          CONFIG_FILE_PATH: ${{ github.workspace }}/docs-scraper.config.json
        run: |
          docker run -t --rm \
            -e MEILISEARCH_HOST_URL="$HOST_URL" \
            -e MEILISEARCH_API_KEY="$API_KEY" \
            -v "$CONFIG_FILE_PATH:/docs-scraper/config.json" \
            getmeili/docs-scraper:v0.12.8 pipenv run ./docs_scraper config.json