# Scrape Releases #3363
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Display name of the workflow.
name: Scrape Releases
# Triggers for this workflow.
on:
  # Every push to the main branch.
  push:
    branches:
      - main
  # At minute 0 of every 8th hour (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: "0 */8 * * *"
  # Manual runs.
  workflow_dispatch:
# Allow one run at a time per workflow + ref; a newly queued run cancels the
# run that is still in progress for the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Crawl release data through a local TOR-backed HTTP proxy, validate the
  # generated feed files, prune old GitHub releases and publish a new one.
  scrape:
    runs-on: ubuntu-latest
    # The job deletes old releases/tags and creates a new release, both of
    # which need write access to repository contents via GITHUB_TOKEN.
    permissions:
      contents: write
    steps:
      - name: 🛒 Checkout the repo
        uses: actions/checkout@v4

      - name: 🐍 Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
          cache: 'pip'

      # presumably provides the `pproxy` and `scrapy` commands used below —
      # verify against requirements.txt
      - run: pip install -r requirements.txt

      # NOTE(review): `@main` is a mutable ref; consider pinning this
      # third-party action to a release tag or commit SHA.
      - name: 🔧 Setup TOR
        uses: tor-actions/setup-tor@main
        with:
          stable: true
          daemon: true

      # Expose the TOR SOCKS5 endpoint (127.0.0.1:9050) as a local HTTP proxy
      # on 127.0.0.1:9051; fail the step if the proxy cannot be started.
      - name: 🤿 Start TOR Proxy
        run: |
          pproxy -r 'socks5://127.0.0.1:9050' -l http://127.0.0.1:9051 --daemon || exit 1
        shell: bash

      # Cache entries are immutable: a fixed key can never be overwritten, so
      # the database would be stuck at its first saved version. Each run
      # therefore uses a unique key and restores the most recent entry whose
      # key starts with `releases_db` (this also matches an entry saved under
      # the old fixed key).
      - name: 🍱 Restore SQLite Database from cache
        uses: actions/cache/restore@v4
        with:
          path: releases.db
          key: releases_db-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            releases_db

      # Only WARNING and above are logged, to releases.log.
      - name: 🕸️ Run crawler
        run: |
          scrapy crawl releases -L WARNING --logfile=releases.log
        shell: bash

      # Saved under the same per-run key used by the restore step so the next
      # run picks up this run's database.
      - name: 🥡 Save SQLite Database to Cache
        uses: actions/cache/save@v4
        with:
          path: releases.db
          key: releases_db-${{ github.run_id }}-${{ github.run_attempt }}

      # `[ -s FILE ]` is true only when FILE exists and is non-empty; any
      # missing or empty output fails the job.
      - name: ✅ Validate stuff
        run: |
          [ -s "${{ github.workspace }}/releases.csv" ] && echo "🎉 CSV file generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.json" ] && echo "🎉 JSON file generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.jsonl" ] && echo "🎉 JSONLines file generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.rss" ] && echo "🎉 RSS Feed generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.db" ] && echo "🎉 SQLite DB updated successfully!" || exit 1
        shell: bash

      # Runs only when an earlier step has failed, to keep the crawler log
      # available for debugging. upload-artifact v3 has been retired, hence v4.
      - name: 🗒️ Upload log
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: releases.log
          path: ${{ github.workspace }}/releases.log

      # NOTE(review): the action reference was obfuscated in the captured
      # source ("dev-drprasad/[email protected]"). The action name is
      # reconstructed from its inputs; the version tag v0.2.1 is an
      # assumption — confirm against the repository history.
      - name: 🚮 Delete older releases
        uses: dev-drprasad/delete-older-releases@v0.2.1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          keep_latest: 3
          delete_tags: true

      # Exposes two step outputs: `date` (UTC timestamp, YYYYmmddHHMMSS) and
      # `releases` (length of the top-level JSON value in releases.json).
      - name: 🛠️ Generate stats
        id: utils
        run: |
          # Date
          DATE="$(date -u '+%Y%m%d%H%M%S')"
          echo "date=$DATE" >> "$GITHUB_OUTPUT"
          # Number of releases
          RELEASES="$(jq length "${{ github.workspace }}/releases.json")"
          echo "releases=$RELEASES" >> "$GITHUB_OUTPUT"
        shell: bash

      # Publish all generated feed files as assets of a release tagged
      # `feeds-<timestamp>`.
      - name: 🚀 Upload new release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: feeds-${{ steps.utils.outputs.date }}
          name: ${{ steps.utils.outputs.date }}
          body: |
            Number of releases - ${{ steps.utils.outputs.releases }}
          files: |
            ${{ github.workspace }}/releases.csv
            ${{ github.workspace }}/releases.json
            ${{ github.workspace }}/releases.jsonl
            ${{ github.workspace }}/releases.rss
            ${{ github.workspace }}/releases.db
          draft: false
          prerelease: false