Skip to content

Scrape Releases

Scrape Releases #3359

Workflow file for this run

# Workflow: periodically scrape release data and publish the generated feeds.
name: Scrape Releases

# Triggers: every push to main, a fixed schedule, and manual dispatch.
on:
  push:
    branches:
      - main
  schedule:
    # Minute 0 of every 8th hour (00:00, 08:00, 16:00); GitHub evaluates
    # scheduled workflows in UTC.
    - cron: "0 */8 * * *"
  workflow_dispatch:

# Only one run per workflow/ref pair at a time; a newer run cancels the one
# that is still in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
jobs:
  # Single job: run the `releases` crawler, validate the generated feed files,
  # prune old GitHub releases and publish the new feeds as a fresh release.
  scrape:
    runs-on: ubuntu-latest
    # The "Delete older releases" and "Upload new release" steps modify
    # releases and tags with GITHUB_TOKEN, which requires write access to
    # repository contents.
    permissions:
      contents: write
    steps:
      - name: 🛒 Checkout the repo
        uses: actions/checkout@v4

      - name: 🐍 Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'
          cache: 'pip'

      - run: pip install -r requirements.txt

      - name: 🔧 Setup TOR
        uses: tor-actions/setup-tor@main
        with:
          stable: true
          daemon: true

      # Expose the TOR SOCKS5 endpoint (127.0.0.1:9050) as an HTTP proxy on
      # 127.0.0.1:9051. Presumably the crawler is configured to use this
      # proxy - confirm in the Scrapy settings.
      - name: 🤿 Start TOR Proxy
        run: |
          pproxy -r 'socks5://127.0.0.1:9050' -l http://127.0.0.1:9051 --daemon || exit 1
        shell: bash

      # Cache entries are immutable: once a key exists it cannot be
      # overwritten. The database is therefore saved under a unique per-run
      # key, and restored from the most recent entry whose key starts with
      # `releases_db` (this also matches a legacy entry saved under the bare
      # `releases_db` key).
      - name: 🍱 Restore SQLite Database from cache
        uses: actions/cache/restore@v4
        with:
          path: releases.db
          key: releases_db-${{ github.run_id }}-${{ github.run_attempt }}
          restore-keys: |
            releases_db

      # Only warnings and above are logged; the log file is uploaded as an
      # artifact further down if the job fails.
      - name: 🕸️ Run crawler
        run: |
          scrapy crawl releases -L WARNING --logfile=releases.log
        shell: bash

      # Must use the same unique key as the restore step above so that every
      # successful crawl stores a fresh copy of the database.
      - name: 🥡 Save SQLite Database to Cache
        uses: actions/cache/save@v4
        with:
          path: releases.db
          key: releases_db-${{ github.run_id }}-${{ github.run_attempt }}

      # Fail the job unless every expected output file exists and is non-empty.
      - name: ✅ Validate stuff
        run: |
          [ -s "${{ github.workspace }}/releases.csv" ] && echo "🎉 CSV file generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.json" ] && echo "🎉 JSON file generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.jsonl" ] && echo "🎉 JSONLines file generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.rss" ] && echo "🎉 RSS Feed generated successfully!" || exit 1
          [ -s "${{ github.workspace }}/releases.db" ] && echo "🎉 SQLite DB updated successfully!" || exit 1
        shell: bash

      # Runs only when an earlier step in this job has failed.
      - name: 🗒️ Upload log
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: releases.log
          path: ${{ github.workspace }}/releases.log

      # NOTE(review): the original version tag of this action was lost to
      # e-mail obfuscation in the copied source; v0.3.4 is a reconstruction -
      # confirm the intended tag.
      - name: 🚮 Delete older releases
        uses: dev-drprasad/delete-older-releases@v0.3.4
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          keep_latest: 3
          delete_tags: true

      # Exposes two step outputs consumed by the release step below:
      #   date     - UTC timestamp formatted as YYYYMMDDHHMMSS
      #   releases - `jq length` of releases.json
      - name: 🛠️ Generate stats
        id: utils
        run: |
          # Date
          DATE="$(date -u '+%Y%m%d%H%M%S')"
          echo "date=$DATE" >> "$GITHUB_OUTPUT"
          # Number of releases
          RELEASES="$(jq length "${{ github.workspace }}/releases.json")"
          echo "releases=$RELEASES" >> "$GITHUB_OUTPUT"
        shell: bash

      # Publishes the generated feeds under a timestamped `feeds-<date>` tag.
      - name: 🚀 Upload new release
        uses: softprops/action-gh-release@v2
        with:
          tag_name: feeds-${{ steps.utils.outputs.date }}
          name: ${{ steps.utils.outputs.date }}
          body: |
            Number of releases - ${{ steps.utils.outputs.releases }}
          files: |
            ${{ github.workspace }}/releases.csv
            ${{ github.workspace }}/releases.json
            ${{ github.workspace }}/releases.jsonl
            ${{ github.workspace }}/releases.rss
            ${{ github.workspace }}/releases.db
          draft: false
          prerelease: false