Skip to content

Commit

Permalink
github workflow to pull from upstream once a week
Browse files Browse the repository at this point in the history
  • Loading branch information
TheTechromancer committed Feb 21, 2024
1 parent a45b9b5 commit e8b7ce7
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 0 deletions.
39 changes: 39 additions & 0 deletions .github/workflows/pull-upstream.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Scheduled job that runs scripts/pull_upstream.py and opens a pull request
# containing whatever fingerprint changes the script produced.
name: Merge new changes from upstream wappalyzer

on:
  schedule:
    # Runs at 23:39 every Sunday (GitHub evaluates cron schedules in UTC)
    - cron: '39 23 * * 0'
  # Also allow the job to be started manually from the Actions tab
  workflow_dispatch:

# The create-pull-request step pushes a branch and opens a PR with the
# default GITHUB_TOKEN, so the token needs write access to both scopes.
permissions:
  contents: write
  pull-requests: write

jobs:
  update-and-commit:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v3

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.x"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install requests

      - name: Update Fingerprints
        run: |
          python scripts/pull_upstream.py

      # Action inputs under `with:` are plain strings, not shell, so a
      # "$(date ...)" substitution written there is never evaluated. Compute
      # the date in a real shell step and expose it as a step output instead.
      - name: Get current date
        id: date
        run: echo "date=$(date +'%Y%m%d')" >> "$GITHUB_OUTPUT"

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
          commit-message: Merge in new official signatures
          title: Merge in new official signatures
          body: |
            This is an automated update of fingerprints.
          branch: update-fingerprints-${{ steps.date.outputs.date }}
          delete-branch: true
          signoff: true
54 changes: 54 additions & 0 deletions scripts/pull_upstream.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import requests
import zipfile
import io
import json
from pathlib import Path


# Locate the directory two levels above this script and make sure the
# src/technologies folder underneath it is present before anything is
# written into it.
code_dir = Path(__file__).parent.parent
local_fingerprint_dir = code_dir.joinpath("src", "technologies")
local_fingerprint_dir.mkdir(exist_ok=True, parents=True)

# Download URL for the extension's CRX package, split at the query
# parameters purely for readability; the pieces concatenate to one string.
chrome_extension_url = (
    "https://clients2.google.com/service/update2/crx"
    "?response=redirect"
    "&prodversion=119.0.6045.199"
    "&acceptformat=crx2,crx3"
    "&x=id%3Dgppongmhjkpfnbhagpmjfkannfbllamg%26installsource%3Dondemand%26uc"
)


def download_and_extract_fingerprints():
    """Download the upstream extension package and merge its fingerprints.

    Fetches ``chrome_extension_url``, opens the response body in memory as a
    zip archive, and for every ``technologies/*.json`` member merges its
    top-level keys into the same-named file under ``local_fingerprint_dir``.
    Upstream values win on key collisions; keys that exist only locally are
    kept. Local files that do not exist yet (or are empty) are created from
    the upstream content alone.

    Raises:
        requests.HTTPError: if the download returns an error status.
        requests.Timeout: if the server does not respond in time.
        zipfile.BadZipFile: if the response body cannot be read as a zip.
        json.JSONDecodeError: if an upstream member or a non-empty local
            file does not contain valid JSON.
    """
    # A timeout keeps the scheduled job from hanging forever on a stalled
    # connection; requests waits indefinitely when none is given.
    resp = requests.get(chrome_extension_url, timeout=60)
    resp.raise_for_status()  # Raises stored HTTPError, if one occurred.

    # Read the CRX file without writing it to disk; the context manager
    # guarantees the archive is closed even if a member fails to parse.
    with zipfile.ZipFile(io.BytesIO(resp.content), "r") as zip_reader:
        for file in zip_reader.namelist():
            if not (file.startswith("technologies/") and file.endswith(".json")):
                continue
            print(f"Extracted {file}")

            # Read fingerprints from the archive member
            with zip_reader.open(file) as f:
                upstream_fingerprints = json.load(f)

            # Only the member's base name is used, so archive paths can
            # never place files outside the fingerprint directory.
            local_file_path = local_fingerprint_dir / Path(file).name

            # Start from the existing local data when there is any. A file
            # that is missing or empty counts as "no fingerprints yet"
            # rather than being handed to the JSON parser, which would
            # reject empty input.
            local_fingerprints = {}
            if local_file_path.exists():
                existing_text = local_file_path.read_text(encoding="utf-8")
                if existing_text.strip():
                    local_fingerprints = json.loads(existing_text)

            # Upstream entries overwrite local ones with the same key
            local_fingerprints.update(upstream_fingerprints)

            # Rewrite the whole file; sorted keys keep diffs stable
            with open(local_file_path, "w", encoding="utf-8") as local_file:
                json.dump(local_fingerprints, local_file, indent=2, sort_keys=True)

# Runs at module level: executing (or importing) this file performs the
# download and merge immediately.
download_and_extract_fingerprints()
# Only reached when the call above completed without raising.
print("Fingerprints updated successfully.")

0 comments on commit e8b7ce7

Please sign in to comment.