# Skip to content

# Update sitemaps.yml #33

# Update sitemaps.yml

# Update sitemaps.yml #33

# Workflow file for this run

# Ultralytics YOLO 🚀, AGPL-3.0 license
# Submit Sitemaps to Google Search Console after Pages Deployment
name: Submit Sitemaps

# Runs on manual dispatch, on pull requests targeting main, and whenever the
# "pages-build-deployment" workflow completes.
on:
  workflow_dispatch:
  pull_request:
    branches: [main]
  workflow_run:
    workflows: ["pages-build-deployment"]
    types:
      - completed

jobs:
  submit-sitemaps:
    runs-on: ubuntu-latest
    # Job-level guard is currently disabled, so the job runs for every trigger listed under `on`.
    # if: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 2  # fetch the current and previous commit so `HEAD^` resolves below

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          # requests is imported directly by the IndexNow step, so install it explicitly
          pip install google-api-python-client oauth2client requests

      - name: Get modified files
        id: modified_files
        run: |
          # Space-separated list of files changed between the previous and current commit
          modified_files=$(git diff --name-only HEAD^ HEAD | tr '\n' ' ')
          echo "Modified files: $modified_files"
          # Publish as a step output: the IndexNow step reads steps.modified_files.outputs.files
          echo "files=$modified_files" >> "$GITHUB_OUTPUT"

      - name: Submit Sitemaps to Google
        env:
          CREDENTIALS_JSON: ${{ secrets.GOOGLE_SEARCH_CONSOLE_API_JSON }}
        shell: python
        run: |
          import os
          import json
          from googleapiclient.discovery import build
          from oauth2client.service_account import ServiceAccountCredentials


          def submit_sitemap(site_url, sitemap_url, credentials_json):
              """Submit `sitemap_url` for `site_url` via the webmasters v3 API; errors are printed, not raised."""
              try:
                  credentials = ServiceAccountCredentials.from_json_keyfile_dict(json.loads(credentials_json), ['https://www.googleapis.com/auth/webmasters'])
                  webmasters_service = build('webmasters', 'v3', credentials=credentials)
                  webmasters_service.sitemaps().submit(siteUrl=site_url, feedpath=sitemap_url).execute()
                  print(f'Submitted {sitemap_url} ✅')
              except Exception as e:
                  print(f'ERROR ❌: {sitemap_url} failed to submit {e}')


          credentials_json = os.environ['CREDENTIALS_JSON']

          # Submit sitemap for the main site and main docs
          # submit_sitemap('https://docs.ultralytics.com', 'https://docs.ultralytics.com/sitemap.xml', credentials_json)

          # Submit sitemaps for each language
          languages = ['zh', 'ko', 'ja', 'ru', 'de', 'fr', 'es', 'pt', 'hi', 'ar']
          for lang in languages:
              pass  # submit_sitemap(f'https://docs.ultralytics.com/', f'https://docs.ultralytics.com/{lang}/sitemap.xml', credentials_json)

      - name: Submit URLs to IndexNow
        env:
          INDEXNOW_KEY: ${{ secrets.INDEXNOW_KEY_DOCS }}
          MODIFIED_FILES: ${{ steps.modified_files.outputs.files }}
        shell: python
        run: |
          import json
          import os
          import re
          import requests

          REQUEST_TIMEOUT = 30  # seconds; keeps a stalled connection from hanging the job


          def submit_urls_to_indexnow(host, urls):
              """POST `urls` for `host` to the IndexNow endpoint and print the outcome; errors are printed, not raised."""
              key = os.environ['INDEXNOW_KEY']
              endpoint = "https://api.indexnow.org/indexnow"  # static API endpoint from https://www.indexnow.org/faq
              headers = {"Content-Type": "application/json; charset=utf-8"}
              payload = {"host": host, "key": key, "urlList": urls, "keyLocation": f"https://{host}/{key}.txt"}
              try:
                  response = requests.post(endpoint, headers=headers, data=json.dumps(payload), timeout=REQUEST_TIMEOUT)
                  # IndexNow reports success as 200 (OK) or 202 (accepted, key validation pending)
                  if response.status_code in (200, 202):
                      print(f"Submitted batch of {len(urls)} {host} URLs to IndexNow endpoint {endpoint} ✅")
                  else:
                      print(f"Failed to submit batch of URLs: Status code {response.status_code}, Response: {response.text}")
              except Exception as e:
                  print(f"ERROR ❌: Failed to submit batch of URLs - {e}")


          def extract_urls_from_sitemap(sitemap_url):
              """Return every <loc> value found in the sitemap at `sitemap_url`, or [] if it cannot be fetched."""
              try:
                  response = requests.get(sitemap_url, timeout=REQUEST_TIMEOUT)
                  response.raise_for_status()  # do not scrape URLs out of an HTTP error page
                  return re.findall(r"<loc>(.*?)</loc>", response.text)
              except Exception as e:
                  print(f"ERROR ❌: Failed to extract URLs from {sitemap_url} - {e}")
                  return []


          def filter_modified_urls(urls, modified_files):
              """Return the URLs built from `modified_files` that are also present in `urls` (uses the global `host`)."""
              known_urls = set(urls)  # constant-time membership checks
              modified_urls = []
              for file in modified_files:
                  # Convert file path to URL path, i.e. 'modes/index.html' -> 'https://docs.ultralytics.com/modes/'
                  # Outer double quotes keep this f-string valid on Python versions before 3.12
                  full_url = f"https://{host}/{file.replace('index.html', '')}"
                  if full_url in known_urls:
                      modified_urls.append(full_url)
              return modified_urls


          # Collect URLs from each sitemap
          host = "docs.ultralytics.com"
          all_urls = []
          for lang in ['', '/zh', '/ko', '/ja', '/ru', '/de', '/fr', '/es', '/pt', '/hi', '/ar']:
              sitemap = f'https://{host}{lang}/sitemap.xml'
              lang_urls = extract_urls_from_sitemap(sitemap)
              all_urls.extend(lang_urls)
              print(f'Found {len(lang_urls)} in {sitemap} ({len(all_urls)} total)')

          # Filter URLs based on modified files
          files = os.environ.get('MODIFIED_FILES', '').split()
          print(files)
          urls_to_submit = filter_modified_urls(all_urls, files)
          print(f'Filtered {len(urls_to_submit)} updated pages to submit')

          # Submit filtered URLs
          # submit_urls_to_indexnow(host, urls_to_submit)