Update sitemaps.yml #27
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Submit sitemaps to Google Search Console and updated URLs to IndexNow after Pages deployment
name: Submit Sitemaps

on:
  # Manual runs from the Actions tab
  workflow_dispatch:
  # Pull requests that target main
  pull_request:
    branches: [main]
  # Every completed run of the "pages-build-deployment" workflow, whatever its conclusion
  workflow_run:
    workflows: ["pages-build-deployment"]
    types:
      - completed
jobs:
  submit-sitemaps:
    runs-on: ubuntu-latest
    # Disabled guard: would limit the job to successful workflow_run events and manual dispatches
    # if: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          # The default depth of 1 fetches no parent commit, so `HEAD^` below would not resolve
          fetch-depth: 2

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        run: |
          python -m pip install --upgrade pip
          # requests is imported directly by the IndexNow step, so install it explicitly
          pip install google-api-python-client oauth2client requests

      - name: Get modified files
        id: modified_files
        run: |
          # Publish the list as a step output; the IndexNow step reads steps.modified_files.outputs.files.
          # Newlines are folded to spaces because a `name=value` output entry must fit on one line.
          files=$(git diff --name-only HEAD^ HEAD | tr '\n' ' ')
          echo "files=$files" >> "$GITHUB_OUTPUT"

      - name: Submit Sitemaps to Google
        env:
          CREDENTIALS_JSON: ${{ secrets.GOOGLE_SEARCH_CONSOLE_API_JSON }}
        shell: python
        run: |
          import os
          import json
          from googleapiclient.discovery import build
          from oauth2client.service_account import ServiceAccountCredentials


          def submit_sitemap(site_url, sitemap_url, credentials_json):
              """Submit one sitemap URL for a Search Console property.

              Args:
                  site_url: Property URL passed to the API as `siteUrl`.
                  sitemap_url: Sitemap URL passed to the API as `feedpath`.
                  credentials_json: Service-account key as a JSON string.

              Any exception is caught and printed, so a failed submission does not fail the step.
              """
              try:
                  credentials = ServiceAccountCredentials.from_json_keyfile_dict(
                      json.loads(credentials_json), ['https://www.googleapis.com/auth/webmasters']
                  )
                  webmasters_service = build('webmasters', 'v3', credentials=credentials)
                  webmasters_service.sitemaps().submit(siteUrl=site_url, feedpath=sitemap_url).execute()
                  print(f'Submitted {sitemap_url} ✅')
              except Exception as e:
                  print(f'ERROR ❌: {sitemap_url} failed to submit {e}')


          credentials_json = os.environ['CREDENTIALS_JSON']

          # Submit sitemap for the main site and main docs
          # submit_sitemap('https://docs.ultralytics.com', 'https://docs.ultralytics.com/sitemap.xml', credentials_json)

          # Submit sitemaps for each language
          languages = ['zh', 'ko', 'ja', 'ru', 'de', 'fr', 'es', 'pt', 'hi', 'ar']
          for lang in languages:
              pass  # submit_sitemap(f'https://docs.ultralytics.com/', f'https://docs.ultralytics.com/{lang}/sitemap.xml', credentials_json)

      - name: Submit URLs to IndexNow
        env:
          INDEXNOW_KEY: ${{ secrets.INDEXNOW_KEY_DOCS }}
          MODIFIED_FILES: ${{ steps.modified_files.outputs.files }}
        shell: python
        run: |
          import json
          import os
          import re
          import requests

          host = "docs.ultralytics.com"
          TIMEOUT = 30  # seconds; requests waits indefinitely when no timeout is passed


          def submit_urls_to_indexnow(host, urls):
              """POST a batch of URLs for `host` to IndexNow.

              The outcome is printed; request errors are caught, so they do not fail the step.
              An empty batch is skipped without contacting the endpoint.
              """
              if not urls:
                  print("No URLs to submit to IndexNow")
                  return
              key = os.environ['INDEXNOW_KEY']
              endpoint = "https://api.indexnow.org/indexnow"  # static API endpoint from https://www.indexnow.org/faq
              headers = {"Content-Type": "application/json; charset=utf-8"}
              payload = {"host": host, "key": key, "urlList": urls, "keyLocation": f"https://{host}/{key}.txt"}
              try:
                  response = requests.post(endpoint, headers=headers, data=json.dumps(payload), timeout=TIMEOUT)
                  # 200 = submitted, 202 = received with key validation pending; both mean the batch was taken
                  if response.status_code in (200, 202):
                      print(f"Submitted batch of {len(urls)} {host} URLs to IndexNow endpoint {endpoint} ✅")
                  else:
                      print(f"Failed to submit batch of URLs: Status code {response.status_code}, Response: {response.text}")
              except Exception as e:
                  print(f"ERROR ❌: Failed to submit batch of URLs - {e}")


          def extract_urls_from_sitemap(sitemap_url):
              """Return every <loc> value found in the sitemap, or [] when the download fails."""
              try:
                  response = requests.get(sitemap_url, timeout=TIMEOUT)
                  response.raise_for_status()  # an HTTP error page is not a sitemap
                  return re.findall(r"<loc>(.*?)</loc>", response.text)
              except Exception as e:
                  print(f"ERROR ❌: Failed to extract URLs from {sitemap_url} - {e}")
                  return []


          def filter_modified_urls(urls, modified_files):
              """Return the URLs built from `modified_files` that also appear in `urls`.

              Reads the module-level `host` to build each candidate URL.
              """
              known_urls = set(urls)  # constant-time membership checks
              modified_urls = []
              for file in modified_files:
                  # Convert file path to URL path, i.e. 'modes/index.html' -> 'https://docs.ultralytics.com/modes/'
                  # A root-level 'index.html' maps to the site root.
                  url_path = re.sub(r"(^|/)index\.html$", r"\1", file)
                  full_url = f"https://{host}/{url_path}"
                  if full_url in known_urls:
                      modified_urls.append(full_url)
              return modified_urls


          # Collect URLs from each language sitemap
          all_urls = []
          for lang in ['', '/zh', '/ko', '/ja', '/ru', '/de', '/fr', '/es', '/pt', '/hi', '/ar']:
              sitemap = f'https://{host}{lang}/sitemap.xml'
              lang_urls = extract_urls_from_sitemap(sitemap)
              all_urls.extend(lang_urls)
              print(f'Found {len(lang_urls)} in {sitemap} ({len(all_urls)} total)')

          # Filter URLs based on modified files
          urls_to_submit = filter_modified_urls(all_urls, os.environ.get('MODIFIED_FILES', '').split())
          print(f'Filtered {len(urls_to_submit)} updated pages to submit')

          # Submit filtered URLs
          # submit_urls_to_indexnow(host, urls_to_submit)