-
-
Notifications
You must be signed in to change notification settings - Fork 2
132 lines (117 loc) · 5.68 KB
/
sitemaps.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Submit sitemaps to Google Search Console and changed URLs to IndexNow after Pages deployment
name: Submit Sitemaps

on:
  # Manual run; the boolean input lets the caller submit every sitemap URL to
  # IndexNow instead of only the URLs whose files changed.
  workflow_dispatch:
    inputs:
      submit_all_urls:
        type: boolean
        description: Submit all URLs to IndexNow (do not filter by changed)
        default: false
  # Automatic run each time the "pages-build-deployment" workflow completes.
  workflow_run:
    workflows: ["pages-build-deployment"]
    types:
      - completed

jobs:
  submit-sitemaps:
    runs-on: ubuntu-latest
    # Run for manual dispatches, and for workflow_run events only when the
    # triggering workflow run concluded successfully.
    if: ${{ (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') || github.event_name == 'workflow_dispatch' }}
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          ref: gh-pages  # checkout gh-pages branch
          fetch-depth: 2  # fetch the current and previous commit

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.x"

      - name: Install Dependencies
        # requests is imported directly by the IndexNow step, so it is installed
        # explicitly instead of relying on another package to pull it in.
        run: |
          python -m pip install --upgrade pip
          pip install google-api-python-client oauth2client requests

      - name: Get modified files
        id: modified_files
        # Exports the space-separated paths changed between HEAD^ and HEAD as
        # MODIFIED_FILES so the IndexNow step can read them from its environment.
        run: |
          modified_files=$(git diff --name-only HEAD^ HEAD | tr '\n' ' ')
          echo "Modified files: $modified_files"
          echo "MODIFIED_FILES=$modified_files" >> "$GITHUB_ENV"

      - name: Submit Sitemaps to Google
        env:
          CREDENTIALS_JSON: ${{ secrets.GOOGLE_SEARCH_CONSOLE_API_JSON }}
        shell: python
        run: |
          import json
          import os

          from googleapiclient.discovery import build
          from oauth2client.service_account import ServiceAccountCredentials

          WEBMASTERS_SCOPES = ['https://www.googleapis.com/auth/webmasters']


          def submit_sitemap(site_url, sitemap_url, credentials_json):
              """
              Submit a single sitemap to the Search Console (webmasters v3) API.

              Args:
                  site_url: Property URL the sitemap is submitted for.
                  sitemap_url: Full URL of the sitemap.xml to submit.
                  credentials_json: Service-account key as a JSON string.

              Best-effort: any exception is caught and printed so that one failing
              sitemap does not stop the remaining submissions.
              """
              try:
                  credentials = ServiceAccountCredentials.from_json_keyfile_dict(
                      json.loads(credentials_json), WEBMASTERS_SCOPES
                  )
                  webmasters_service = build('webmasters', 'v3', credentials=credentials)
                  webmasters_service.sitemaps().submit(siteUrl=site_url, feedpath=sitemap_url).execute()
                  print(f'Submitted {sitemap_url} ✅')
              except Exception as e:
                  print(f'ERROR ❌: {sitemap_url} failed to submit {e}')


          credentials_json = os.environ['CREDENTIALS_JSON']

          # Submit sitemaps for each language ('' is the default-language root).
          # Keep this list in sync with the one in the IndexNow step below.
          host = "docs.ultralytics.com"
          for lang in ['', '/zh', '/ko', '/ja', '/ru', '/de', '/fr', '/es', '/pt', '/hi', '/ar', '/tr', '/vi', '/it', '/nl']:
              sitemap = f'https://{host}{lang}/sitemap.xml'
              submit_sitemap(f'https://{host}/', sitemap, credentials_json)

      - name: Submit URLs to IndexNow
        env:
          INDEXNOW_KEY: ${{ secrets.INDEXNOW_KEY_DOCS }}
          SUBMIT_ALL_URLS: ${{ github.event.inputs.submit_all_urls }}
        shell: python
        run: |
          import json
          import os
          import re

          import requests

          INDEXNOW_MAX_URLS = 10000  # IndexNow accepts at most 10,000 URLs per POST
          REQUEST_TIMEOUT = 30  # seconds; prevents a stalled connection from hanging the job


          def submit_urls_to_indexnow(host, urls):
              """
              POST one batch of URLs for `host` to the IndexNow endpoint.

              Args:
                  host: Hostname that every URL in `urls` belongs to.
                  urls: List of absolute URLs (callers keep it within INDEXNOW_MAX_URLS).

              Reads the key from the INDEXNOW_KEY environment variable. Request
              failures are printed rather than raised.
              """
              key = os.environ['INDEXNOW_KEY']
              endpoint = "https://api.indexnow.org/indexnow"  # static API endpoint from https://www.indexnow.org/faq
              headers = {"Content-Type": "application/json; charset=utf-8"}
              payload = {"host": host, "key": key, "urlList": urls, "keyLocation": f"https://{host}/{key}.txt"}
              try:
                  response = requests.post(endpoint, headers=headers, data=json.dumps(payload), timeout=REQUEST_TIMEOUT)
                  # 200 = submitted; 202 = received, key validation pending. Both mean accepted.
                  if response.status_code in (200, 202):
                      print(f"Submitted batch of {len(urls)} {host} URLs to IndexNow endpoint {endpoint} ✅")
                  else:
                      print(f"Failed to submit batch of URLs: Status code {response.status_code}, Response: {response.text}")
              except Exception as e:
                  print(f"ERROR ❌: Failed to submit batch of URLs - {e}")


          def extract_urls_from_sitemap(sitemap_url):
              """
              Return every <loc> value found in the sitemap at `sitemap_url`.

              Returns an empty list (after printing the error) when the download
              fails or the server answers with an HTTP error status.
              """
              try:
                  response = requests.get(sitemap_url, timeout=REQUEST_TIMEOUT)
                  response.raise_for_status()
                  return re.findall(r"<loc>(.*?)</loc>", response.text)
              except Exception as e:
                  print(f"ERROR ❌: Failed to extract URLs from {sitemap_url} - {e}")
                  return []


          def filter_modified_urls(urls, modified_files):
              """
              Return the sitemap URLs that correspond to files in `modified_files`.

              Args:
                  urls: All URLs collected from the sitemaps.
                  modified_files: Repository-relative paths changed in the last commit.

              Uses the module-level `host`. Output order follows `modified_files`.
              """
              known_urls = set(urls)  # O(1) membership checks instead of scanning the list
              modified_urls = []
              for file in modified_files:
                  # Convert file path to URL path, i.e. 'modes/index.html' -> 'https://docs.ultralytics.com/modes/'
                  # Only a trailing 'index.html' is stripped.
                  page_path = file[:-len('index.html')] if file.endswith('index.html') else file
                  full_url = f'https://{host}/{page_path}'
                  if full_url in known_urls:
                      modified_urls.append(full_url)
              return modified_urls


          # Collect URLs from each language sitemap ('' is the default-language root).
          # Keep this list in sync with the one in the Google step above.
          host = "docs.ultralytics.com"
          all_urls = []
          for lang in ['', '/zh', '/ko', '/ja', '/ru', '/de', '/fr', '/es', '/pt', '/hi', '/ar', '/tr', '/vi', '/it', '/nl']:
              sitemap = f'https://{host}{lang}/sitemap.xml'
              lang_urls = extract_urls_from_sitemap(sitemap)
              all_urls.extend(lang_urls)
              print(f'Found {len(lang_urls)} in {sitemap} ({len(all_urls)} total)')

          # Filter URLs based on modified files unless a full submission was requested.
          # SUBMIT_ALL_URLS is empty on workflow_run events, which selects the filtered path.
          if os.getenv('SUBMIT_ALL_URLS', 'false').lower() == 'true':
              urls_to_submit = all_urls
          else:
              urls_to_submit = filter_modified_urls(all_urls, os.environ.get('MODIFIED_FILES', '').split())

          # Join outside the f-string: a backslash inside an f-string expression
          # only parses on Python 3.12+.
          url_listing = "\n".join(urls_to_submit)
          print(f'\nFound {len(urls_to_submit)} URLs updated in last commit to submit:\n{url_listing}\n')

          # Submit in batches that respect the per-request limit; an empty list sends nothing.
          for start in range(0, len(urls_to_submit), INDEXNOW_MAX_URLS):
              submit_urls_to_indexnow(host, urls_to_submit[start:start + INDEXNOW_MAX_URLS])