-
-
Notifications
You must be signed in to change notification settings - Fork 2
126 lines (106 loc) · 5.32 KB
/
sitemaps.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Ultralytics YOLO 🚀, AGPL-3.0 license
# Submit Sitemaps to Google Search Console after Pages Deployment
name: Submit Sitemaps

on:
  # Manual trigger
  workflow_dispatch:
  # Pull requests that target the main branch
  pull_request:
    branches:
      - main
  # Fires when a run of the "pages-build-deployment" workflow completes
  workflow_run:
    workflows:
      - pages-build-deployment
    types: [completed]
jobs:
  # Single job: collect the files changed by the latest commit, then (optionally) notify
  # Google Search Console and IndexNow. The actual submit calls are currently commented out,
  # so the job only fetches sitemaps and reports what it would submit.
  submit-sitemaps:
    runs-on: ubuntu-latest
    # if: ${{ github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }}
    steps:
      - name: Checkout Repo
        uses: actions/checkout@v4
        with:
          # The default depth-1 clone has no parent commit; depth 2 lets `git diff HEAD^ HEAD` resolve HEAD^
          fetch-depth: 2

      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.x'

      - name: Install Dependencies
        # `requests` is imported by the IndexNow step, so install it explicitly
        # instead of relying on it arriving as a transitive dependency.
        run: |
          python -m pip install --upgrade pip
          pip install google-api-python-client oauth2client requests

      # Exposes the changed paths as the step output `files` (space-separated, single line).
      # Step outputs must be written to $GITHUB_OUTPUT; values written to $GITHUB_ENV are not
      # visible through `steps.modified_files.outputs.*`, which is what the IndexNow step reads.
      - name: Get modified files
        id: modified_files
        run: |
          echo "files=$(git diff --name-only HEAD^ HEAD | tr '\n' ' ')" >> "$GITHUB_OUTPUT"

      - name: Submit Sitemaps to Google
        env:
          CREDENTIALS_JSON: ${{ secrets.GOOGLE_SEARCH_CONSOLE_API_JSON }}
        shell: python
        run: |
          import os
          import json
          from googleapiclient.discovery import build
          from oauth2client.service_account import ServiceAccountCredentials


          def submit_sitemap(site_url, sitemap_url, credentials_json):
              """
              Submit one sitemap to the Search Console (webmasters v3) API.

              Args:
                  site_url: Site URL passed as `siteUrl` to the API.
                  sitemap_url: Sitemap URL passed as `feedpath` to the API.
                  credentials_json: Service-account key as a JSON string.

              Any exception is caught and printed so one failed submission does not abort the step.
              """
              try:
                  credentials = ServiceAccountCredentials.from_json_keyfile_dict(
                      json.loads(credentials_json), ['https://www.googleapis.com/auth/webmasters']
                  )
                  webmasters_service = build('webmasters', 'v3', credentials=credentials)
                  webmasters_service.sitemaps().submit(siteUrl=site_url, feedpath=sitemap_url).execute()
                  print(f'Submitted {sitemap_url} ✅')
              except Exception as e:
                  print(f'ERROR ❌: {sitemap_url} failed to submit {e}')


          credentials_json = os.environ['CREDENTIALS_JSON']

          # Submit sitemap for the main site and main docs (currently disabled)
          # submit_sitemap('https://docs.ultralytics.com', 'https://docs.ultralytics.com/sitemap.xml', credentials_json)

          # Submit sitemaps for each language (currently disabled, the loop body is a no-op)
          languages = ['zh', 'ko', 'ja', 'ru', 'de', 'fr', 'es', 'pt', 'hi', 'ar']
          for lang in languages:
              pass  # submit_sitemap(f'https://docs.ultralytics.com/', f'https://docs.ultralytics.com/{lang}/sitemap.xml', credentials_json)

      - name: Submit URLs to IndexNow
        env:
          INDEXNOW_KEY: ${{ secrets.INDEXNOW_KEY_DOCS }}
          MODIFIED_FILES: ${{ steps.modified_files.outputs.files }}
        shell: python
        run: |
          import json
          import os
          import re
          import requests

          REQUEST_TIMEOUT = 30  # seconds; keeps a stalled HTTP call from hanging the job


          def submit_urls_to_indexnow(host, urls):
              """
              POST a batch of URLs for `host` to the IndexNow endpoint.

              The key is read from the INDEXNOW_KEY environment variable. Failures (non-200 status
              or an exception) are printed rather than raised.
              """
              key = os.environ['INDEXNOW_KEY']
              endpoint = "https://api.indexnow.org/indexnow"  # static API endpoint from https://www.indexnow.org/faq
              headers = {"Content-Type": "application/json; charset=utf-8"}
              payload = {"host": host, "key": key, "urlList": urls, "keyLocation": f"https://{host}/{key}.txt"}
              try:
                  response = requests.post(endpoint, headers=headers, data=json.dumps(payload), timeout=REQUEST_TIMEOUT)
                  if response.status_code == 200:
                      print(f"Submitted batch of {len(urls)} {host} URLs to IndexNow endpoint {endpoint} ✅")
                  else:
                      print(f"Failed to submit batch of URLs: Status code {response.status_code}, Response: {response.text}")
              except Exception as e:
                  print(f"ERROR ❌: Failed to submit batch of URLs - {e}")


          def extract_urls_from_sitemap(sitemap_url):
              """Return every <loc> value found in the sitemap at `sitemap_url`, or [] if the fetch fails."""
              try:
                  response = requests.get(sitemap_url, timeout=REQUEST_TIMEOUT)
                  response.raise_for_status()  # treat HTTP errors as failures instead of parsing an error page
                  return re.findall(r"<loc>(.*?)</loc>", response.text)
              except Exception as e:
                  print(f"ERROR ❌: Failed to extract URLs from {sitemap_url} - {e}")
                  return []


          def filter_modified_urls(urls, modified_files):
              """
              Return the URLs for `modified_files` that are present in `urls`.

              Each file path is turned into a URL on the module-level `host`; a trailing
              'index.html' is dropped, i.e. 'modes/index.html' -> 'https://docs.ultralytics.com/modes/'
              and 'index.html' -> 'https://docs.ultralytics.com/'. Order follows `modified_files`.
              """
              known_urls = set(urls)  # O(1) membership checks instead of scanning the list per file
              modified_urls = []
              for file in modified_files:
                  if file == "index.html" or file.endswith("/index.html"):
                      page = file[: -len("index.html")]
                  else:
                      page = file
                  # Double quotes only: reusing the outer quote inside an f-string fails before Python 3.12
                  full_url = f"https://{host}/{page}"
                  if full_url in known_urls:
                      modified_urls.append(full_url)
              return modified_urls


          # Collect URLs from the main sitemap and each language sitemap
          host = "docs.ultralytics.com"
          all_urls = []
          for lang in ['', '/zh', '/ko', '/ja', '/ru', '/de', '/fr', '/es', '/pt', '/hi', '/ar']:
              sitemap = f'https://{host}{lang}/sitemap.xml'
              lang_urls = extract_urls_from_sitemap(sitemap)
              all_urls.extend(lang_urls)
              print(f'Found {len(lang_urls)} in {sitemap} ({len(all_urls)} total)')

          # Filter URLs based on modified files (space-separated list from the "Get modified files" step)
          urls_to_submit = filter_modified_urls(all_urls, os.environ.get('MODIFIED_FILES', '').split())
          print(f'Filtered {len(urls_to_submit)} updated pages to submit')

          # Submit filtered URLs (currently disabled)
          # submit_urls_to_indexnow(host, urls_to_submit)