-
Notifications
You must be signed in to change notification settings - Fork 60.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Periodically validate docs-urls.json in github/github (#49220)
Co-authored-by: Robert Sese <[email protected]>
- Loading branch information
Showing
11 changed files
with
750 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
name: Validate github/github docs URLs

# **What it does**: Checks the URLs in docs-urls.json in github/github
# **Why we have it**: To ensure the values in docs-urls.json are perfect.
# **Who does it impact**: Docs content.

on:
  workflow_dispatch:
  schedule:
    - cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST
  pull_request:

permissions:
  contents: read
  issues: write
  pull-requests: write

# Only one run per workflow + ref at a time; a newer run cancels the older one.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  validate_github_github_docs_urls:
    name: Validate github/github docs URLs
    if: github.repository == 'github/docs-internal'
    runs-on: ubuntu-20.04-xl
    steps:
      - name: Check out repo's default branch
        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

      - uses: ./.github/actions/node-npm-setup

      # Second checkout: github/github is cloned into ./github so that the
      # steps below can read (and update) github/config/docs-urls.json.
      - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
        with:
          token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
          repository: github/github
          ref: master
          path: github

      - name: Run validation
        run: |
          # This will generate a .json file which we can use to
          # do other things in other steps.
          npm run validate-github-github-docs-urls -- validate \
            --output checks.json \
            github/config/docs-urls.json

      - name: Update config/docs-urls.json in github/github (possibly)
        if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
        env:
          GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
        run: |
          npm run validate-github-github-docs-urls -- generate-new-json checks.json github/config/docs-urls.json
          cd github
          git status
          git diff
          changes=$(git diff --name-only | wc -l)
          if [[ $changes -eq 0 ]]; then
            echo "There are no changes to commit after running generate-new-json. Exiting this step"
            exit 0
          fi
          current_timestamp=$(date '+%Y-%m-%d-%H%M%S')
          branch_name="update-docs-urls-$current_timestamp"
          git checkout -b "$branch_name"
          current_daystamp=$(date '+%Y-%m-%d')
          git commit -a -m "Update Docs URLs from automation ($current_daystamp)"
          git push origin "$branch_name"
          # XXX TODO
          # Perhaps post an issue somewhere about the fact that this
          # branch has been created and now needs to be turned into a PR
          # that some human can take responsibility for.

      - name: Clean up old branches in github/github
        if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
        env:
          GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
        run: |
          npm run validate-github-github-docs-urls -- clean-up-old-branches --prefix update-docs-urls
          echo "To see them all, go to:"
          echo "https://github.com/github/github/branches/all?query=update-docs-urls-"

      # If a PR comes along to github/docs-internal that causes some
      # URLs in docs-urls.json (in github/github) to now fail, then
      # we'll want to make the PR author+reviewer aware of this.
      # For example, you moved a page without setting up a redirect.
      # Or you edited a heading that now breaks a URL with fragment.
      # In the latter case, you might want to update the URL in docs-urls.json
      # after this PR has landed, or consider using `<a name="..."></a>` as a
      # workaround for the time being.
      - name: Generate PR comment
        if: ${{ github.event_name == 'pull_request' }}
        env:
          GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
          ISSUE_NUMBER: ${{ github.event.pull_request.number }}
          REPOSITORY: ${{ github.repository }}
        run: npm run validate-github-github-docs-urls -- post-pr-comment checks.json

      # Only scheduled runs alert Slack on failure.
      - uses: ./.github/actions/slack-alert
        if: ${{ failure() && github.event_name == 'schedule' }}
        with:
          slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
          slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
/**
 * Shape of the value that `warmServer()` resolves to.
 *
 * NOTE(review): `Page` is not declared or imported in the visible part of
 * this declaration file — confirm where it is expected to come from.
 */
type Site = {
  // Keys must be the primitive `string`; the boxed `String` type does not
  // satisfy `Record`'s key constraint.
  pages: Record<string, Page>
  redirects: Record<string, string>
  unversionedTree: Record<string, string>
  siteTree: Record<string, string>
  pageList: Page[]
}

/**
 * Type declaration for the default export of the warm-server module.
 *
 * @param languages - Language codes to pass to the server warm-up.
 * @returns A promise resolving to the `Site` data.
 */
export default function warmServer(languages: string[]): Promise<Site>
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,141 @@ | ||
import cheerio from 'cheerio' | ||
|
||
import warmServer from '@/frame/lib/warm-server.js' | ||
import { liquid } from '@/content-render/index.js' | ||
import shortVersions from '@/versions/middleware/short-versions.js' | ||
import contextualize from '@/frame/middleware/context/context.js' | ||
import features from '@/versions/middleware/features.js' | ||
import findPage from '@/frame/middleware/find-page.js' | ||
import { createMinimalProcessor } from '@/content-render/unified/processor.js' | ||
import getRedirect from '@/redirects/lib/get-redirect.js' | ||
|
||
/**
 * Contents of a docs-urls.json file: a flat mapping from an identifier to a
 * docs URL string.
 */
export type DocsUrls = {
  [identifier: string]: string
}
|
||
/** The subset of a docs page object that this module reads. */
type Page = {
  permalinks: Permalink[]
  relativePath: string
  rawIntro: string
  rawPermissions?: string
  markdown: string
}

/** One URL under which a page is served, plus the language it belongs to. */
type Permalink = {
  href: string
  languageCode: string
}

/** Pages indexed by URL string. */
type PageMap = {
  [href: string]: Page
}

/** Redirect table: source URL mapped to destination URL. */
type Redirects = {
  [from: string]: string
}
|
||
/** Result of validating a single entry from docs-urls.json. */
export type Check = {
  identifier: string
  url: string
  // The URL that was looked up, prefixed with the language (e.g. /en/...)
  pageURL: string
  // True if the URL resolved to a page, either directly or via a redirect
  found: boolean
  fragment: string | undefined
  // Only set when fragments were checked
  fragmentFound?: boolean
  // Only set when the fragment was checked and not found
  fragmentCandidates?: string[]
  // If the URL leads to a redirect, this is its URL (starting with /en/...)
  redirectPageURL?: string
  // If the URL leads to a redirect, this is what the new URL should be
  // (for example /the/new/pathname#my-fragment)
  redirect?: string
}
|
||
/**
 * Validate every entry of a docs-urls.json mapping against the English site.
 *
 * Each URL is looked up in the page map returned by `warmServer(['en'])`.
 * If there is no direct hit, `getRedirect()` is consulted. Optionally the
 * resolved page is rendered to verify that the URL's fragment exists.
 *
 * @param docsUrls - Mapping of identifier to URL; every URL must start with `/`.
 * @param options - `checkFragments`: when true, render each resolved page and
 *   look for an element whose `id` (or an `<a name="...">`) equals the fragment.
 * @returns One `Check` per entry of `docsUrls`.
 * @throws Error if a URL does not start with `/`, or if a redirect points at
 *   a URL that is not in the page map.
 */
export async function validateDocsUrl(docsUrls: DocsUrls, { checkFragments = false } = {}) {
  const site = await warmServer(['en'])
  const pages: PageMap = site.pages
  const redirects: Redirects = site.redirects

  const checks: Check[] = []
  for (const [identifier, url] of Object.entries(docsUrls)) {
    if (!url.startsWith('/')) {
      throw new Error(`URL doesn't start with '/': ${url} (identifier: ${identifier})`)
    }
    // Split off the fragment *before* dropping the query string. Otherwise
    // a URL in the usual `/path?query#fragment` order loses its fragment,
    // and `/#fragment` is not recognized as the homepage.
    const [beforeHash, rawFragment] = url.split('#')
    const pathname = beforeHash.split('?')[0]
    // A query string that trails the fragment is dropped as well.
    const fragment = rawFragment === undefined ? undefined : rawFragment.split('?')[0]
    // If the url is just '/' we want to check the homepage,
    // which is `/en`, not `/en/`.
    const pageURL = `/en${pathname === '/' ? '' : pathname}`

    // The lookup is `undefined` at runtime when the URL is unknown.
    const page: Page | undefined = pages[pageURL]
    const check: Check = {
      identifier,
      url,
      pageURL,
      fragment,
      found: !!page,
    }
    let redirectedPage: Page | null = null
    if (!page) {
      const redirect = getRedirect(pageURL, {
        userLanguage: 'en',
        redirects,
        pages,
      })
      if (redirect) {
        redirectedPage = pages[redirect]
        if (!redirectedPage) {
          throw new Error(`The redirected page doesn't exist: ${redirect}`)
        }
        check.found = true
        check.redirectPageURL = redirect
        check.redirect = stripLanguagePrefix(redirect)
        if (fragment) {
          check.redirect += `#${fragment}`
        }
      }
    }

    // Only look for the fragment when the URL resolved to a page. A URL that
    // is neither a page nor a redirect has nothing to render, so it is
    // reported with `found: false` rather than crashing here.
    const targetPage = redirectedPage || page
    if (checkFragments && fragment && targetPage) {
      const permalink = targetPage.permalinks[0]
      const html = await renderInnerHTML(targetPage, permalink)
      const $ = cheerio.load(html)
      // NOTE(review): the fragment is interpolated into CSS selectors as-is;
      // this assumes it contains no selector metacharacters — confirm.
      check.fragmentFound = $(`#${fragment}`).length > 0 || $(`a[name="${fragment}"]`).length > 0
      if (!check.fragmentFound) {
        // Offer the ids of the h2/h3 headings as possible replacements.
        const fragmentCandidates: string[] = []
        $('h2[id], h3[id]').each((_, el) => {
          const id = $(el).attr('id')
          if (id) {
            fragmentCandidates.push(id)
          }
        })
        check.fragmentCandidates = fragmentCandidates
      }
    }
    checks.push(check)
  }
  return checks
}
|
||
/**
 * Render a page's markdown outside of a real HTTP request.
 *
 * A minimal fake request is run through the `contextualize`, `shortVersions`,
 * `findPage` and `features` middleware to populate `req.context`. The page's
 * markdown is then rendered with Liquid and passed through the minimal
 * unified processor.
 *
 * @param page - The page whose `markdown` is rendered.
 * @param permalink - Supplies the request path and language for the context.
 * @returns The processor output as a string (the caller parses it as HTML).
 */
async function renderInnerHTML(page: Page, permalink: Permalink) {
  // The middleware is called directly, so `next` and `res` are inert stubs.
  const next = () => {}
  const res = {}

  const pagePath = permalink.href
  const req = {
    path: pagePath,
    language: permalink.languageCode,
    pagePath,
    cookies: {},
    // The contextualize() middleware will create a new one.
    // Here it just exists for the sake of TypeScript.
    context: {},
  }
  await contextualize(req, res, next)
  await shortVersions(req, res, next)
  await findPage(req, res, next)
  await features(req, res, next)

  const markdown = await liquid.parseAndRender(page.markdown, req.context)
  const processor = createMinimalProcessor(req.context)
  const vFile = await processor.process(markdown)
  return vFile.toString()
}
|
||
/**
 * Remove the leading `/en` language segment from a URL.
 *
 * `/en/foo` becomes `/foo` and a bare `/en` (the homepage) becomes `/`.
 * URLs that merely start with the letters "en" (e.g. `/enterprise`) are
 * returned unchanged.
 *
 * @param url - A URL path, typically starting with `/en`.
 * @returns The URL without the language prefix.
 */
function stripLanguagePrefix(url: string) {
  // `(\/|$)` matches `/en/...` as well as exactly `/en`, but not `/enterprise`.
  return url.replace(/^\/en(\/|$)/, '/')
}
62 changes: 62 additions & 0 deletions
62
src/links/scripts/validate-github-github-docs-urls/clean-up-old-branches.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
import { Octokit } from '@octokit/rest' | ||
import { retry } from '@octokit/plugin-retry' | ||
|
||
// Age threshold, in days, used when no `minDays` option is given.
const DEFAULT_MIN_DAYS = 30

/** Options accepted by `cleanUpOldBranches()`. */
type Options = {
  // Branch-name prefix; used to build the `heads/<prefix>` ref query.
  prefix: string
  // Branches older than this many days are deleted. Optional because
  // cleanUpOldBranches() falls back to DEFAULT_MIN_DAYS when it is falsy.
  minDays?: number
  // Repository in "owner/repo" form.
  repository: string
}
|
||
/**
 * Delete branches whose name starts with `options.prefix` and whose head
 * commit is older than `options.minDays` days.
 *
 * The age is computed from the author date of the branch's head commit.
 * Branches whose head commit has no author date are skipped.
 *
 * @param options - `prefix`, `minDays` (falls back to `DEFAULT_MIN_DAYS` when
 *   falsy) and `repository` in "owner/repo" form.
 * @throws Error if `GITHUB_TOKEN` is not set, if `minDays` cannot be parsed
 *   as an integer, or if `repository` is not in "owner/repo" form.
 */
export async function cleanUpOldBranches(options: Options) {
  // The value is stringified before parsing, so a numeric string is accepted.
  const minDays = parseInt(`${options.minDays || DEFAULT_MIN_DAYS}`, 10)
  if (Number.isNaN(minDays)) {
    // Without this check `ageDays > NaN` is always false, so an unparsable
    // value would silently disable the clean-up.
    throw new Error(`minDays must be a number, got: ${options.minDays}`)
  }

  if (!process.env.GITHUB_TOKEN) {
    throw new Error('You must set the GITHUB_TOKEN environment variable.')
  }
  const octokit = retryingOctokit(process.env.GITHUB_TOKEN)

  const [owner, repo] = (options.repository || '').split('/')
  if (!owner || !repo) {
    throw new Error(`repository must be in the form "owner/repo", got: ${options.repository}`)
  }

  const { data: refs } = await octokit.request(
    'GET /repos/{owner}/{repo}/git/matching-refs/{ref}',
    {
      owner,
      repo,
      ref: `heads/${options.prefix}`,
    },
  )

  for (const ref of refs) {
    const branchName = ref.ref.replace('refs/heads/', '')
    // Fetch the branch itself to get its head commit.
    const { data: branch } = await octokit.request('GET /repos/{owner}/{repo}/branches/{branch}', {
      owner,
      repo,
      branch: branchName,
    })
    const { name, commit } = branch
    if (!commit.commit.author || !commit.commit.author.date) continue
    const lastUpdated = new Date(commit.commit.author.date)
    const ageDays = (Date.now() - lastUpdated.getTime()) / (1000 * 60 * 60 * 24)
    console.log(
      `Branch ${name} was last updated ${ageDays.toFixed(1)} days ago (${lastUpdated.toISOString()})`,
    )
    if (ageDays > minDays) {
      console.log(`Deleting branch ${name} !!`)
      await octokit.request('DELETE /repos/{owner}/{repo}/git/refs/{ref}', {
        owner,
        repo,
        ref: `heads/${name}`,
      })
    } else {
      console.log(`Branch ${name} is not old enough (min days: ${minDays})`)
    }
  }
}
|
||
/**
 * Create an Octokit client with the `retry` plugin applied.
 *
 * @param token - GitHub token; sent as `token <value>` in the `auth` option.
 * @returns An authenticated Octokit instance extended with the retry plugin.
 */
function retryingOctokit(token: string) {
  const RetryingOctokit = Octokit.plugin(retry)
  return new RetryingOctokit({
    auth: `token ${token}`,
  })
}
Oops, something went wrong.