Skip to content

Commit

Permalink
Periodically validate docs-urls.json in github/github (#49220)
Browse files Browse the repository at this point in the history
Co-authored-by: Robert Sese <[email protected]>
  • Loading branch information
peterbe and rsese authored Mar 18, 2024
1 parent 14ba0e9 commit bf4af51
Show file tree
Hide file tree
Showing 11 changed files with 750 additions and 4 deletions.
105 changes: 105 additions & 0 deletions .github/workflows/validate-github-github-docs-urls.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
name: Validate github/github docs URLs

# **What it does**: Checks the URLs in docs-urls.json in github/github
# **Why we have it**: To ensure the values in docs-urls.json are perfect.
# **Who does it impact**: Docs content.

on:
workflow_dispatch:
schedule:
- cron: '20 16 * * *' # Run every day at 16:20 UTC / 8:20 PST
pull_request:

permissions:
contents: read
issues: write
pull-requests: write

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
validate_github_github_docs_urls:
name: Validate github/github docs URLs
if: github.repository == 'github/docs-internal'
runs-on: ubuntu-20.04-xl
steps:
- name: Check out repo's default branch
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1

- uses: ./.github/actions/node-npm-setup

- uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
token: ${{ secrets.DOCS_BOT_PAT_READPUBLICKEY }}
repository: github/github
ref: master
path: github

- name: Run validation
run: |
# This will generate a .json file which we can use to
# do other things in other steps.
npm run validate-github-github-docs-urls -- validate \
--output checks.json \
github/config/docs-urls.json
- name: Update config/docs-urls.json in github/github (possibly)
if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
env:
GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
run: |
npm run validate-github-github-docs-urls -- generate-new-json checks.json github/config/docs-urls.json
cd github
git status
git diff
changes=$(git diff --name-only | wc -l)
if [[ $changes -eq 0 ]]; then
echo "There are no changes to commit after running generate-new-json. Exiting this step"
exit 0
fi
current_timestamp=$(date '+%Y-%m-%d-%H%M%S')
branch_name="update-docs-urls-$current_timestamp"
git checkout -b "$branch_name"
current_daystamp=$(date '+%Y-%m-%d')
git commit -a -m "Update Docs URLs from automation ($current_daystamp)"
git push origin "$branch_name"
# XXX TODO
# Perhaps post an issue somewhere about the fact that this
# branch has been created and now needs to be turned into a PR
# that some human can take responsibility for.
- name: Clean up old branches in github/github
if: ${{ github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' }}
env:
GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
run: |
npm run validate-github-github-docs-urls -- clean-up-old-branches --prefix update-docs-urls
echo "To see them all, go to:"
echo "https://github.com/github/github/branches/all?query=update-docs-urls-"
# If a PR comes along to github/docs-internal that causes some
# URLs in docs-urls.json (in github/github) to now fail, then
# we'll want to make the PR author+reviewer aware of this.
# For example, you moved a page without setting up a redirect.
# Or you edited a heading that now breaks a URL with fragment.
# In the latter case, you might want to update the URL in docs-urls.json
# after this PR has landed, or consider using `<a name="..."></a>` as a
# workaround for the time being.
- name: Generate PR comment
if: ${{ github.event_name == 'pull_request' }}
env:
GITHUB_TOKEN: ${{ secrets.DOCS_BOT_PAT_WRITEORG_PROJECT }}
ISSUE_NUMBER: ${{ github.event.pull_request.number }}
REPOSITORY: ${{ github.repository }}
run: npm run validate-github-github-docs-urls -- post-pr-comment checks.json

- uses: ./.github/actions/slack-alert
if: ${{ failure() && github.event_name == 'schedule' }}
with:
slack_channel_id: ${{ secrets.DOCS_ALERTS_SLACK_CHANNEL_ID }}
slack_token: ${{ secrets.SLACK_DOCS_BOT_TOKEN }}
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
"update-data-and-image-paths": "node src/early-access/scripts/update-data-and-image-paths.js",
"update-internal-links": "node src/links/scripts/update-internal-links.js",
"validate-asset-images": "node src/assets/scripts/validate-asset-images.js",
"validate-github-github-docs-urls": "tsx src/links/scripts/validate-github-github-docs-urls/index.ts",
"warmup-remotejson": "node src/archives/scripts/warmup-remotejson.js"
},
"lint-staged": {
Expand Down
9 changes: 9 additions & 0 deletions src/frame/lib/warm-server.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
/**
 * Shape of the value that `warmServer()` resolves to.
 */
type Site = {
  // Keys use the primitive `string` type. The boxed `String` wrapper type
  // is not a valid `Record` key type and should never be used for values
  // that are plain strings at runtime.
  pages: Record<string, Page>
  redirects: Record<string, string>
  unversionedTree: Record<string, string>
  siteTree: Record<string, string>
  pageList: Page[]
}

/**
 * Type declaration for the default export of `warm-server.js`.
 *
 * @param languages - Optional list of language codes to limit what gets
 *   loaded. The JavaScript implementation defaults this parameter to an
 *   empty array, so callers may omit it.
 * @returns A promise for the warmed-up site data.
 */
export default function warmServer(languages?: string[]): Promise<Site>
9 changes: 6 additions & 3 deletions src/frame/lib/warm-server.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,17 @@ const dog = {
// For multiple-triggered Promise sharing
let promisedWarmServer

async function warmServer() {
async function warmServer(languagesOnly = []) {
const startTime = Date.now()

if (process.env.NODE_ENV !== 'test') {
console.log('Priming context information...')
console.log(
'Priming context information...',
languagesOnly && languagesOnly.length ? `${languagesOnly.join(',')} only` : '',
)
}

const unversionedTree = await dog.loadUnversionedTree()
const unversionedTree = await dog.loadUnversionedTree(languagesOnly)
const siteTree = await dog.loadSiteTree(unversionedTree)
const pageList = await dog.loadPages(unversionedTree)
const pageMap = await dog.loadPageMap(pageList)
Expand Down
141 changes: 141 additions & 0 deletions src/links/lib/validate-docs-urls.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import cheerio from 'cheerio'

import warmServer from '@/frame/lib/warm-server.js'
import { liquid } from '@/content-render/index.js'
import shortVersions from '@/versions/middleware/short-versions.js'
import contextualize from '@/frame/middleware/context/context.js'
import features from '@/versions/middleware/features.js'
import findPage from '@/frame/middleware/find-page.js'
import { createMinimalProcessor } from '@/content-render/unified/processor.js'
import getRedirect from '@/redirects/lib/get-redirect.js'

/**
 * Input to `validateDocsUrl()`: a mapping from an identifier to a docs URL.
 * Every URL must start with '/'; `validateDocsUrl()` throws otherwise.
 */
export type DocsUrls = {
[identifier: string]: string
}

// Local, minimal typing of a docs page. Only the fields listed here are
// typed; this module reads `permalinks` and `markdown`.
type Page = {
// `validateDocsUrl()` renders the page using the first entry.
permalinks: Permalink[]
relativePath: string
rawIntro: string
rawPermissions?: string
// Source text handed to `liquid.parseAndRender()` in `renderInnerHTML()`.
markdown: string
}
// `href` and `languageCode` become the path and language of the fake
// request built in `renderInnerHTML()`.
type Permalink = {
href: string
languageCode: string
}
// Lookup of pages by URL. `validateDocsUrl()` indexes it with
// `/en`-prefixed URLs (for example `/en` for the homepage).
type PageMap = {
[href: string]: Page
}
// Redirect table taken from the warmed-up site and passed to `getRedirect()`.
type Redirects = {
[from: string]: string
}

/**
 * Result of validating a single entry of a `DocsUrls` mapping.
 */
export type Check = {
// The key and value of the entry, exactly as given in the input.
identifier: string
url: string
// The URL that was looked up: prefixed with `/en`, without query string
// or fragment.
pageURL: string
// True if a page exists at `pageURL`, either directly or via a redirect.
found: boolean
// The part of the URL after `#`, if any.
fragment: string | undefined
// Only set when fragment checking is enabled and the URL has a fragment.
fragmentFound?: boolean
// Only set when the fragment was not found: the `id`s of the `h2` and `h3`
// elements in the rendered page.
fragmentCandidates?: string[]
// If the URL led to a redirect, this is its URL (starting with /en/...)
redirectPageURL?: string
// If the URL led to a redirect, this is what the new URL should be
// (for example /the/new/pathname#my-fragment)
redirect?: string
}

/**
 * Validates every URL in `docsUrls` against the English docs site.
 *
 * For each entry the URL is prefixed with `/en` and looked up among the
 * known pages. If no page exists at that URL, the redirects are consulted.
 * Optionally, the fragment (the part after `#`) is verified against the
 * rendered HTML of the page.
 *
 * @param docsUrls - Mapping of identifier to URL. Each URL must start with '/'.
 * @param options.checkFragments - When true, render each found page whose
 *   URL has a fragment and check that an element with that `id` (or an
 *   `<a name="...">`) exists. Defaults to false.
 * @returns One `Check` per entry, in the iteration order of `docsUrls`.
 * @throws If a URL does not start with '/', or if a redirect points to a
 *   page that does not exist.
 */
export async function validateDocsUrl(docsUrls: DocsUrls, { checkFragments = false } = {}) {
  const site = await warmServer(['en'])
  const pages: PageMap = site.pages
  const redirects: Redirects = site.redirects

  const checks: Check[] = []
  for (const [identifier, url] of Object.entries(docsUrls)) {
    if (!url.startsWith('/')) {
      throw new Error(`URL doesn't start with '/': ${url} (identifier: ${identifier})`)
    }
    // Split off the fragment *before* the query string. In a URL the query
    // string precedes the fragment (`/path?query#fragment`), so stripping
    // everything after `?` first would silently discard the fragment.
    const [withoutFragment, fragment] = url.split('#')
    const pathname = withoutFragment.split('?')[0]
    // If the url is just '/' we want to check the homepage,
    // which is `/en`, not `/en/`.
    const pageURL = `/en${pathname === '/' ? '' : pathname}`

    const page: Page | undefined = pages[pageURL]
    const check: Check = {
      identifier,
      url,
      pageURL,
      fragment,
      found: !!page,
    }
    let redirectedPage: Page | null = null
    if (!page) {
      const redirect = getRedirect(pageURL, {
        userLanguage: 'en',
        redirects,
        pages,
      })
      if (redirect) {
        redirectedPage = pages[redirect]
        if (!redirectedPage) {
          throw new Error(`The redirected page doesn't exist: ${redirect}`)
        }
        check.found = true
        check.redirectPageURL = redirect
        check.redirect = stripLanguagePrefix(redirect)
        if (fragment) {
          check.redirect += `#${fragment}`
        }
      }
    }

    // Only attempt the fragment check when there is a page to render.
    // A URL that resolves to neither a page nor a redirect is reported with
    // `found: false` instead of crashing on an undefined page.
    const targetPage = redirectedPage || page
    if (checkFragments && fragment && targetPage) {
      const permalink = targetPage.permalinks[0]
      const html = await renderInnerHTML(targetPage, permalink)
      const $ = cheerio.load(html)
      // Compare attribute values directly rather than interpolating the
      // fragment into a CSS selector. Fragments are not necessarily valid
      // CSS identifiers (they can start with a digit or contain '.', ':',
      // quotes, etc.) and would make the selector invalid or mismatch.
      check.fragmentFound =
        $('[id]')
          .toArray()
          .some((el) => $(el).attr('id') === fragment) ||
        $('a[name]')
          .toArray()
          .some((el) => $(el).attr('name') === fragment)
      if (!check.fragmentFound) {
        // Offer the headings' ids as suggestions for what the fragment
        // could be changed to.
        const fragmentCandidates: string[] = []
        $('h2[id], h3[id]').each((_, el) => {
          const id = $(el).attr('id')
          if (id) {
            fragmentCandidates.push(id)
          }
        })
        check.fragmentCandidates = fragmentCandidates
      }
    }
    checks.push(check)
  }
  return checks
}

/**
 * Renders the Markdown body of `page` and returns the result as a string.
 *
 * A minimal fake request is built from `permalink` and passed through the
 * same context-building middleware, in the same order, before the page's
 * Markdown is run through Liquid and the minimal unified processor.
 */
async function renderInnerHTML(page: Page, permalink: Permalink) {
  const { href, languageCode } = permalink
  const noop = () => {}
  const response = {}
  const request = {
    path: href,
    language: languageCode,
    pagePath: href,
    cookies: {},
    // Placeholder to keep TypeScript happy; the contextualize()
    // middleware replaces it with a real context object.
    context: {},
  }

  // The order matters: each middleware builds on what the previous one
  // put on the request.
  await contextualize(request, response, noop)
  await shortVersions(request, response, noop)
  await findPage(request, response, noop)
  await features(request, response, noop)

  const renderedMarkdown = await liquid.parseAndRender(page.markdown, request.context)
  const file = await createMinimalProcessor(request.context).process(renderedMarkdown)
  return file.toString()
}

/**
 * Removes the leading English language prefix from a URL.
 *
 * `/en/foo/bar` becomes `/foo/bar`, and the homepage `/en` (which has no
 * trailing slash) becomes `/`. URLs that merely start with the letters
 * "en" (for example `/enterprise`) are returned unchanged.
 *
 * @param url - A URL pathname, typically starting with `/en`.
 * @returns The pathname without the language prefix.
 */
function stripLanguagePrefix(url: string) {
  // The homepage is `/en`, not `/en/`, so the prefix regex below would
  // not match it.
  if (url === '/en') return '/'
  return url.replace(/^\/en\//, '/')
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import { Octokit } from '@octokit/rest'
import { retry } from '@octokit/plugin-retry'

// Used by `cleanUpOldBranches()` when `options.minDays` is falsy.
const DEFAULT_MIN_DAYS = 30

// Options accepted by `cleanUpOldBranches()`.
type Options = {
// Branches whose names start with this prefix are considered.
prefix: string
// A branch is deleted only when its last commit is older than this many days.
minDays: number
// Split on '/' into the owner and the repo name.
repository: string
}

/**
 * Deletes branches in `options.repository` whose names start with
 * `options.prefix` and whose latest commit has an author date more than
 * `options.minDays` days in the past.
 *
 * @param options.prefix - Branch name prefix to match.
 * @param options.minDays - Minimum age in days before a branch is deleted.
 *   Falls back to `DEFAULT_MIN_DAYS` when falsy.
 * @param options.repository - Repository in `owner/repo` form.
 * @throws If `minDays` is not a non-negative number, if `repository` is not
 *   in `owner/repo` form, or if the `GITHUB_TOKEN` environment variable is
 *   not set.
 */
export async function cleanUpOldBranches(options: Options) {
  const MILLISECONDS_PER_DAY = 1000 * 60 * 60 * 24

  const minDays = parseInt(`${options.minDays || DEFAULT_MIN_DAYS}`, 10)
  // Without this check a non-numeric value turns into NaN, and since
  // `ageDays > NaN` is always false the script would silently never delete
  // anything. A negative value would delete every matching branch.
  if (Number.isNaN(minDays) || minDays < 0) {
    throw new Error(`Invalid minimum number of days: ${options.minDays}`)
  }

  if (!process.env.GITHUB_TOKEN) {
    throw new Error('You must set the GITHUB_TOKEN environment variable.')
  }
  const octokit = retryingOctokit(process.env.GITHUB_TOKEN)

  const [owner, repo] = options.repository.split('/')
  // Fail early with a clear message rather than an obscure API error.
  if (!owner || !repo) {
    throw new Error(`Repository must be in the form 'owner/repo': ${options.repository}`)
  }

  const { data: refs } = await octokit.request(
    'GET /repos/{owner}/{repo}/git/matching-refs/{ref}',
    {
      owner,
      repo,
      ref: `heads/${options.prefix}`,
    },
  )

  for (const ref of refs) {
    const branchName = ref.ref.replace('refs/heads/', '')
    const { data: branch } = await octokit.request('GET /repos/{owner}/{repo}/branches/{branch}', {
      owner,
      repo,
      branch: branchName,
    })
    const { name, commit } = branch
    // Without an author date the age is unknown, so leave the branch alone.
    if (!commit.commit.author || !commit.commit.author.date) continue
    const lastUpdated = new Date(commit.commit.author.date)
    const ageDays = (Date.now() - lastUpdated.getTime()) / MILLISECONDS_PER_DAY
    console.log(
      `Branch ${name} was last updated ${ageDays.toFixed(1)} days ago (${lastUpdated.toISOString()})`,
    )
    if (ageDays > minDays) {
      console.log(`Deleting branch ${name} !!`)
      await octokit.request('DELETE /repos/{owner}/{repo}/git/refs/{ref}', {
        owner,
        repo,
        ref: `heads/${name}`,
      })
    } else {
      console.log(`Branch ${name} is not old enough (min days: ${minDays})`)
    }
  }
}

/**
 * Builds an Octokit client, extended with the retry plugin, that
 * authenticates with the given token.
 */
function retryingOctokit(token: string) {
  const OctokitWithRetry = Octokit.plugin(retry)
  const auth = `token ${token}`
  return new OctokitWithRetry({ auth })
}
Loading

0 comments on commit bf4af51

Please sign in to comment.