Skip to content

Commit

Permalink
Merge pull request #34373 from github/repo-sync
Browse files Browse the repository at this point in the history
Repo sync
  • Loading branch information
docs-bot authored Aug 21, 2024
2 parents f2a4065 + eac25ac commit 21dd70b
Show file tree
Hide file tree
Showing 9 changed files with 132 additions and 50 deletions.
102 changes: 68 additions & 34 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"sync-search-indices": "node src/search/scripts/sync-search-indices.js",
"sync-search-server": "cross-env NODE_ENV=production PORT=4002 MINIMAL_RENDER=true CHANGELOG_DISABLED=true tsx src/frame/server.ts",
"sync-secret-scanning": "tsx src/secret-scanning/scripts/sync.ts",
"sync-webhooks": "src/rest/scripts/update-files.js -o webhooks",
"sync-webhooks": "npx tsx src/rest/scripts/update-files.ts -o webhooks",
"test": "vitest",
"test-local-dev": "node src/workflows/test-local-dev.js",
"test-moved-content": "tsx src/content-render/scripts/test-moved-content.ts",
Expand Down Expand Up @@ -236,7 +236,7 @@
"@primer/octicons-react": "^19.11.0",
"@primer/react": "36.27.0",
"accept-language-parser": "^1.5.0",
"ajv": "^8.16.0",
"ajv": "^8.17.1",
"ajv-errors": "^3.0.0",
"ajv-formats": "^3.0.1",
"bottleneck": "2.19.5",
Expand All @@ -254,7 +254,7 @@
"express": "4.19.2",
"express-rate-limit": "7.4.0",
"fastest-levenshtein": "1.0.16",
"file-type": "19.1.0",
"file-type": "19.4.1",
"flat": "^6.0.1",
"github-slugger": "^2.0.0",
"glob": "11.0.0",
Expand Down Expand Up @@ -323,7 +323,7 @@
"@graphql-inspector/core": "^6.1.0",
"@graphql-tools/load": "^8.0.0",
"@octokit/rest": "^20.1.0",
"@playwright/test": "1.44.1",
"@playwright/test": "1.46.1",
"@types/accept-language-parser": "1.5.6",
"@types/connect-datadog": "0.0.10",
"@types/connect-timeout": "0.0.39",
Expand Down
2 changes: 1 addition & 1 deletion src/audit-logs/lib/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
"apiOnlyEvents": "This event is not available in the web interface, only via the REST API, audit log streaming, or JSON/CSV exports.",
"apiRequestEvent": "This event is only available via audit log streaming."
},
"sha": "156e6897dededb381697da9a39e7bb6eb7971480"
"sha": "4516a2f1ddf74032b4474b272c9850055470cad3"
}
19 changes: 19 additions & 0 deletions src/events/analyze-comment.js
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,25 @@ export const SIGNAL_RATINGS = [
},
]

/**
 * Guess which language a survey comment is written in.
 *
 * The guess comes from `language.guessBest` (@horizon-rs/language-guesser),
 * which is based on tri-grams and can lead to false positives. For example,
 * it thinks that 'Thamk you ❤️🙏' is Haitian! And that 'I wanne robux 1000'
 * is Polish! That's because they are short and there are not enough clues to
 * guess what language it is. Given that this is just a signal, and not a
 * hard blocker, treat the result as more of a clue than a fact.
 *
 * @param {string} [comment] - The raw comment text.
 * @returns {Promise<string|undefined>} The `alpha2` value of the best guess,
 *   or `undefined` when the comment is missing, not a string, blank, or when
 *   the guesser yields no result.
 */
export async function getGuessedLanguage(comment) {
  // A non-string value cannot be trimmed; treat it the same as "no comment"
  // instead of throwing a TypeError.
  if (typeof comment !== 'string') return

  // Trim once and reuse the result for both the emptiness check and the guess.
  const trimmed = comment.trim()
  if (!trimmed) return

  const bestGuess = language.guessBest(trimmed)
  // The guesser may not produce a result at all.
  if (!bestGuess) return

  return bestGuess.alpha2
}

export async function analyzeComment(text, language = 'en') {
const signals = []
let rating = 1.0
Expand Down
1 change: 1 addition & 0 deletions src/events/components/events.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ type SendEventProps = {
survey_comment?: string
survey_email?: string
survey_rating?: number
survey_comment_language?: string
}
}

Expand Down
7 changes: 6 additions & 1 deletion src/events/lib/schema.js
Original file line number Diff line number Diff line change
Expand Up @@ -392,7 +392,12 @@ const survey = {
survey_rating: {
type: 'number',
description:
'The compute rating of the quality of the survey comment. Used for spam filtering and quality control.',
'The computed rating of the quality of the survey comment. Used for spam filtering and quality control.',
},
survey_comment_language: {
type: 'string',
description:
'The guessed language of the survey comment. The guessed language is very inaccurate when the string contains fewer than 3 or 4 words.',
},
},
}
Expand Down
3 changes: 2 additions & 1 deletion src/events/middleware.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { noCacheControl } from '#src/frame/middleware/cache-control.js'
import { getJsonValidator } from '#src/tests/lib/validate-json-schema.js'
import { formatErrors } from './lib/middleware-errors.js'
import { publish as _publish } from './lib/hydro.js'
import { analyzeComment } from './analyze-comment.js'
import { analyzeComment, getGuessedLanguage } from './analyze-comment.js'

const router = express.Router()
const OMIT_FIELDS = ['type']
Expand Down Expand Up @@ -74,6 +74,7 @@ router.post(
comment: req.body.survey_comment,
language: req.body.context.path_language,
})
req.body.survey_comment_language = await getGuessedLanguage(req.body.survey_comment)
}

await publish({
Expand Down
24 changes: 23 additions & 1 deletion src/events/tests/analyze-comments.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { describe, expect, test } from 'vitest'

import { analyzeComment } from '../analyze-comment.js'
import { analyzeComment, getGuessedLanguage } from '../analyze-comment.js'

describe('analyzeComment', () => {
test('email only', async () => {
Expand Down Expand Up @@ -248,4 +248,26 @@ describe('analyzeComment', () => {
expect(signals.includes('spammy-words')).toBeFalsy()
}
})

test('guessed-language', async () => {
  // Sentences long enough for the guesser to identify correctly.
  {
    const guessedLanguage = await getGuessedLanguage('Garçon des la voituré')
    expect(guessedLanguage).toBe('fr')
  }
  {
    const guessedLanguage = await getGuessedLanguage('english words longer sentence this time')
    expect(guessedLanguage).toBe('en')
  }

  // False positives due to short text.
  // These must go through getGuessedLanguage: the result of analyzeComment
  // is not a language code, so asserting `.not.toBe('en')` on it would pass
  // no matter what the guesser does.
  // NOTE(review): the expectations below depend on what the guesser returns
  // for single words; confirm they hold for the installed version.
  {
    const guessedLanguage = await getGuessedLanguage('Hello')
    expect(guessedLanguage).not.toBe('en')
  }
  {
    const guessedLanguage = await getGuessedLanguage('Garçon')
    expect(guessedLanguage).not.toBe('fr')
  }
})
})
Loading

0 comments on commit 21dd70b

Please sign in to comment.