Skip to content

Commit

Permalink
Merge pull request #419 from GovTechSG/feat/grade_readability_custom_rule_2
Browse files (browse the repository at this point in the history)

feat: reflect text readability feature
  • Loading branch information
CKodes authored Dec 9, 2024
2 parents 15663f0 + 7af4497 commit ded697a
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 51 deletions.
46 changes: 46 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"prettier": "^3.1.0",
"print-message": "^3.0.1",
"safe-regex": "^2.1.1",
"text-readability": "^1.1.0",
"typescript": "^5.4.5",
"url": "^0.11.3",
"uuid": "^11.0.3",
Expand Down
40 changes: 36 additions & 4 deletions src/crawlers/commonCrawlerFunc.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { takeScreenshotForHTMLElements } from '../screenshotFunc/htmlScreenshotF
import { isFilePath } from '../constants/common.js';
import { customAxeConfig } from './customAxeFunctions.js';
import { flagUnlabelledClickableElements } from './custom/flagUnlabelledClickableElements.js';
import { extractAndGradeText } from './custom/extractAndGradeText.js';
import { ItemsInfo } from '../mergeAxeResults.js';

// types
Expand Down Expand Up @@ -83,6 +84,7 @@ export const filterAxeResults = (
if (rule === 'frame-tested') return;

const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');

// handle rare cases where conformance level is not the first element
const levels = ['wcag2a', 'wcag2aa', 'wcag2aaa'];
if (conformance[0] !== 'best-practice' && !levels.includes(conformance[0])) {
Expand Down Expand Up @@ -291,6 +293,8 @@ export const runAxeScript = async ({

const enableWcagAaa = ruleset.includes(RuleFlags.ENABLE_WCAG_AAA);

const gradingReadabilityFlag = await extractAndGradeText(page); // Ensure flag is obtained before proceeding

await crawlee.playwrightUtils.injectFile(page, axeScript);

const results = await page.evaluate(
Expand All @@ -301,6 +305,7 @@ export const runAxeScript = async ({
disableOobee,
enableWcagAaa,
oobeeAccessibleLabelFlaggedCssSelectors,
gradingReadabilityFlag,
}) => {
try {
const evaluateAltText = (node: Element) => {
Expand Down Expand Up @@ -339,6 +344,31 @@ export const runAxeScript = async ({
...customAxeConfig.checks[0],
evaluate: evaluateAltText,
},
{
...customAxeConfig.checks[1],
evaluate: (node: HTMLElement) => {
return !node.dataset.flagged; // fail any element with a data-flagged attribute set to true
},
},
{
...customAxeConfig.checks[2],
evaluate: (_node: HTMLElement) => {
if (gradingReadabilityFlag === '') {
return true; // Pass if no readability issues
}
// Dynamically update the grading messages
const gradingCheck = customAxeConfig.checks.find(
check => check.id === 'oobee-grading-text-contents',
);
if (gradingCheck) {
gradingCheck.metadata.messages.incomplete = `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${
gradingReadabilityFlag
}.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`;
}

// Fail if readability issues are detected
},
},
],
rules: customAxeConfig.rules
.filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
Expand Down Expand Up @@ -385,19 +415,19 @@ export const runAxeScript = async ({
if (!element) {
const shadowRoots = [];
const allElements = document.querySelectorAll('*');

// Look for elements with shadow roots
allElements.forEach(el => {
if (el.shadowRoot) {
shadowRoots.push(el.shadowRoot);
}
});

// Search inside each shadow root for the element
for (const shadowRoot of shadowRoots) {
const shadowElement = shadowRoot.querySelector(cssSelector);
if (shadowElement) {
element = shadowElement; // Found the element inside shadow DOM
element = shadowElement; // Found the element inside shadow DOM
break;
}
}
Expand All @@ -414,7 +444,7 @@ export const runAxeScript = async ({
help: 'Clickable elements (i.e. elements with mouse-click interaction) must have accessible labels.',
helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
nodes: escapedCssSelectors.map(cssSelector => ({
html: document.querySelector(cssSelector).outerHTML,
html: findElementByCssSelector(cssSelector),
target: [cssSelector],
impact: 'serious' as ImpactValue,
failureSummary:
Expand Down Expand Up @@ -452,6 +482,7 @@ export const runAxeScript = async ({
disableOobee,
enableWcagAaa,
oobeeAccessibleLabelFlaggedCssSelectors,
gradingReadabilityFlag,
},
);

Expand All @@ -461,6 +492,7 @@ export const runAxeScript = async ({
}

const pageTitle = await page.evaluate(() => document.title);

return filterAxeResults(results, pageTitle, customFlowDetails);
};

Expand Down
57 changes: 57 additions & 0 deletions src/crawlers/custom/extractAndGradeText.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import { Page } from 'playwright';
import textReadability from 'text-readability';

/**
 * Grades the readability of a page's paragraph text using the Flesch Reading Ease score.
 *
 * The inner text of every `<p>` element is split into sentences that end with `.`, `!`
 * or `?`; trailing text without terminal punctuation is ignored. The collected sentences
 * are joined into one string, which is scored only when it contains at least 20 words.
 *
 * @param page - Playwright page whose paragraphs are inspected.
 * @returns The score converted to a string when the text was graded and scored 50 or
 *   lower (potentially difficult to read). An empty string in every other case: no
 *   sentences found, fewer than 20 words, a score above 50, a non-finite score, or an
 *   error while extracting/grading (the error is logged, not rethrown).
 */
export async function extractAndGradeText(page: Page): Promise<string> {
  // Text with fewer words than this is not graded at all.
  const minWordCount = 20;
  // Scores strictly above this value are considered readable enough.
  const passingScore = 50;

  try {
    // The callback is handed to page.evaluate, so everything it needs (including the
    // regex) is declared inside it rather than captured from the enclosing scope.
    const sentences: string[] = await page.evaluate(() => {
      // Match runs of text ending with ., ! or ? — built once and reused for every
      // paragraph (String.prototype.match resets lastIndex for global patterns).
      const sentencePattern = /[^.!?]*[.!?]+/g;
      const extractedSentences: string[] = [];

      document.querySelectorAll('p').forEach(element => {
        const matches = element.innerText.trim().match(sentencePattern);
        if (!matches) {
          return; // Paragraph has no punctuation-terminated sentence
        }
        matches.forEach(sentence => {
          const trimmedSentence = sentence.trim();
          if (trimmedSentence.length > 0) {
            extractedSentences.push(trimmedSentence);
          }
        });
      });

      return extractedSentences;
    });

    if (sentences.length === 0) {
      return ''; // Nothing to grade
    }

    const filteredText = sentences.join(' ').trim();
    const wordCount = filteredText.split(/\s+/).length;

    // Decide "too short to grade" explicitly instead of using 0 as a sentinel score,
    // so that a text which genuinely scores 0 is still reported.
    if (wordCount < minWordCount) {
      return '';
    }

    const readabilityScore: number = textReadability.fleschReadingEase(filteredText);

    // A non-finite score cannot be reported meaningfully; treat it as "no flag".
    if (!Number.isFinite(readabilityScore) || readabilityScore > passingScore) {
      return '';
    }

    return readabilityScore.toString();
  } catch (error) {
    console.error('Error extracting and grading text:', error);
    return ''; // Best effort: a grading failure must not flag the page
  }
}
Loading

0 comments on commit ded697a

Please sign in to comment.