Merge pull request #419 from GovTechSG/feat/grade_readability_custom_rule_2

feat: reflect text readability feature
GovTechSG · Dec 9, 2024 · ded697a · ded697a
2 parents 15663f0 + 7af4497
commit ded697a
Show file tree

Hide file tree

Showing 8 changed files with 251 additions and 51 deletions.
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -25,6 +25,7 @@
     "prettier": "^3.1.0",
     "print-message": "^3.0.1",
     "safe-regex": "^2.1.1",
+    "text-readability": "^1.1.0",
     "typescript": "^5.4.5",
     "url": "^0.11.3",
     "uuid": "^11.0.3",

diff --git a/src/crawlers/commonCrawlerFunc.ts b/src/crawlers/commonCrawlerFunc.ts
@@ -13,6 +13,7 @@ import { takeScreenshotForHTMLElements } from '../screenshotFunc/htmlScreenshotF
 import { isFilePath } from '../constants/common.js';
 import { customAxeConfig } from './customAxeFunctions.js';
 import { flagUnlabelledClickableElements } from './custom/flagUnlabelledClickableElements.js';
+import { extractAndGradeText } from './custom/extractAndGradeText.js';
 import { ItemsInfo } from '../mergeAxeResults.js';
 
 // types
@@ -83,6 +84,7 @@ export const filterAxeResults = (
     if (rule === 'frame-tested') return;
 
     const conformance = tags.filter(tag => tag.startsWith('wcag') || tag === 'best-practice');
+
     // handle rare cases where conformance level is not the first element
     const levels = ['wcag2a', 'wcag2aa', 'wcag2aaa'];
     if (conformance[0] !== 'best-practice' && !levels.includes(conformance[0])) {
@@ -291,6 +293,8 @@ export const runAxeScript = async ({
 
   const enableWcagAaa = ruleset.includes(RuleFlags.ENABLE_WCAG_AAA);
 
+  const gradingReadabilityFlag = await extractAndGradeText(page); // Ensure flag is obtained before proceeding
+
   await crawlee.playwrightUtils.injectFile(page, axeScript);
 
   const results = await page.evaluate(
@@ -301,6 +305,7 @@ export const runAxeScript = async ({
       disableOobee,
       enableWcagAaa,
       oobeeAccessibleLabelFlaggedCssSelectors,
+      gradingReadabilityFlag,
     }) => {
       try {
         const evaluateAltText = (node: Element) => {
@@ -339,6 +344,31 @@ export const runAxeScript = async ({
               ...customAxeConfig.checks[0],
               evaluate: evaluateAltText,
             },
+            {
+              ...customAxeConfig.checks[1],
+              evaluate: (node: HTMLElement) => {
+                return !node.dataset.flagged; // fail any element with a data-flagged attribute set to true
+              },
+            },
+            {
+              ...customAxeConfig.checks[2],
+              evaluate: (_node: HTMLElement) => {
+                if (gradingReadabilityFlag === '') {
+                  return true; // Pass if no readability issues
+                }
+                // Dynamically update the grading messages
+                const gradingCheck = customAxeConfig.checks.find(
+                  check => check.id === 'oobee-grading-text-contents',
+                );
+                if (gradingCheck) {
+                  gradingCheck.metadata.messages.incomplete = `The text content is potentially difficult to read, with a Flesch-Kincaid Reading Ease score of ${
+                    gradingReadabilityFlag
+                  }.\nThe target passing score is above 50, indicating content readable by university students and lower grade levels.\nA higher score reflects better readability.`;
+                }
+
+                // Fail if readability issues are detected
+              },
+            },
           ],
           rules: customAxeConfig.rules
             .filter(rule => (disableOobee ? !rule.id.startsWith('oobee') : true))
@@ -385,19 +415,19 @@ export const runAxeScript = async ({
               if (!element) {
                 const shadowRoots = [];
                 const allElements = document.querySelectorAll('*');
-                
+
                 // Look for elements with shadow roots
                 allElements.forEach(el => {
                   if (el.shadowRoot) {
                     shadowRoots.push(el.shadowRoot);
                   }
                 });
-          
+
                 // Search inside each shadow root for the element
                 for (const shadowRoot of shadowRoots) {
                   const shadowElement = shadowRoot.querySelector(cssSelector);
                   if (shadowElement) {
-                    element = shadowElement;  // Found the element inside shadow DOM
+                    element = shadowElement; // Found the element inside shadow DOM
                     break;
                   }
                 }
@@ -414,7 +444,7 @@ export const runAxeScript = async ({
               help: 'Clickable elements (i.e. elements with mouse-click interaction) must have accessible labels.',
               helpUrl: 'https://www.deque.com/blog/accessible-aria-buttons',
               nodes: escapedCssSelectors.map(cssSelector => ({
-                html: document.querySelector(cssSelector).outerHTML,
+                html: findElementByCssSelector(cssSelector),
                 target: [cssSelector],
                 impact: 'serious' as ImpactValue,
                 failureSummary:
@@ -452,6 +482,7 @@ export const runAxeScript = async ({
       disableOobee,
       enableWcagAaa,
       oobeeAccessibleLabelFlaggedCssSelectors,
+      gradingReadabilityFlag,
     },
   );
 
@@ -461,6 +492,7 @@ export const runAxeScript = async ({
   }
 
   const pageTitle = await page.evaluate(() => document.title);
+
   return filterAxeResults(results, pageTitle, customFlowDetails);
 };
 

diff --git a/src/crawlers/custom/extractAndGradeText.ts b/src/crawlers/custom/extractAndGradeText.ts
@@ -0,0 +1,57 @@
+import { Page } from 'playwright';
+import textReadability from 'text-readability';
+
+/**
+ * Grades the readability of the page's paragraph text with the Flesch Reading Ease formula.
+ *
+ * Only complete sentences (ending in `.`, `!` or `?`) found inside `<p>` elements are graded,
+ * and only when they add up to at least 20 words; shorter samples are not graded.
+ *
+ * @param page - Playwright page whose rendered paragraphs are inspected.
+ * @returns The score as a string when it is 50 or lower (hard to read); otherwise `''`, which
+ *   also covers pages with too little text and any error raised while extracting or grading.
+ */
+export async function extractAndGradeText(page: Page): Promise<string> {
+  try {
+    // Runs in the browser context, so it may only use DOM APIs and its own locals.
+    const sentences: string[] = await page.evaluate(() => {
+      const sentencePattern = /[^.!?]*[.!?]+/g; // A run of text closed by ., ! or ?
+      const extractedSentences: string[] = [];
+
+      document.querySelectorAll('p').forEach(element => {
+        // Text without closing punctuation (e.g. captions, labels) yields no match and is dropped.
+        const matches = element.innerText.trim().match(sentencePattern) ?? [];
+        matches.forEach(sentence => {
+          const trimmedSentence = sentence.trim();
+          if (trimmedSentence.length > 0) {
+            extractedSentences.push(trimmedSentence);
+          }
+        });
+      });
+
+      return extractedSentences;
+    });
+
+    // Nothing gradable on the page.
+    if (sentences.length === 0) {
+      return '';
+    }
+
+    const filteredText = sentences.join(' ').trim();
+    const wordCount = filteredText.split(/\s+/).length;
+
+    // Samples under 20 words are skipped explicitly rather than being given a placeholder score.
+    if (wordCount < 20) {
+      return '';
+    }
+
+    // Lower is harder to read; very dense text can score 0 or below and must still be reported,
+    // so the "not graded" case is handled above instead of by a 0 sentinel.
+    const readabilityScore = textReadability.fleschReadingEase(filteredText);
+
+    return readabilityScore > 50 ? '' : readabilityScore.toString();
+  } catch (error) {
+    console.error('Error extracting and grading text:', error);
+    return ''; // Best-effort: report no readability issue rather than propagating the error.
+  }
+}