-
Notifications
You must be signed in to change notification settings - Fork 45
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #419 from GovTechSG/feat/grade_readability_custom_rule_2
feat: reflect text readability feature
- Loading branch information
Showing
8 changed files
with
251 additions
and
51 deletions.
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import { Page } from 'playwright'; | ||
import textReadability from 'text-readability'; | ||
|
||
/** Minimum number of words the extracted text must contain before it is graded. */
const MIN_WORDS_FOR_GRADING = 20;

/** Flesch Reading Ease scores above this value are considered readable and are not reported. */
const READABLE_SCORE_THRESHOLD = 50;

/**
 * Collects the punctuated sentences found in every `<p>` element of the page
 * and grades the combined text with `textReadability.fleschReadingEase`.
 *
 * @param page - Playwright page whose paragraph text is inspected.
 * @returns The score converted to a string when the text was graded and the
 *   score is at or below {@link READABLE_SCORE_THRESHOLD}. An empty string is
 *   returned when no sentences were found, when the text has fewer than
 *   {@link MIN_WORDS_FOR_GRADING} words, when the score is above the threshold
 *   or not a finite number, or when any step throws (the error is logged).
 */
export async function extractAndGradeText(page: Page): Promise<string> {
  try {
    const sentences: string[] = await page.evaluate(() => {
      // Declared inside the callback so it does not rely on anything from the
      // enclosing scope; built once instead of once per paragraph.
      const sentencePattern = /[^.!?]*[.!?]+/g; // Runs of text ending with ., ! or ?
      const extractedSentences: string[] = [];

      document.querySelectorAll('p').forEach(element => {
        const matches = element.innerText.trim().match(sentencePattern);
        if (!matches) {
          return; // Paragraph has no text terminated by sentence punctuation
        }
        for (const sentence of matches) {
          const trimmedSentence = sentence.trim();
          if (trimmedSentence.length > 0) {
            extractedSentences.push(trimmedSentence);
          }
        }
      });

      return extractedSentences;
    });

    if (sentences.length === 0) {
      return '';
    }

    const filteredText = sentences.join(' ').trim();
    const wordCount = filteredText.split(/\s+/).length;

    // Too little text to grade. Returning here (instead of using 0 as a
    // "not graded" marker) keeps a genuine score of 0 distinguishable.
    if (wordCount < MIN_WORDS_FOR_GRADING) {
      return '';
    }

    const readabilityScore = textReadability.fleschReadingEase(filteredText);

    // Only hard-to-read text is reported; a non-finite score is treated as
    // "nothing to report" rather than being stringified as 'NaN'.
    if (!Number.isFinite(readabilityScore) || readabilityScore > READABLE_SCORE_THRESHOLD) {
      return '';
    }

    return readabilityScore.toString();
  } catch (error) {
    console.error('Error extracting and grading text:', error);
    return ''; // Best-effort: a failure is reported as "no score"
  }
}
Oops, something went wrong.