Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes the incorrect stemming of the verb "revocares" and of a word that looks like a verb but is not one #21999

Open
wants to merge 7 commits into
base: trunk
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ const wordsToStem = [
[ "martes", "martes" ],
[ "microondas", "microondas" ],
[ "jesús", "jesus" ],
[ "práxedes", "praxedes" ],
// Input noun with: singular: -z, plural: -ces
[ "actriz", "actriz" ],
[ "actrices", "actriz" ],
Expand Down Expand Up @@ -73,7 +74,7 @@ const wordsToStem = [
[ "comienzo", "comenz" ],
// Input a word that ends in a common verb suffix.
[ "saltaron", "salt" ],
// [ "revocares", "revoc" ],
[ "revocares", "revoc" ],
// Input a word that ends in -os, -s, -a, -o, -á, -í, -ó, -é, -e.
[ "agostinas", "agostin" ],
[ "boboré", "bobor" ],
Expand All @@ -88,9 +89,10 @@ const wordsToStem = [
// Input a word that ends in -en, -es, -éis, -emos and is preceded by gu.
[ "distinguen", "distingu" ],
[ "alarguemos", "alarg" ],
// Input a word that looks like a verb form but it's not.
// Input a word that looks like a verb form, but it's not.
// [ "cabalgada", "cabalgad" ],
[ "abacería", "abaceri" ],
[ "lugar", "lugar" ],
// Input a word that looks like a verb form and is on the list of stems that belong together.
[ "san", "san" ],
[ "virgen", "virgen" ],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8169,7 +8169,8 @@ const wordsToStem = [
const wordsToStemWithoutFunctionWords = filterFunctionWordsFromArray( wordsToStem, functionWords );

describe( "Generate stems for Spanish words", () => {
const corpusWithStems = wordsToStemWithoutFunctionWords.map( word => [ word, stem( word, morphologyDataES ) ] );

console.log( JSON.stringify( corpusWithStems ) );
it( "Generate stems for Spanish words", () => {
const corpusWithStems = wordsToStemWithoutFunctionWords.map( word => [ word, stem( word, morphologyDataES ) ] );
console.log( JSON.stringify( corpusWithStems ) );
} );
} );
Loading