From 2a8af34c6cbebdd0e1fd118d372a88b393b8f10f Mon Sep 17 00:00:00 2001
From: John Lambert
Date: Mon, 16 Dec 2024 12:36:30 -0500
Subject: [PATCH] Tests not complete yet

---
 .../Corpora/UpdateUsfmParserHandler.cs | 2 +-
 src/SIL.Machine/Corpora/UsfmParserState.cs | 7 ++--
 src/SIL.Machine/Corpora/UsfmStylesheet.cs | 14 ++++++++
 .../Corpora/TestData/usfm/Tes/41MATTes.SFM | 3 +-
 .../Corpora/UpdateUsfmParserHandlerTests.cs | 35 +++++++++++++++++--
 5 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
index 03c9bb12b..bb0b54173 100644
--- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
+++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
@@ -378,7 +378,7 @@ private bool ReplaceWithNewTokens(UsfmParserState state)
             bool useNewTokens =
                 _behavior == UpdateUsfmBehavior.StripExisting
                 || (newText && !existingText)
-                || (newText && _behavior == UpdateUsfmBehavior.PreferNew);
+                || (newText && _behavior == UpdateUsfmBehavior.PreferNew && !state.IsReferenceText); // Only the PreferNew branch is gated on IsReferenceText; the StripExisting and "no existing text" branches above are unchanged.
 
             if (useNewTokens)
                 _tokens.AddRange(_newTokens);
diff --git a/src/SIL.Machine/Corpora/UsfmParserState.cs b/src/SIL.Machine/Corpora/UsfmParserState.cs
index 1ad2c85b0..e8cd08bc3 100644
--- a/src/SIL.Machine/Corpora/UsfmParserState.cs
+++ b/src/SIL.Machine/Corpora/UsfmParserState.cs
@@ -76,10 +76,9 @@ public UsfmParserState(UsfmStylesheet stylesheet, ScrVers versification, IReadOn
         ///
         public int SpecialTokenCount { get; internal set; }
 
-        ///
-        /// True if the token processed is a figure.
-        ///
-        public bool IsFigure => CharTag?.Marker == "fig";
+        public bool IsReferenceText => // True when the current character tag's marker is a reference marker or the figure marker.
+            !(CharTag is null) // No character tag: never reference text.
+            && (UsfmStylesheet.IsReference(CharTag.Marker) || UsfmStylesheet.IsFigure(CharTag.Marker)); // NOTE(review): this hunk removes the public IsFigure property - confirm no other caller still depends on it.
 
         ///
         /// Current paragraph tag or null for none.
diff --git a/src/SIL.Machine/Corpora/UsfmStylesheet.cs b/src/SIL.Machine/Corpora/UsfmStylesheet.cs
index e63f8c96b..072971d46 100644
--- a/src/SIL.Machine/Corpora/UsfmStylesheet.cs
+++ b/src/SIL.Machine/Corpora/UsfmStylesheet.cs
@@ -11,6 +11,10 @@ namespace SIL.Machine.Corpora
     public class UsfmStylesheet
     {
         private static readonly Regex CellRangeRegex = new Regex(@"^(t[ch][cr]?[1-5])-([2-5])$", RegexOptions.Compiled);
+        private static readonly Regex ReferenceRegex = new Regex( // Anchored pattern: the whole marker must be one of the listed alternatives.
+            @"^(fl|fr|fv|r|rq|va|vp|xo|xop|xot|xnt|xdc|xt|zpa-x[bcv])$", // NOTE(review): UsfmParserState.IsReferenceText only passes CharTag.Marker here; if "r" is a paragraph-level marker it may never match through that path - confirm.
+            RegexOptions.Compiled
+        );
 
         private static readonly Dictionary JustificationMappings = new Dictionary<
             string,
@@ -111,6 +115,16 @@ public static bool IsCellRange(string tag, out string baseMarker, out int colSpa
             return false;
         }
 
+        public static bool IsReference(string tag) // True when tag is non-null and matches ReferenceRegex; a null tag yields false.
+        {
+            return !(tag is null) && ReferenceRegex.IsMatch(tag);
+        }
+
+        public static bool IsFigure(string tag) // True only when tag is exactly "fig"; a null tag yields false.
+        {
+            return tag == "fig";
+        }
+
         private static IEnumerable GetEmbeddedStylesheet(string fileName)
         {
             using (
diff --git a/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM b/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM
index 672b93daa..2c8f11af7 100644
--- a/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM
+++ b/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM
@@ -22,6 +22,7 @@
 \v 7
 \v 8
 \c 2
+\r (Mark 1:2-3; Luke 4:5-6)
 \tr \tc1 Row one, column one. \tc2 Row one, column two.
 \tr \tc1 Row two, column one. \tc2 Row two, column two.
 \s1 Chapter \it Two \it*
@@ -38,7 +39,7 @@
 \p
 \v 6 Chapter two, verse \w six|strong="12345" \w*.
 \p
-\v 6 Bad verse. \x - \xo abc\xt 123\x* and more content.
+\v 6 Bad verse. \x - \xo 2:3-4 \xt Cool Book 3:24 \xta The annotation \x* and more content.
 \p
 \v 5 Chapter two, verse five \rq (MAT 3:1)\rq*.
 \v 7a Chapter two, verse seven A,
diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
index d27873d3b..8873adbcf 100644
--- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
+++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
@@ -76,7 +76,7 @@ public void GetUsfm_Verse_SkipNote()
     }
 
     [Test]
-    public void GetUsfm_Verse_ReplaceNote()
+    public void GetUsfm_Verse_ReplaceNoteKeepReference()
     {
         var rows = new List<(IReadOnlyList, string)>
         {
@@ -87,7 +87,38 @@ public void GetUsfm_Verse_ReplaceNote()
         string target = UpdateUsfm(rows);
         Assert.That(
             target,
-            Contains.Substring("\\v 1 First verse of the second chapter. \\f + \\ft This is a new footnote.\\f*\r\n")
+            Contains.Substring( // The expected footnote now carries "\fr 2:1: " ahead of the new \ft text.
+                "\\v 1 First verse of the second chapter. \\f + \\fr 2:1: \\ft This is a new footnote.\\f*\r\n"
+            )
+        );
+    }
+
+    [Test]
+    public void GetUsfm_Verse_PreserveFiguresAndReferences()
+    {
+        var rows = new List<(IReadOnlyList, string)>
+        {
+            // fig: treated as reference text via UsfmStylesheet.IsFigure
+            (ScrRef("MAT 1:5"), "Fifth verse of the first chapter."),
+            (ScrRef("MAT 1:5/1:fig"), "figure text not updated"),
+            // rq: listed in UsfmStylesheet.ReferenceRegex
+            (ScrRef("MAT 2:5/1:rq"), "quote reference not updated"),
+            // r: listed in UsfmStylesheet.ReferenceRegex
+            (ScrRef("MAT 2/1:r"), "parallel reference not updated"),
+            // xo: listed in UsfmStylesheet.ReferenceRegex
+            (ScrRef("MAT 2:6/3:xo"), "Cross reference not update"),
+            // xt: listed in UsfmStylesheet.ReferenceRegex
+            (ScrRef("MAT 2:6/4:xt"), "cross reference - target reference not updated"),
+            // xta: not listed in UsfmStylesheet.ReferenceRegex; per its row text this is the one row meant to be applied
+            (ScrRef("MAT 2:6/5:xta"), "cross reference annotation updated"),
+        };
+
+        string target = UpdateUsfm(rows); // NOTE(review): the assertion below repeats the substring checked in GetUsfm_Verse_ReplaceNoteKeepReference and inspects none of the rows above - still to be completed.
+        Assert.That(
+            target,
+            Contains.Substring(
+                "\\v 1 First verse of the second chapter. \\f + \\fr 2:1: \\ft This is a new footnote.\\f*\r\n"
+            )
         );
     }
 