diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs index 03c9bb12..da941864 100644 --- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs +++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs @@ -380,7 +380,7 @@ private bool ReplaceWithNewTokens(UsfmParserState state) || (newText && !existingText) || (newText && _behavior == UpdateUsfmBehavior.PreferNew); - if (useNewTokens) + if (useNewTokens && !state.DoNotTranslate) _tokens.AddRange(_newTokens); _newTokens.Clear(); diff --git a/src/SIL.Machine/Corpora/UsfmParserState.cs b/src/SIL.Machine/Corpora/UsfmParserState.cs index 1ad2c85b..5b1f56dd 100644 --- a/src/SIL.Machine/Corpora/UsfmParserState.cs +++ b/src/SIL.Machine/Corpora/UsfmParserState.cs @@ -81,6 +81,10 @@ public UsfmParserState(UsfmStylesheet stylesheet, ScrVers versification, IReadOn /// public bool IsFigure => CharTag?.Marker == "fig"; + public bool DoNotTranslate => _doNotTranslateIndex >= 0; + + private int _doNotTranslateIndex = -1; + /// /// Current paragraph tag or null for none. /// Note that book and table rows are considered paragraphs for legacy checking reasons. 
@@ -189,12 +193,21 @@ internal UsfmParserElement Peek() internal void Push(UsfmParserElement elem) { _stack.Add(elem); + if ( + _doNotTranslateIndex == -1 + && (UsfmStylesheet.IsReference(elem.Marker) || UsfmStylesheet.IsFigure(elem.Marker)) + ) + { + _doNotTranslateIndex = _stack.Count - 1; + } } internal UsfmParserElement Pop() { UsfmParserElement element = _stack[_stack.Count - 1]; _stack.RemoveAt(_stack.Count - 1); + if (_doNotTranslateIndex == _stack.Count) + _doNotTranslateIndex = -1; return element; } } diff --git a/src/SIL.Machine/Corpora/UsfmStylesheet.cs b/src/SIL.Machine/Corpora/UsfmStylesheet.cs index e63f8c96..3fc244b2 100644 --- a/src/SIL.Machine/Corpora/UsfmStylesheet.cs +++ b/src/SIL.Machine/Corpora/UsfmStylesheet.cs @@ -11,6 +11,7 @@ namespace SIL.Machine.Corpora public class UsfmStylesheet { private static readonly Regex CellRangeRegex = new Regex(@"^(t[ch][cr]?[1-5])-([2-5])$", RegexOptions.Compiled); + private static readonly Regex ReferenceRegex = new Regex(@"^(fr|r|rq|xo|xt)$", RegexOptions.Compiled); private static readonly Dictionary JustificationMappings = new Dictionary< string, @@ -111,6 +112,16 @@ public static bool IsCellRange(string tag, out string baseMarker, out int colSpa return false; } + public static bool IsReference(string tag) + { + return !(tag is null) && ReferenceRegex.IsMatch(tag); + } + + public static bool IsFigure(string tag) + { + return tag == "fig"; + } + private static IEnumerable GetEmbeddedStylesheet(string fileName) { using ( diff --git a/src/SIL.Machine/Corpora/UsfmTextBase.cs b/src/SIL.Machine/Corpora/UsfmTextBase.cs index af2820e4..2e623073 100644 --- a/src/SIL.Machine/Corpora/UsfmTextBase.cs +++ b/src/SIL.Machine/Corpora/UsfmTextBase.cs @@ -289,7 +289,7 @@ protected override void StartNonVerseText(UsfmParserState state, ScriptureRef sc protected override void EndNonVerseText(UsfmParserState state, ScriptureRef scriptureRef) { string text = _rowTexts.Pop().ToString(); - if (_text._includeAllText) + if 
(_text._includeAllText && !state.DoNotTranslate) _rows.Add(_text.CreateRow(scriptureRef, text, _sentenceStart)); } diff --git a/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM b/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM index 672b93da..2c8f11af 100644 --- a/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM +++ b/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM @@ -22,6 +22,7 @@ \v 7 \v 8 \c 2 +\r (Mark 1:2-3; Luke 4:5-6) \tr \tc1 Row one, column one. \tc2 Row one, column two. \tr \tc1 Row two, column one. \tc2 Row two, column two. \s1 Chapter \it Two \it* @@ -38,7 +39,7 @@ \p \v 6 Chapter two, verse \w six|strong="12345" \w*. \p -\v 6 Bad verse. \x - \xo abc\xt 123\x* and more content. +\v 6 Bad verse. \x - \xo 2:3-4 \xt Cool Book 3:24 \xta The annotation \x* and more content. \p \v 5 Chapter two, verse five \rq (MAT 3:1)\rq*. \v 7a Chapter two, verse seven A, diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs index d27873d3..8873adbc 100644 --- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs +++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs @@ -76,7 +76,7 @@ public void GetUsfm_Verse_SkipNote() } [Test] - public void GetUsfm_Verse_ReplaceNote() + public void GetUsfm_Verse_ReplaceNoteKeepReference() { var rows = new List<(IReadOnlyList, string)> { @@ -87,7 +87,38 @@ public void GetUsfm_Verse_ReplaceNote() string target = UpdateUsfm(rows); Assert.That( target, - Contains.Substring("\\v 1 First verse of the second chapter. \\f + \\ft This is a new footnote.\\f*\r\n") + Contains.Substring( + "\\v 1 First verse of the second chapter. 
\\f + \\fr 2:1: \\ft This is a new footnote.\\f*\r\n" + ) + ); + } + + [Test] + public void GetUsfm_Verse_PreserveFiguresAndReferences() + { + var rows = new List<(IReadOnlyList, string)> + { + // fig + (ScrRef("MAT 1:5"), "Fifth verse of the first chapter."), + (ScrRef("MAT 1:5/1:fig"), "figure text not updated"), + // rq + (ScrRef("MAT 2:5/1:rq"), "quote reference not updated"), + // r + (ScrRef("MAT 2/1:r"), "parallel reference not updated"), + // xo + (ScrRef("MAT 2:6/3:xo"), "Cross reference not update"), + // xt + (ScrRef("MAT 2:6/4:xt"), "cross reference - target reference not updated"), + // xta + (ScrRef("MAT 2:6/5:xta"), "cross reference annotation updated"), + }; + + string target = UpdateUsfm(rows); + Assert.That( + target, + Contains.Substring( + "\\v 1 First verse of the second chapter. \\f + \\fr 2:1: \\ft This is a new footnote.\\f*\r\n" + ) ); }