diff --git a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
index 03c9bb12..bb0b5417 100644
--- a/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
+++ b/src/SIL.Machine/Corpora/UpdateUsfmParserHandler.cs
@@ -378,7 +378,7 @@ private bool ReplaceWithNewTokens(UsfmParserState state)
bool useNewTokens =
_behavior == UpdateUsfmBehavior.StripExisting
|| (newText && !existingText)
- || (newText && _behavior == UpdateUsfmBehavior.PreferNew);
+ || (newText && _behavior == UpdateUsfmBehavior.PreferNew && !state.IsReferenceText);
if (useNewTokens)
_tokens.AddRange(_newTokens);
diff --git a/src/SIL.Machine/Corpora/UsfmParserState.cs b/src/SIL.Machine/Corpora/UsfmParserState.cs
index 1ad2c85b..e8cd08bc 100644
--- a/src/SIL.Machine/Corpora/UsfmParserState.cs
+++ b/src/SIL.Machine/Corpora/UsfmParserState.cs
@@ -76,10 +76,9 @@ public UsfmParserState(UsfmStylesheet stylesheet, ScrVers versification, IReadOn
///
public int SpecialTokenCount { get; internal set; }
- ///
- /// True if the token processed is a figure.
- ///
- public bool IsFigure => CharTag?.Marker == "fig";
+ public bool IsReferenceText =>
+ !(CharTag is null)
+ && (UsfmStylesheet.IsReference(CharTag.Marker) || UsfmStylesheet.IsFigure(CharTag.Marker));
///
/// Current paragraph tag or null for none.
diff --git a/src/SIL.Machine/Corpora/UsfmStylesheet.cs b/src/SIL.Machine/Corpora/UsfmStylesheet.cs
index e63f8c96..072971d4 100644
--- a/src/SIL.Machine/Corpora/UsfmStylesheet.cs
+++ b/src/SIL.Machine/Corpora/UsfmStylesheet.cs
@@ -11,6 +11,10 @@ namespace SIL.Machine.Corpora
public class UsfmStylesheet
{
private static readonly Regex CellRangeRegex = new Regex(@"^(t[ch][cr]?[1-5])-([2-5])$", RegexOptions.Compiled);
+ private static readonly Regex ReferenceRegex = new Regex(
+ @"^(fl|fr|fv|r|rq|va|vp|xo|xop|xot|xnt|xdc|xt|zpa-x[bcv])$",
+ RegexOptions.Compiled
+ );
private static readonly Dictionary JustificationMappings = new Dictionary<
string,
@@ -111,6 +115,16 @@ public static bool IsCellRange(string tag, out string baseMarker, out int colSpa
return false;
}
+ public static bool IsReference(string tag)
+ {
+ return !(tag is null) && ReferenceRegex.IsMatch(tag);
+ }
+
+ public static bool IsFigure(string tag)
+ {
+ return tag == "fig";
+ }
+
private static IEnumerable GetEmbeddedStylesheet(string fileName)
{
using (
diff --git a/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM b/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM
index 672b93da..2c8f11af 100644
--- a/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM
+++ b/tests/SIL.Machine.Tests/Corpora/TestData/usfm/Tes/41MATTes.SFM
@@ -22,6 +22,7 @@
\v 7
\v 8
\c 2
+\r (Mark 1:2-3; Luke 4:5-6)
\tr \tc1 Row one, column one. \tc2 Row one, column two.
\tr \tc1 Row two, column one. \tc2 Row two, column two.
\s1 Chapter \it Two \it*
@@ -38,7 +39,7 @@
\p
\v 6 Chapter two, verse \w six|strong="12345" \w*.
\p
-\v 6 Bad verse. \x - \xo abc\xt 123\x* and more content.
+\v 6 Bad verse. \x - \xo 2:3-4 \xt Cool Book 3:24 \xta The annotation \x* and more content.
\p
\v 5 Chapter two, verse five \rq (MAT 3:1)\rq*.
\v 7a Chapter two, verse seven A,
diff --git a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
index d27873d3..8873adbc 100644
--- a/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
+++ b/tests/SIL.Machine.Tests/Corpora/UpdateUsfmParserHandlerTests.cs
@@ -76,7 +76,7 @@ public void GetUsfm_Verse_SkipNote()
}
[Test]
- public void GetUsfm_Verse_ReplaceNote()
+ public void GetUsfm_Verse_ReplaceNoteKeepReference()
{
var rows = new List<(IReadOnlyList, string)>
{
@@ -87,7 +87,38 @@ public void GetUsfm_Verse_ReplaceNote()
string target = UpdateUsfm(rows);
Assert.That(
target,
- Contains.Substring("\\v 1 First verse of the second chapter. \\f + \\ft This is a new footnote.\\f*\r\n")
+ Contains.Substring(
+ "\\v 1 First verse of the second chapter. \\f + \\fr 2:1: \\ft This is a new footnote.\\f*\r\n"
+ )
+ );
+ }
+
+ [Test]
+ public void GetUsfm_Verse_PreserveFiguresAndReferences()
+ {
+ var rows = new List<(IReadOnlyList, string)>
+ {
+ // fig
+ (ScrRef("MAT 1:5"), "Fifth verse of the first chapter."),
+ (ScrRef("MAT 1:5/1:fig"), "figure text not updated"),
+ // rq
+ (ScrRef("MAT 2:5/1:rq"), "quote reference not updated"),
+ // r
+ (ScrRef("MAT 2/1:r"), "parallel reference not updated"),
+ // xo
+ (ScrRef("MAT 2:6/3:xo"), "Cross reference not update"),
+ // xt
+ (ScrRef("MAT 2:6/4:xt"), "cross reference - target reference not updated"),
+ // xta
+ (ScrRef("MAT 2:6/5:xta"), "cross reference annotation updated"),
+ };
+
+ string target = UpdateUsfm(rows);
+ Assert.That(
+ target,
+ Contains.Substring(
+ "\\v 1 First verse of the second chapter. \\f + \\fr 2:1: \\ft This is a new footnote.\\f*\r\n"
+ )
);
}