From d7bf96f1019ba7af1efeee45f7005ff58f1b5be1 Mon Sep 17 00:00:00 2001 From: Gabe Stocco <98900+gfs@users.noreply.github.com> Date: Wed, 10 Aug 2022 23:34:54 -0700 Subject: [PATCH] Workaround for namespace based parsing. (#499) --- AppInspector.RulesEngine/TextContainer.cs | 9 ++- .../RuleProcessor/WithinClauseTests.cs | 80 ++++++++++++++++++- 2 files changed, 86 insertions(+), 3 deletions(-) diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs index 30f27506..8930c54c 100644 --- a/AppInspector.RulesEngine/TextContainer.cs +++ b/AppInspector.RulesEngine/TextContainer.cs @@ -148,8 +148,13 @@ public TextContainer(string content, string language, Languages languages, ILogg { if (nodeIter.Current is not null) { - var outerLoc = FullContent[minIndex..].IndexOf(nodeIter.Current.OuterXml); - var offset = FullContent[outerLoc..].IndexOf(nodeIter.Current.InnerXml) + outerLoc + minIndex; + // First we find the name. The search runs on the slice starting at minIndex, so add minIndex back to get an absolute index into FullContent. + var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name) + minIndex; + // Then we grab the index of the end of this tag, relative to nameIndex. + // We can't use OuterXML because the parser will inject the namespace if present into the OuterXML so it doesn't match the original text. 
+ var endTagIndex = FullContent[nameIndex..].IndexOf('>'); + var totalOffset = nameIndex + endTagIndex; + var offset = FullContent[totalOffset..].IndexOf(nodeIter.Current.InnerXml) + totalOffset; // Move the minimum index up in case there are multiple instances of identical OuterXML // This ensures we won't re-find the same one minIndex = offset; diff --git a/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs b/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs index b1da5bc6..15dccb20 100644 --- a/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs +++ b/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Diagnostics.CodeAnalysis; using System.IO; using System.Linq; @@ -220,6 +221,83 @@ public void WithinClauseWithMultipleConditions() Assert.AreEqual(2, matches.Count); } } + + [TestMethod] + public void TestXmlWithAndWithoutNamespace() + { + var content = @" + + 4.0.0 + + xxx + xxx + 0.1.0-SNAPSHOT + pom + + ${project.groupId}:${project.artifactId} + + + + 17 + + +"; + // The same as above but with no namespace specified + var noNamespaceContent = @" + + 4.0.0 + + xxx + xxx + 0.1.0-SNAPSHOT + pom + + ${project.groupId}:${project.artifactId} + + + + 17 + + +"; + var rule = @"[{ + ""name"": ""Source code: Java 17"", + ""id"": ""CODEJAVA000000"", + ""description"": ""Java 17 maven configuration"", + ""applies_to_file_regex"": [ + ""pom.xml"" + ], + ""tags"": [ + ""Code.Java.17"" + ], + ""severity"": ""critical"", + ""patterns"": [ + { + ""pattern"": ""17"", + ""xpaths"" : [""/*[local-name(.)='project']/*[local-name(.)='properties']/*[local-name(.)='java.version']""], + ""type"": ""regex"", + ""scopes"": [ + ""code"" + ], + ""modifiers"": [ + ""i"" + ], + ""confidence"": ""high"" + } + ] + }]"; + RuleSet rules = new(null); + var originalSource = "TestRules"; + rules.AddString(rule, originalSource); + var analyzer = new 
Microsoft.ApplicationInspector.RulesEngine.RuleProcessor(rules, new RuleProcessorOptions(){Parallel = false, AllowAllTagsInBuildFiles = true}); + if (_languages.FromFileNameOut("pom.xml", out LanguageInfo info)) + { + var matches = analyzer.AnalyzeFile(content, new Microsoft.CST.RecursiveExtractor.FileEntry("pom.xml", new MemoryStream()), info); + Assert.AreEqual(1, matches.Count); + matches = analyzer.AnalyzeFile(noNamespaceContent, new Microsoft.CST.RecursiveExtractor.FileEntry("pom.xml", new MemoryStream()), info); + Assert.AreEqual(1, matches.Count); + } + } [DataRow(true, 1, new[] { 2 })] [DataRow(false, 1, new[] { 3 })]