From d7bf96f1019ba7af1efeee45f7005ff58f1b5be1 Mon Sep 17 00:00:00 2001
From: Gabe Stocco <98900+gfs@users.noreply.github.com>
Date: Wed, 10 Aug 2022 23:34:54 -0700
Subject: [PATCH] Workaround for namespace based parsing. (#499)
---
AppInspector.RulesEngine/TextContainer.cs | 9 ++-
.../RuleProcessor/WithinClauseTests.cs | 80 ++++++++++++++++++-
2 files changed, 86 insertions(+), 3 deletions(-)
diff --git a/AppInspector.RulesEngine/TextContainer.cs b/AppInspector.RulesEngine/TextContainer.cs
index 30f27506..8930c54c 100644
--- a/AppInspector.RulesEngine/TextContainer.cs
+++ b/AppInspector.RulesEngine/TextContainer.cs
@@ -148,8 +148,13 @@ public TextContainer(string content, string language, Languages languages, ILogg
{
if (nodeIter.Current is not null)
{
- var outerLoc = FullContent[minIndex..].IndexOf(nodeIter.Current.OuterXml);
- var offset = FullContent[outerLoc..].IndexOf(nodeIter.Current.InnerXml) + outerLoc + minIndex;
+ // First we find the name; IndexOf is relative to the slice, so add minIndex back to get an absolute index into FullContent.
+ var nameIndex = FullContent[minIndex..].IndexOf(nodeIter.Current.Name) + minIndex;
+ // Then we grab the index of the end of this tag, searching from the name onward (result is relative to nameIndex).
+ // We can't use OuterXml because the parser will inject the namespace, if present, into the OuterXml so it doesn't match the original text.
+ var endTagIndex = FullContent[nameIndex..].IndexOf('>');
+ var totalOffset = nameIndex + endTagIndex;
+ var offset = FullContent[totalOffset..].IndexOf(nodeIter.Current.InnerXml) + totalOffset;
// Move the minimum index up in case there are multiple instances of identical OuterXML
// This ensures we won't re-find the same one
minIndex = offset;
diff --git a/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs b/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs
index b1da5bc6..15dccb20 100644
--- a/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs
+++ b/AppInspector.Tests/RuleProcessor/WithinClauseTests.cs
@@ -1,4 +1,5 @@
-using System.Collections.Generic;
+using System;
+using System.Collections.Generic;
using System.Diagnostics.CodeAnalysis;
using System.IO;
using System.Linq;
@@ -220,6 +221,83 @@ public void WithinClauseWithMultipleConditions()
Assert.AreEqual(2, matches.Count);
}
}
+
+ /// <summary>Checks that the java.version XPath rule yields one match for a pom.xml both with and without an XML namespace.</summary>
+ [TestMethod]
+ public void TestXmlWithAndWithoutNamespace()
+ {
+ var content = @"
+
+ 4.0.0
+
+ xxx
+ xxx
+ 0.1.0-SNAPSHOT
+ pom
+
+ ${project.groupId}:${project.artifactId}
+
+
+
+ 17
+
+
+";
+ // The same as above but with no namespace specified
+ var noNamespaceContent = @"
+
+ 4.0.0
+
+ xxx
+ xxx
+ 0.1.0-SNAPSHOT
+ pom
+
+ ${project.groupId}:${project.artifactId}
+
+
+
+ 17
+
+
+";
+ var rule = @"[{
+ ""name"": ""Source code: Java 17"",
+ ""id"": ""CODEJAVA000000"",
+ ""description"": ""Java 17 maven configuration"",
+ ""applies_to_file_regex"": [
+ ""pom.xml""
+ ],
+ ""tags"": [
+ ""Code.Java.17""
+ ],
+ ""severity"": ""critical"",
+ ""patterns"": [
+ {
+ ""pattern"": ""17"",
+ ""xpaths"" : [""/*[local-name(.)='project']/*[local-name(.)='properties']/*[local-name(.)='java.version']""],
+ ""type"": ""regex"",
+ ""scopes"": [
+ ""code""
+ ],
+ ""modifiers"": [
+ ""i""
+ ],
+ ""confidence"": ""high""
+ }
+ ]
+ }]";
+ RuleSet rules = new(null);
+ rules.AddString(rule, "TestRules");
+ var analyzer = new Microsoft.ApplicationInspector.RulesEngine.RuleProcessor(rules, new RuleProcessorOptions(){Parallel = false, AllowAllTagsInBuildFiles = true});
+ // Fail loudly instead of passing vacuously when pom.xml cannot be resolved to a language.
+ Assert.IsTrue(_languages.FromFileNameOut("pom.xml", out LanguageInfo info), "pom.xml was not resolved to a known language");
+ // The namespaced document and the namespace-free document must each produce exactly one match.
+ var matches = analyzer.AnalyzeFile(content, new Microsoft.CST.RecursiveExtractor.FileEntry("pom.xml", new MemoryStream()), info);
+ Assert.AreEqual(1, matches.Count);
+ matches = analyzer.AnalyzeFile(noNamespaceContent, new Microsoft.CST.RecursiveExtractor.FileEntry("pom.xml", new MemoryStream()), info);
+ Assert.AreEqual(1, matches.Count);
+ }
[DataRow(true, 1, new[] { 2 })]
[DataRow(false, 1, new[] { 3 })]