From 5df84816f5473d2f2662a8b87560dc584ca01a96 Mon Sep 17 00:00:00 2001 From: Peter Monks Date: Thu, 2 Nov 2023 19:11:26 -0700 Subject: [PATCH] :new: Add normalisation of concluded SPDX expressions as well as declared ones --- src/lice_comb/impl/matching.clj | 6 +++++- src/lice_comb/impl/utils.clj | 8 ++++++++ test/lice_comb/matching_test.clj | 2 +- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/src/lice_comb/impl/matching.clj b/src/lice_comb/impl/matching.clj index 0121131..d39b762 100644 --- a/src/lice_comb/impl/matching.clj +++ b/src/lice_comb/impl/matching.clj @@ -25,6 +25,7 @@ [spdx.licenses :as sl] [spdx.exceptions :as se] [spdx.matching :as sm] + [spdx.expressions :as sexp] [lice-comb.impl.spdx :as lcis] [lice-comb.impl.regex-matching :as lcirm] [lice-comb.impl.expressions-info :as lciei] @@ -132,6 +133,7 @@ lic (sm/licenses-within-text s @lcis/license-ids-d) ids (set/union lic @exc)] (when ids + ; We don't need to sexp/normalise the keys here, as we never detect an expression from a text (manual-fixes (into {} (map #(hash-map % (list {:id % :type :concluded :confidence :high :strategy :spdx-text-matching})) ids)))))) (defmethod text->expressions java.io.Reader @@ -155,6 +157,7 @@ are found." [uri] (when-not (s/blank? uri) + ; We don't need to sexp/normalise the keys here, as we never detect an expression from a URI (lciei/prepend-source uri (manual-fixes (let [suri (lciu/simplify-uri uri)] @@ -304,7 +307,8 @@ (map #(if (keyword? %) % (string->ids-info %))) flatten seq - build-expressions-info-map)))))) + build-expressions-info-map + (lciu/mapfonk sexp/normalise))))))) (defn init! "Initialises this namespace upon first call (and does nothing on subsequent diff --git a/src/lice_comb/impl/utils.clj b/src/lice_comb/impl/utils.clj index 4266011..2d7f5b1 100644 --- a/src/lice_comb/impl/utils.clj +++ b/src/lice_comb/impl/utils.clj @@ -24,6 +24,14 @@ [clojure.java.io :as io] [clj-base62.core :as base62])) +(defn mapfonk + "Returns a new map where f has been applied to all of the keys of m." + [f m] + (when m + (into {} + (for [[k v] m] + [(f k) v])))) + (defn mapfonv "Returns a new map where f has been applied to all of the values of m." [f m] diff --git a/test/lice_comb/matching_test.clj b/test/lice_comb/matching_test.clj index 33155c0..5ca8682 100644 --- a/test/lice_comb/matching_test.clj +++ b/test/lice_comb/matching_test.clj @@ -115,7 +115,7 @@ (is (valid= #{"MIT" "BSD-4-Clause"} (name->expressions "MIT / BSD"))) (is (valid= #{"Apache-2.0" "GPL-3.0-only"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3"))) (is (valid= #{"Apache-2.0" "GPL-3.0-only WITH Classpath-exception-2.0"} (name->expressions "Apache License version 2.0 / GNU General Public License version 3 with classpath exception"))) - (is (valid= #{"EPL-2.0 OR GPL-2.0-or-later WITH Classpath-exception-2.0 OR MIT OR BSD-3-Clause AND Apache-2.0"} (name->expressions "Eclipse Public License or General Public License 2.0 or (at your discretion) later w/ classpath exception or MIT Licence or three clause bsd and Apache Licence")))) + (is (valid= #{"EPL-2.0 OR (GPL-2.0-or-later WITH Classpath-exception-2.0 AND MIT) OR (BSD-3-Clause AND Apache-2.0)"} (name->expressions "Eclipse Public License or General Public License 2.0 or (at your discretion) later w/ classpath exception aNd MIT Licence or three clause bsd and Apache Licence")))) (testing "Messed up license expressions" (is (valid= #{"Apache-2.0" "MIT"} (name->expressions "Apache with MIT")))) (testing "Names seen in handpicked POMs on Maven Central"