From ce64722d02d6db7505985466f2dd8d61f2529e01 Mon Sep 17 00:00:00 2001 From: prabhu Date: Fri, 15 Nov 2024 14:51:07 +0000 Subject: [PATCH] Minimum confidence filter (#1457) * Minimum confidence filter Signed-off-by: Prabhu Subramanian * technique filter Signed-off-by: Prabhu Subramanian --------- Signed-off-by: Prabhu Subramanian --- .github/workflows/repotests.yml | 2 + README.md | 5 ++ bin/cdxgen.js | 19 +++++++ docs/ADVANCED.md | 38 +++++++++++++ docs/CLI.md | 5 ++ lib/stages/postgen/postgen.js | 69 +++++++++++++++++++++++ types/lib/stages/postgen/postgen.d.ts.map | 2 +- 7 files changed, 139 insertions(+), 1 deletion(-) diff --git a/.github/workflows/repotests.yml b/.github/workflows/repotests.yml index c9a50f5ee..c7d6f79ef 100644 --- a/.github/workflows/repotests.yml +++ b/.github/workflows/repotests.yml @@ -415,6 +415,8 @@ jobs: - name: repotests openpbs run: | bin/cdxgen.js -p -r -t c repotests/openpbs -o bomresults/bom-openpbs.json + bin/cdxgen.js -p -r -t c repotests/openpbs -o bomresults/bom-openpbs.json --min-confidence 0.4 + bin/cdxgen.js -p -r -t c repotests/openpbs -o bomresults/bom-openpbs.json --technique manifest-analysis shell: bash - name: repotests Jackalope run: | diff --git a/README.md b/README.md index 30559a578..5c5357f9a 100644 --- a/README.md +++ b/README.md @@ -158,6 +158,11 @@ Options: luated against or attested to. [array] [choices: "asvs-4.0.3", "bsimm-v13", "masvs-2.0.0", "nist_ssdf-1.1", "pcissc-secure-slc-1.1", "scvs-1.0.0", "s saf-DRAFT-2023-11"] + --min-confidence Minimum confidence needed for the identity of a component from 0 - 1, where 1 is 100% con + fidence. [number] [default: 0] + --technique Analysis technique to use + [array] [choices: "auto", "source-code-analysis", "binary-analysis", "manifest-analysis", "hash-comparison", "instrume + ntation", "filename"] --auto-compositions Automatically set compositions when the BOM was filtered. 
Defaults to true [boolean] [default: true] -h, --help Show help [boolean] diff --git a/bin/cdxgen.js b/bin/cdxgen.js index a699cf3d4..4fc71d1cd 100755 --- a/bin/cdxgen.js +++ b/bin/cdxgen.js @@ -297,6 +297,24 @@ const args = yargs(hideBin(process.argv)) hidden: true, choices: ["safe-pip-install", "suggest-build-tools"], }) + .option("min-confidence", { + description: + "Minimum confidence needed for the identity of a component from 0 - 1, where 1 is 100% confidence.", + default: 0, + type: "number", + }) + .option("technique", { + description: "Analysis technique to use", + choices: [ + "auto", + "source-code-analysis", + "binary-analysis", + "manifest-analysis", + "hash-comparison", + "instrumentation", + "filename", + ], + }) .completion("completion", "Generate bash/zsh completion") .array("type") .array("excludeType") @@ -306,6 +324,7 @@ const args = yargs(hideBin(process.argv)) .array("exclude") .array("standard") .array("feature-flags") + .array("technique") .option("auto-compositions", { type: "boolean", default: true, diff --git a/docs/ADVANCED.md b/docs/ADVANCED.md index 9040897e1..4a09971bf 100644 --- a/docs/ADVANCED.md +++ b/docs/ADVANCED.md @@ -93,6 +93,44 @@ Use `--only` to include only those components containing the string in the purl. cdxgen -t java -o /tmp/bom.json -p --only org.springframework ``` +### Minimum confidence filter + +Use `--min-confidence` with a value between 0 and 1 to filter components based on the confidence of their purl [identity](https://cyclonedx.org/docs/1.6/json/#components_items_evidence_identity_oneOf_i0_items_field). The logic involves looking for `field=purl` in `evidence.identity` and collecting the maximum `confidence` value. This is then compared against the minimum confidence passed as an argument. + +```shell +cdxgen -t c . --min-confidence 0.1 +``` + +The above would filter out all the zero confidence components in c/c++, so use it with caution.
+ +### Analysis technique filter + +Use `--technique` to list the techniques that cdxgen is allowed to use for the xBOM generation. Omitting this argument or using the value `auto` enables default behaviour. + +Example 1 - only allow manifest-analysis: + +```shell +cdxgen -t c . --technique manifest-analysis +``` + +Example 2 - allow manifest-analysis and source-code-analysis: + +```shell +cdxgen -t c . --technique manifest-analysis --technique source-code-analysis +``` + +List of supported techniques: + +- auto (default) +- source-code-analysis +- binary-analysis +- manifest-analysis +- hash-comparison +- instrumentation +- filename + +Currently, this capability is implemented as a filter during post-processing, so it is unlikely to yield any performance benefits. + ## Automatic compositions When using any filters, cdxgen would automatically set the [compositions.aggregate](https://cyclonedx.org/docs/1.5/json/#compositions_items_aggregate) property to "incomplete" or "incomplete_first_party_only". diff --git a/docs/CLI.md b/docs/CLI.md index c181887ba..f786b5126 100644 --- a/docs/CLI.md +++ b/docs/CLI.md @@ -119,6 +119,11 @@ Options: luated against or attested to. [array] [choices: "asvs-4.0.3", "bsimm-v13", "masvs-2.0.0", "nist_ssdf-1.1", "pcissc-secure-slc-1.1", "scvs-1.0.0", "s saf-DRAFT-2023-11"] + --min-confidence Minimum confidence needed for the identity of a component from 0 - 1, where 1 is 100% con + fidence. [number] [default: 0] + --technique Analysis technique to use + [array] [choices: "auto", "source-code-analysis", "binary-analysis", "manifest-analysis", "hash-comparison", "instrume + ntation", "filename"] --auto-compositions Automatically set compositions when the BOM was filtered.
Defaults to true [boolean] [default: true] -h, --help Show help [boolean] diff --git a/lib/stages/postgen/postgen.js b/lib/stages/postgen/postgen.js index 936d416a4..1773c1626 100644 --- a/lib/stages/postgen/postgen.js +++ b/lib/stages/postgen/postgen.js @@ -127,6 +127,50 @@ export function applyStandards(bomJson, options) { return bomJson; } +/** + * Method to get the purl identity confidence. + * + * @param comp Component + * @returns {undefined|number} Max of all the available purl identity confidence or undefined + */ +function getIdentityConfidence(comp) { + if (!comp.evidence) { + return undefined; + } + let confidence; + for (const aidentity of comp?.evidence?.identity || []) { + if (aidentity?.field === "purl") { + if (confidence === undefined) { + confidence = aidentity.confidence || 0; + } else { + confidence = Math.max(aidentity.confidence, confidence); + } + } + } + return confidence; +} + +/** + * Method to get the list of techniques used for identity. + * + * @param comp Component + * @returns {Set|undefined} Set of technique. 
evidence.identity.methods.technique + */ +function getIdentityTechniques(comp) { + if (!comp.evidence) { + return undefined; + } + const techniques = new Set(); + for (const aidentity of comp?.evidence?.identity || []) { + if (aidentity?.field === "purl") { + for (const amethod of aidentity.methods || []) { + techniques.add(amethod?.technique); + } + } + } + return techniques; +} + /** * Filter BOM based on options * @@ -143,6 +187,31 @@ export function filterBom(bomJson, options) { return bomJson; } for (const comp of bomJson.components) { + // minimum confidence filter + if (options?.minConfidence > 0) { + const confidence = Math.min(options.minConfidence, 1); + const identityConfidence = getIdentityConfidence(comp); + if (identityConfidence !== undefined && identityConfidence < confidence) { + filtered = true; + continue; + } + } + // identity technique filter + if (options?.technique?.length && !options.technique.includes("auto")) { + const allowedTechniques = new Set( + Array.isArray(options.technique) + ? 
options.technique + : [options.technique], + ); + const usedTechniques = getIdentityTechniques(comp); + if ( + usedTechniques && + !usedTechniques.intersection(allowedTechniques).size + ) { + filtered = true; + continue; + } + } if ( options.requiredOnly && comp.scope && diff --git a/types/lib/stages/postgen/postgen.d.ts.map b/types/lib/stages/postgen/postgen.d.ts.map index d92a077e9..e08802328 100644 --- a/types/lib/stages/postgen/postgen.d.ts.map +++ b/types/lib/stages/postgen/postgen.d.ts.map @@ -1 +1 @@ -{"version":3,"file":"postgen.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/postgen.js"],"names":[],"mappings":"AAaA;;;;;;;GAOG;AACH,+DAkBC;AAED;;;;;;;GAOG;AACH,gEAqCC;AAED;;;;;;;GAOG;AACH,gEA+BC;AAED;;;;;;;GAOG;AACH,2DAyIC;AAED;;GAEG;AACH,gDAIC;AAMD;;;;;;;GAOG;AACH,0DAmHC"} \ No newline at end of file +{"version":3,"file":"postgen.d.ts","sourceRoot":"","sources":["../../../../lib/stages/postgen/postgen.js"],"names":[],"mappings":"AAaA;;;;;;;GAOG;AACH,+DAkBC;AAED;;;;;;;GAOG;AACH,gEAqCC;AAED;;;;;;;GAOG;AACH,gEA+BC;AA8CD;;;;;;;GAOG;AACH,2DAkKC;AAED;;GAEG;AACH,gDAIC;AAMD;;;;;;;GAOG;AACH,0DAmHC"} \ No newline at end of file