From a2325910f6c6ff0831dd9aa4f67e246f0574386c Mon Sep 17 00:00:00 2001 From: eeisegn <44410969+eeisegn@users.noreply.github.com> Date: Tue, 2 Jan 2024 23:14:25 +0100 Subject: [PATCH] Scan Dependency Decoration (#28) * added support for dependency decoration as part of scanning * added support for dependency decoration as part of scanning --- CHANGELOG.md | 8 +++++- CLIENT_HELP.md | 22 ++++++++++++++++ src/scanoss/__init__.py | 2 +- src/scanoss/cli.py | 24 ++++++++++------- src/scanoss/scancodedeps.py | 20 ++++++++++++++ src/scanoss/scanner.py | 41 ++++++++++++++++++++++++----- src/scanoss/threadeddependencies.py | 32 +++++++++++++++------- 7 files changed, 122 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index caed073..9aaea4e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Upcoming changes... +## [1.9.0] - 2023-12-29 +### Added +- Added dependency file decoration option to scanning (`scan`) using `--dep` + - More details can be found in [CLIENT_HELP.md](CLIENT_HELP.md) + ## [1.8.0] - 2023-11-13 ### Added - Added Component Decoration sub-command: @@ -274,4 +279,5 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 [1.6.2]: https://github.com/scanoss/scanoss.py/compare/v1.6.1...v1.6.2 [1.6.3]: https://github.com/scanoss/scanoss.py/compare/v1.6.2...v1.6.3 [1.7.0]: https://github.com/scanoss/scanoss.py/compare/v1.6.3...v1.7.0 -[1.7.0]: https://github.com/scanoss/scanoss.py/compare/v1.7.0...v1.8.0 +[1.8.0]: https://github.com/scanoss/scanoss.py/compare/v1.7.0...v1.8.0 +[1.9.0]: https://github.com/scanoss/scanoss.py/compare/v1.8.0...v1.9.0 diff --git a/CLIENT_HELP.md b/CLIENT_HELP.md index d2af6ca..e013161 100644 --- a/CLIENT_HELP.md +++ b/CLIENT_HELP.md @@ -156,6 +156,22 @@ This fingerprint (WFP) can then be sent to the SCANOSS engine using the scanning scanoss-py scan -w src-fingers.wfp -o 
scan-results.json ``` +### Dependency file parsing +The dependency files of a project can be fingerprinted/parsed using the `dep` command: +```bash +scanoss-py dep -o src-deps.json src +``` + +This parsed dependency file can then be sent to SCANOSS for decoration using the scanning command: +```bash +scanoss-py scan --dep src-deps.json --dependencies-only -o scan-results.json +``` + +It is possible to combine a WFP & Dependency file into a single scan also: +```bash +scanoss-py scan -w src-fingers.wfp --dep src-deps.json -o scan-results.json +``` + ### Scan a project folder The following command provides the capability to scan a given file/folder: ```bash @@ -167,6 +183,12 @@ The following command scans the `src` folder and writes the output to `scan-resu scanoss-py scan -o scan-results.json src ``` +### Scan a project folder with dependencies +The following command scans the `src` folder for file, snippet & dependency matches, writing the output to `scan-results.json`: +```bash +scanoss-py scan -o scan-results.json -D src +``` + ### Converting RAW results into other formats The following command provides the capability to convert the RAW scan results from a SCANOSS scan into multiple different formats, including CycloneDX, SPDX Lite, CSV, etc. For the full set of formats, please run: diff --git a/src/scanoss/__init__.py b/src/scanoss/__init__.py index 75dfd30..9273caa 100644 --- a/src/scanoss/__init__.py +++ b/src/scanoss/__init__.py @@ -22,4 +22,4 @@ THE SOFTWARE. 
""" -__version__ = '1.8.0' +__version__ = '1.9.0' diff --git a/src/scanoss/cli.py b/src/scanoss/cli.py index 4b8b1fa..36a41f5 100644 --- a/src/scanoss/cli.py +++ b/src/scanoss/cli.py @@ -406,7 +406,7 @@ def get_scan_options(args): scan_dependencies = 0 if args.skip_snippets: scan_snippets = 0 - if args.dependencies: + if args.dependencies or args.dep: scan_dependencies = ScanType.SCAN_DEPENDENCIES.value if args.dependencies_only: scan_files = scan_snippets = 0 @@ -437,8 +437,8 @@ def scan(parser, args): args: Namespace Parsed arguments """ - if not args.scan_dir and not args.wfp and not args.stdin: - print_stderr('Please specify a file/folder, fingerprint (--wfp) or STDIN (--stdin)') + if not args.scan_dir and not args.wfp and not args.stdin and not args.dep: + print_stderr('Please specify a file/folder, fingerprint (--wfp), dependency (--dep), or STDIN (--stdin)') parser.parse_args([args.subparser, '-h']) exit(1) if args.pac and args.proxy: @@ -536,10 +536,10 @@ def scan(parser, args): if not scanner.is_file_or_snippet_scan(): print_stderr(f'Error: Cannot specify WFP scanning if file/snippet options are disabled ({scan_options})') exit(1) - if args.threads > 1: - scanner.scan_wfp_file_threaded(args.wfp) - else: - scanner.scan_wfp_file(args.wfp) + if scanner.is_dependency_scan() and not args.dep: + print_stderr('Error: Cannot specify WFP & Dependency scanning without a dependency file (--dep)') + exit(1) + scanner.scan_wfp_with_options(args.wfp, args.dep) elif args.stdin: contents = sys.stdin.buffer.read() if not scanner.scan_contents(args.stdin, contents): @@ -549,14 +549,20 @@ def scan(parser, args): print_stderr(f'Error: File or folder specified does not exist: {args.scan_dir}.') exit(1) if os.path.isdir(args.scan_dir): - if not scanner.scan_folder_with_options(args.scan_dir, scanner.winnowing.file_map): + if not scanner.scan_folder_with_options(args.scan_dir, args.dep, scanner.winnowing.file_map): exit(1) elif os.path.isfile(args.scan_dir): - if not 
scanner.scan_file_with_options(args.scan_dir, scanner.winnowing.file_map): + if not scanner.scan_file_with_options(args.scan_dir, args.dep, scanner.winnowing.file_map): exit(1) else: print_stderr(f'Error: Path specified is neither a file or a folder: {args.scan_dir}.') exit(1) + elif args.dep: + if not args.dependencies_only: + print_stderr(f'Error: No file or folder specified to scan. Please add --dependencies-only to decorate dependency file only.') + exit(1) + if not scanner.scan_folder_with_options(".", args.dep, scanner.winnowing.file_map): + exit(1) else: print_stderr('No action found to process') exit(1) diff --git a/src/scanoss/scancodedeps.py b/src/scanoss/scancodedeps.py index e3e3dca..e1b1beb 100644 --- a/src/scanoss/scancodedeps.py +++ b/src/scanoss/scancodedeps.py @@ -215,6 +215,26 @@ def run_scan(self, output_file: str = None, what_to_scan: str = None) -> bool: self.print_stderr(f'ERROR: Issue running scancode dependency scan on {what_to_scan}: {e}') return False return True + + def load_from_file(self, json_file: str = None) -> json: + """ + Load the parsed JSON dependencies file and return the json object + :param json_file: dependency json file + :return: SCANOSS dependency JSON + """ + if not json_file: + self.print_stderr('ERROR: No parsed JSON file provided to load.') + return None + if not os.path.isfile(json_file): + self.print_stderr(f'ERROR: parsed JSON file does not exist or is not a file: {json_file}') + return None + with open(json_file, 'r') as f: + try: + return json.loads(f.read()) + except Exception as e: + self.print_stderr(f'ERROR: Problem loading input JSON: {e}') + return None + # # End of ScancodeDeps Class # diff --git a/src/scanoss/scanner.py b/src/scanoss/scanner.py index 62c627d..e2f6e93 100644 --- a/src/scanoss/scanner.py +++ b/src/scanoss/scanner.py @@ -313,10 +313,11 @@ def is_dependency_scan(self): return True return False - def scan_folder_with_options(self, scan_dir: str, file_map: dict = None) -> bool: + def 
scan_folder_with_options(self, scan_dir: str, deps_file: str = None, file_map: dict = None) -> bool: """ Scan the given folder for whatever scaning options that have been configured :param scan_dir: directory to scan + :param deps_file: pre-parsed dependency file to decorate :param file_map: mapping of obfuscated files back into originals :return: True if successful, False otherwise """ @@ -331,7 +332,7 @@ def scan_folder_with_options(self, scan_dir: str, file_map: dict = None) -> bool if self.scan_output: self.print_msg(f'Writing results to {self.scan_output}...') if self.is_dependency_scan(): - if not self.threaded_deps.run(what_to_scan=scan_dir, wait=False): # Kick off a background dependency scan + if not self.threaded_deps.run(what_to_scan=scan_dir, deps_file=deps_file, wait=False): # Kick off a background dependency scan success = False if self.is_file_or_snippet_scan(): if not self.scan_folder(scan_dir): @@ -542,10 +543,11 @@ def __finish_scan_threaded(self, file_map: dict = None) -> bool: success = False return success - def scan_file_with_options(self, file: str, file_map: dict = None) -> bool: + def scan_file_with_options(self, file: str, deps_file: str = None, file_map: dict = None) -> bool: """ Scan the given file for whatever scaning options that have been configured :param file: file to scan + :param deps_file: pre-parsed dependency file to decorate :param file_map: mapping of obfuscated files back into originals :return: True if successful, False otherwise """ @@ -560,7 +562,7 @@ def scan_file_with_options(self, file: str, file_map: dict = None) -> bool: if self.scan_output: self.print_msg(f'Writing results to {self.scan_output}...') if self.is_dependency_scan(): - if not self.threaded_deps.run(what_to_scan=file, wait=False): # Kick off a background dependency scan + if not self.threaded_deps.run(what_to_scan=file, deps_file=deps_file, wait=False): # Kick off a background dependency scan success = False if self.is_file_or_snippet_scan(): if not 
self.scan_file(file): @@ -725,6 +727,35 @@ def scan_wfp_file(self, file: str = None) -> bool: return success + def scan_wfp_with_options(self, wfp: str, deps_file: str, file_map: dict = None) -> bool: + """ + Scan the given WFP file for whatever scanning options that have been configured + :param wfp: WFP file to scan + :param deps_file: pre-parsed dependency file to decorate + :param file_map: mapping of obfuscated files back into originals + :return: True if successful, False otherwise + """ + success = True + wfp_file = wfp if wfp else self.wfp # If a WFP file is specified, use it, otherwise use the default + if not os.path.exists(wfp_file) or not os.path.isfile(wfp_file): + raise Exception(f"ERROR: Specified WFP file does not exist or is not a file: {wfp_file}") + + if not self.is_file_or_snippet_scan() and not self.is_dependency_scan(): + raise Exception(f"ERROR: No scan options defined to scan WFP file: {wfp_file}") + + if self.scan_output: + self.print_msg(f'Writing results to {self.scan_output}...') + if self.is_dependency_scan(): + if not self.threaded_deps.run(deps_file=deps_file, wait=False): # Kick off a background dependency scan + success = False + if self.is_file_or_snippet_scan(): + if not self.scan_wfp_file_threaded(wfp_file, file_map): + success = False + if self.threaded_scan: + if not self.__finish_scan_threaded(file_map): + success = False + return success + def scan_wfp_file_threaded(self, file: str = None, file_map: dict = None) -> bool: """ Scan the contents of the specified WFP file (threaded) @@ -778,8 +809,6 @@ def scan_wfp_file_threaded(self, file: str = None, file_map: dict = None) -> boo if not self.__run_scan_threaded(scan_started, file_count): success = False - elif not self.__finish_scan_threaded(file_map): - success = False return success def scan_wfp(self, wfp: str) -> bool: diff --git a/src/scanoss/threadeddependencies.py b/src/scanoss/threadeddependencies.py index c6ca12b..43463eb 100644 --- a/src/scanoss/threadeddependencies.py +++ 
b/src/scanoss/threadeddependencies.py @@ -31,6 +31,8 @@ from .scanossbase import ScanossBase from .scanossgrpc import ScanossGrpc +DEP_FILE_PREFIX = "file=" # Default prefix to signify an existing parsed dependency file + @dataclass class ThreadedDependencies(ScanossBase): @@ -64,18 +66,23 @@ def responses(self) -> Dict: return resp return None - def run(self, what_to_scan: str = None, wait: bool = True) -> bool: + def run(self, what_to_scan: str = None, deps_file: str = None, wait: bool = True) -> bool: """ Initiate a background scan for the specified file/dir :param what_to_scan: file/folder to scan + :param deps_file: file to decorate instead of scan (overrides what_to_scan option) :param wait: wait for completion :return: True if successful, False if error encountered """ what_to_scan = what_to_scan if what_to_scan else self.what_to_scan self._errors = False try: - self.print_msg(f'Searching {what_to_scan} for dependencies...') - self.inputs.put(what_to_scan) # Set up an input queue to enable the parent to wait for completion + if deps_file: # Decorate the given dependencies file + self.print_msg(f'Decorating {deps_file} dependencies...') + self.inputs.put(f'{DEP_FILE_PREFIX}{deps_file}') # Add to queue and have parent wait on it + else: # Search for dependencies to decorate + self.print_msg(f'Searching {what_to_scan} for dependencies...') + self.inputs.put(what_to_scan) # Add to queue and have parent wait on it self._thread = threading.Thread(target=self.scan_dependencies, daemon=True) self._thread.start() except Exception as e: @@ -87,22 +94,27 @@ def run(self, what_to_scan: str = None, wait: bool = True) -> bool: def scan_dependencies(self) -> None: """ - Scan for dependencies from the given file/dir (from the input queue) + Scan for dependencies from the given file/dir or from an input file (from the input queue). 
""" current_thread = threading.get_ident() self.print_trace(f'Starting dependency worker {current_thread}...') try: - what_to_scan = self.inputs.get(timeout=5) # Begin processing the dependency request - if not self.sc_deps.run_scan(what_to_scan=what_to_scan): - self._errors = True - else: - deps = self.sc_deps.produce_from_file() + what_to_scan = self.inputs.get(timeout=5) # Begin processing the dependency request + deps = None + if what_to_scan.startswith(DEP_FILE_PREFIX): # We have a pre-parsed dependency file, load it + deps = self.sc_deps.load_from_file(what_to_scan[len(DEP_FILE_PREFIX):]) + else: # Search the file/folder for dependency files to parse + if not self.sc_deps.run_scan(what_to_scan=what_to_scan): + self._errors = True + else: + deps = self.sc_deps.produce_from_file() + if not self._errors: if deps is None: self.print_stderr(f'Problem searching for dependencies for: {what_to_scan}') self._errors = True elif not deps: self.print_trace(f'No dependencies found to decorate for: {what_to_scan}') - else: # TODO add API call to get dep data + else: decorated_deps = self.grpc_api.get_dependencies(deps) if decorated_deps: self.output.put(decorated_deps)