From 55394dac97d1f864b075d36085910934e4422b97 Mon Sep 17 00:00:00 2001 From: Roman Babenko Date: Tue, 3 Dec 2024 08:42:03 +0200 Subject: [PATCH] Python3.12 support (#523) * Python3.12 support * Python3.10 by default for workflows * Upd sphinx8 * suppress userwarning in windows-latest & py3.12 --- .github/workflows/benchmark.yml | 4 +-- .github/workflows/check.yml | 13 ++++++-- .github/workflows/fuzz.yml | 4 +-- .github/workflows/pypi.yml | 5 ++-- .github/workflows/test.yml | 15 +++++++--- README.md | 2 +- credsweeper/ml_model/features/word_in.py | 4 +-- credsweeper/ml_model/ml_validator.py | 12 ++++---- docs/howto/how-to-contribute.md | 22 +++++++++----- docs/requirements.txt | 8 ++--- docs/source/conf.py | 30 +++++++++++++++---- docs/source/credsweeper.deep_scanner.rst | 16 ++++++++++ docs/source/credsweeper.filters.rst | 8 +++++ docs/source/credsweeper.ml_model.features.rst | 16 +++++----- docs/source/how-to-contribute.rst | 1 - docs/source/how_to_contribute.rst | 5 ++++ docs/source/index.rst | 2 +- fuzz/requirements.txt | 2 +- pyproject.toml | 1 + requirements.txt | 5 ++-- tests/test_app.py | 4 ++- 21 files changed, 126 insertions(+), 53 deletions(-) delete mode 100644 docs/source/how-to-contribute.rst create mode 100644 docs/source/how_to_contribute.rst diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 6d9eb4ce6..11e71c1da 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -136,7 +136,7 @@ jobs: - name: Run CredSweeper tool run: | - credsweeper --banner --log info --jobs $(nproc) --subtext --path data --save-json report.${{ github.event.pull_request.head.sha }}.json | tee credsweeper.${{ github.event.pull_request.head.sha }}.log + credsweeper --banner --log info --jobs $(nproc) --sort --subtext --path data --save-json report.${{ github.event.pull_request.head.sha }}.json | tee credsweeper.${{ github.event.pull_request.head.sha }}.log - name: Run Benchmark run: | @@ -177,7 +177,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.9", "3.10", "3.11" ] + python-version: [ "3.9", "3.10", "3.11", "3.12" ] steps: diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml index 7ca8080ce..0202e6049 100644 --- a/.github/workflows/check.yml +++ b/.github/workflows/check.yml @@ -76,7 +76,7 @@ jobs: id: setup_python uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: - python-version: "3.11" + python-version: "3.12" - name: Install CredSweeper and auxiliary packages id: setup_credsweeper @@ -105,6 +105,10 @@ jobs: if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }} run: pylint --py-version=3.11 --errors-only credsweeper + - name: Analysing the code with pylint and minimum Python version 3.12 + if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }} + run: pylint --py-version=3.12 --errors-only credsweeper + # # # mypy - name: Analysing the code with mypy and minimum Python version 3.8 @@ -127,6 +131,11 @@ jobs: run: | mypy --config-file .mypy.ini --python-version=3.11 credsweeper + - name: Analysing the code with mypy and minimum Python version 3.12 + if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }} + run: | + mypy --config-file .mypy.ini --python-version=3.12 credsweeper + # # # documentation - name: Analysing the code with pylint for NEW missed docstrings of classes or functions @@ -140,7 +149,7 @@ jobs: if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }} run: | cd docs - pip install -r requirements.txt + pip install --requirement requirements.txt make html cd source python -m sphinx -T -E -b html -d _build/doctrees -D language=en . ./_html diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index 6b36c1b2b..89bc77089 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -44,8 +44,8 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - python -m pip install -r requirements.txt - python -m pip install -r fuzz/requirements.txt + python -m pip install --requirement requirements.txt + python -m pip install --requirement fuzz/requirements.txt - name: Run fuzzing test with COVERAGE id: run_fuzz diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index af4357e97..31b778bf6 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -31,12 +31,13 @@ jobs: - name: Set up Python uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1 with: - python-version: "3.8" + python-version: "3.10" - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt + pip install --requirement requirements.txt + pip freeze python -m build - name: Publish diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 0d2ffae81..48689d699 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -25,7 +25,7 @@ jobs: fail-fast: false matrix: os: [ ubuntu-latest, windows-latest, macos-latest ] - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: @@ -52,7 +52,8 @@ jobs: - name: Install application run: | - pip install . + python -m pip install . + python -m pip freeze - name: Remove sources dir to check installation if: runner.os != 'Windows' @@ -70,6 +71,11 @@ jobs: run: | pip install pytest pytest-random-order deepdiff + - name: Suppress warning ``...Unsupported Windows version (2022server)...`` + if: ${{ matrix.python-version == '3.12' && matrix.os == 'windows-latest' }} + run: | + echo "PYTHONWARNINGS=ignore::UserWarning:onnxruntime.capi.onnxruntime_validation:26" >> $env:GITHUB_ENV + - name: UnitTest with pytest run: | # put the command into one line to use in various OS to avoid processing differences in new line char sequence @@ -83,7 +89,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: @@ -106,6 +112,7 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --requirement requirements.txt + python -m pip freeze - name: UnitTest with pytest and coverage run: | @@ -159,7 +166,7 @@ jobs: path: coverage_html - name: Upload coverage reports to Codecov - if: ${{ matrix.python-version == '3.8' }} + if: ${{ matrix.python-version == '3.10' }} uses: codecov/codecov-action@ab904c41d6ece82784817410c45d8b8c02684457 # v3.1.6 with: token: ${{ secrets.CODECOV_TOKEN }} diff --git a/README.md b/README.md index 5ed927c74..69a687094 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ Full documentation can be found here: ### Main Requirements -- Python 3.8, 3.9, 3.10, 3.11 +- Python 3.8, 3.9, 3.10, 3.11, 3.12 ### Installation diff --git a/credsweeper/ml_model/features/word_in.py b/credsweeper/ml_model/features/word_in.py index 289c804c5..8c83b19b8 100644 --- a/credsweeper/ml_model/features/word_in.py +++ b/credsweeper/ml_model/features/word_in.py @@ -44,7 +44,7 @@ def extract(self, candidate: Candidate) -> Any: def word_in_str(self, a_string: str) -> np.ndarray: """Returns array with words included in a string""" - result = np.zeros(shape=[self.dimension], dtype=np.int8) + result: np.ndarray = np.zeros(shape=[self.dimension], dtype=np.int8) for i, word in self.enumerated_words: if word in a_string: result[i] = 1 @@ -52,7 +52,7 @@ def word_in_str(self, a_string: str) -> np.ndarray: def word_in_set(self, a_strings_set: Set[str]) -> np.ndarray: """Returns array with words matches in a_strings_set""" - result = np.zeros(shape=[self.dimension], dtype=np.int8) + result: np.ndarray = np.zeros(shape=[self.dimension], dtype=np.int8) for i, word in self.enumerated_words: if word in a_strings_set: result[i] = 1 diff --git a/credsweeper/ml_model/ml_validator.py b/credsweeper/ml_model/ml_validator.py index e5f38f725..589a4bb60 100644 --- a/credsweeper/ml_model/ml_validator.py +++ b/credsweeper/ml_model/ml_validator.py @@ -2,7 +2,7 @@ import logging import string from pathlib import Path -from typing import List, Tuple, Union, Optional +from typing import List, Tuple, Union, Optional, Dict import numpy as np import onnxruntime as ort @@ -90,7 +90,7 @@ def __init__( @staticmethod def encode(text: str, limit: int) -> np.ndarray: """Encodes prepared text to array""" - result_array = np.zeros(shape=(limit, MlValidator.NUM_CLASSES), dtype=np.float32) + result_array: np.ndarray = np.zeros(shape=(limit, MlValidator.NUM_CLASSES), dtype=np.float32) if text is None: return result_array len_text = len(text) @@ -122,7 +122,7 @@ def encode_value(text: str) -> np.ndarray: def _call_model(self, line_input: np.ndarray, variable_input: np.ndarray, value_input: np.ndarray, feature_input: np.ndarray) -> np.ndarray: - input_feed = { + input_feed: Dict[str, np.ndarray] = { "line_input": line_input.astype(np.float32), "variable_input": variable_input.astype(np.float32), "value_input": value_input.astype(np.float32), @@ -135,7 +135,7 @@ def _call_model(self, line_input: np.ndarray, variable_input: np.ndarray, value_ def extract_common_features(self, candidates: List[Candidate]) -> np.ndarray: """Extract features that are guaranteed to be the same for all candidates on the same line with same value.""" - feature_array = np.array([], dtype=np.float32) + feature_array: np.ndarray = np.array([], dtype=np.float32) # Extract features from credential candidate default_candidate = candidates[0] for feature in self.common_feature_list: @@ -147,7 +147,7 @@ def extract_common_features(self, candidates: List[Candidate]) -> np.ndarray: def extract_unique_features(self, candidates: List[Candidate]) -> np.ndarray: """Extract features that can be different between candidates. Join them with or operator.""" - feature_array = np.array([], dtype=np.int8) + feature_array: np.ndarray = np.array([], dtype=np.int8) default_candidate = candidates[0] for feature in self.unique_feature_list: new_feature = feature([default_candidate])[0] @@ -220,7 +220,7 @@ def validate_groups(self, group_list: List[Tuple[CandidateKey, List[Candidate]]] variable_input_list = [] value_input_list = [] features_list = [] - probability = np.zeros(len(group_list), dtype=np.float32) + probability: np.ndarray = np.zeros(len(group_list), dtype=np.float32) head = tail = 0 for group_key, candidates in group_list: line_input, variable_input, value_input, feature_array = self.get_group_features(candidates) diff --git a/docs/howto/how-to-contribute.md b/docs/howto/how-to-contribute.md index 129a7f397..e36f206a1 100644 --- a/docs/howto/how-to-contribute.md +++ b/docs/howto/how-to-contribute.md @@ -4,13 +4,15 @@ Thank you for your interest in contributing to the CredSweeper tool! The document covers the process for contributing to the CredSweeper code and documentation. Contributions may be as simple as typo corrections or as complex as new features. -1. [Process for contributing](#process-for-contributing) +1. [Process for contributing](#process-for-contributing) + 1. [Repository structure](#repository-structure) - 1. [File Name](#file-name) - 1. [Self Test & Verification](#self-test-and-verification) -1. [How to PR](#how-to-pr) -1. [DOs and DON'Ts](#dos-and-donts) + 2. [File Name](#file-name) + 3. [Self Test & Verification](#self-test-and-verification) + +2. [How to PR](#how-to-pr) +3. [DOs and DON'Ts](#dos-and-donts) ## Process for contributing @@ -20,7 +22,7 @@ You need a basic understanding of [Git and GitHub.com](https://guides.github.com You can also look at our [issues](https://github.com/Samsung/CredSweeper/issues) list and volunteer to work on the ones you're interested in. -**Step 2:** Fork the `/Samsung/CredSweeper` repo and create a branch for your changes. +**Step 2:** Fork the [Samsung/CredSweeper](https://github.com/Samsung/CredSweeper/fork) repo and create a branch for your changes. For small changes, you can use GitHub's web interface. Simply click the **Edit the file in your fork of this project** on the file you'd like to change. GitHub creates the new branch for you when you submit the changes. @@ -77,6 +79,7 @@ The maintainers will merge your PR into the master branch once feedback has been ```bash git clone https://github.com/YOUR-USERNAME/CredSweeper.git ``` + 3. Set to synchronize the original repository and the forked repository. ```bash @@ -84,18 +87,21 @@ The maintainers will merge your PR into the master branch once feedback has been git remote add upstream https://github.com/Samsung/CredSweeper.git git remote -v ``` + 4. Create a new branch on the forked repository or the local repository, and switch to the new branch. ```bash git checkout -b ``` + 5. Install Yapf as a pre-commit hook with ``` bash pip install pre-commit pre-commit install ``` + 6. Create a local commit. ```bash @@ -103,14 +109,16 @@ The maintainers will merge your PR into the master branch once feedback has been git add git commit -a ``` + 7. Push the branch ```bash git push origin ``` + 8. Open a pull request on https://github.com/Samsung/CredSweeper. - All tests and checks MUST be passed. + All tests and checks MUST be passed - Codestyle check - Static analysis - Unit tests diff --git a/docs/requirements.txt b/docs/requirements.txt index c3f037fa7..cfa90ead1 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,5 +1,3 @@ -m2r2==0.3.2 -# ^ the version is latest which has no conflicts -sphinx_rtd_theme==1.3.0 -sphinx-autodoc-typehints==1.24.0 -autodocsumm==0.2.11 +myst_parser[linkify]==4.0.0 +sphinx==8.1.3 +sphinx_rtd_theme==3.0.2 diff --git a/docs/source/conf.py b/docs/source/conf.py index 8f707894a..8e7ccdcfb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -5,7 +5,6 @@ import os import sys -import sphinx_rtd_theme sys.path.insert(0, os.path.abspath('../..')) di = os.path.abspath(os.pardir) @@ -43,8 +42,7 @@ 'sphinx.ext.viewcode', 'sphinx.ext.todo', 'sphinx.ext.napoleon', - 'sphinx_autodoc_typehints', - 'm2r2', + 'myst_parser', ] intersphinx_mapping = { @@ -52,7 +50,30 @@ 'numpy': ('https://numpy.org/doc/stable/', None), } -source_suffix = ['.rst', '.md'] +myst_enable_extensions = [ + "amsmath", + "attrs_inline", + "colon_fence", + "deflist", + "dollarmath", + "fieldlist", + "html_admonition", + "html_image", + "linkify", + "replacements", + "smartquotes", + "strikethrough", + "substitution", + "tasklist", +] + +source_suffix = { + '.rst': 'restructuredtext', + '.txt': 'markdown', + '.md': 'markdown', +} + +myst_heading_anchors = 3 # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] @@ -94,7 +115,6 @@ html_logo = 'https://raw.githubusercontent.com/Samsung/CredSweeper/main/docs/images/Logo.png' html_scaled_image_link = False -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # -- Options for HTMLHelp output ------------------------------------------ diff --git a/docs/source/credsweeper.deep_scanner.rst b/docs/source/credsweeper.deep_scanner.rst index e436610c5..a4821f22c 100644 --- a/docs/source/credsweeper.deep_scanner.rst +++ b/docs/source/credsweeper.deep_scanner.rst @@ -108,6 +108,14 @@ credsweeper.deep\_scanner.pkcs12\_scanner module :undoc-members: :show-inheritance: +credsweeper.deep\_scanner.pptx\_scanner module +---------------------------------------------- + +.. automodule:: credsweeper.deep_scanner.pptx_scanner + :members: + :undoc-members: + :show-inheritance: + credsweeper.deep\_scanner.tar\_scanner module --------------------------------------------- @@ -116,6 +124,14 @@ credsweeper.deep\_scanner.tar\_scanner module :undoc-members: :show-inheritance: +credsweeper.deep\_scanner.xlsx\_scanner module +---------------------------------------------- + +.. automodule:: credsweeper.deep_scanner.xlsx_scanner + :members: + :undoc-members: + :show-inheritance: + credsweeper.deep\_scanner.xml\_scanner module --------------------------------------------- diff --git a/docs/source/credsweeper.filters.rst b/docs/source/credsweeper.filters.rst index 64dcca664..e3c324e76 100644 --- a/docs/source/credsweeper.filters.rst +++ b/docs/source/credsweeper.filters.rst @@ -36,6 +36,14 @@ credsweeper.filters.line\_specific\_key\_check module :undoc-members: :show-inheritance: +credsweeper.filters.line\_uue\_part\_check module +------------------------------------------------- + +.. automodule:: credsweeper.filters.line_uue_part_check + :members: + :undoc-members: + :show-inheritance: + credsweeper.filters.value\_allowlist\_check module -------------------------------------------------- diff --git a/docs/source/credsweeper.ml_model.features.rst b/docs/source/credsweeper.ml_model.features.rst index 3b82b3ec4..bcf548485 100644 --- a/docs/source/credsweeper.ml_model.features.rst +++ b/docs/source/credsweeper.ml_model.features.rst @@ -52,14 +52,6 @@ credsweeper.ml\_model.features.is\_secret\_numeric module :undoc-members: :show-inheritance: -credsweeper.ml\_model.features.search\_in\_attribute module ------------------------------------------------------------ - -.. automodule:: credsweeper.ml_model.features.search_in_attribute - :members: - :undoc-members: - :show-inheritance: - credsweeper.ml\_model.features.reny\_entropy module --------------------------------------------------- @@ -76,6 +68,14 @@ credsweeper.ml\_model.features.rule\_name module :undoc-members: :show-inheritance: +credsweeper.ml\_model.features.search\_in\_attribute module +----------------------------------------------------------- + +.. automodule:: credsweeper.ml_model.features.search_in_attribute + :members: + :undoc-members: + :show-inheritance: + credsweeper.ml\_model.features.shannon\_entropy module ------------------------------------------------------ diff --git a/docs/source/how-to-contribute.rst b/docs/source/how-to-contribute.rst deleted file mode 100644 index 168224e78..000000000 --- a/docs/source/how-to-contribute.rst +++ /dev/null @@ -1 +0,0 @@ -.. mdinclude:: ../howto/how-to-contribute.md diff --git a/docs/source/how_to_contribute.rst b/docs/source/how_to_contribute.rst new file mode 100644 index 000000000..71eb2f3ca --- /dev/null +++ b/docs/source/how_to_contribute.rst @@ -0,0 +1,5 @@ +How To Contribute +================= + +.. include:: ../howto/how-to-contribute.md + :parser: myst_parser.sphinx_ diff --git a/docs/source/index.rst b/docs/source/index.rst index a1f05abcb..1fd98410b 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -43,7 +43,7 @@ How to Contribute .. toctree:: :maxdepth: 2 - how-to-contribute + how_to_contribute Overall architecture diff --git a/fuzz/requirements.txt b/fuzz/requirements.txt index 91f5329fc..83613f208 100644 --- a/fuzz/requirements.txt +++ b/fuzz/requirements.txt @@ -1,5 +1,5 @@ # Fuzzing -atheris==2.3.0 +atheris==2.3.0; python_version < '3.12' # auxiliary. should be installed from ../requirements.txt (pytest-cov) coverage diff --git a/pyproject.toml b/pyproject.toml index 9b23da1ee..f7adfec16 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,6 +39,7 @@ classifiers = [ "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Topic :: Security", diff --git a/requirements.txt b/requirements.txt index 888ef4976..a503eb483 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,9 +13,8 @@ GitPython==3.1.43 google-auth-oauthlib==1.2.1 humanfriendly==10.0 lxml==5.3.0 -numpy==1.24.4 -# ^ the version supports python 3.8-3.11 -# ^ todo: check for py3.12 later https://github.com/numpy/numpy/issues/23808 +numpy==1.24.4; python_version < '3.10' +numpy==1.26.4; python_version >= '3.10' oauthlib==3.2.2 odfpy==1.4.1 onnxruntime==1.19.2 diff --git a/tests/test_app.py b/tests/test_app.py index 030b235cf..1f1654936 100644 --- a/tests/test_app.py +++ b/tests/test_app.py @@ -20,6 +20,9 @@ class TestApp(TestCase): + def setUp(self): + self.maxDiff = None + @staticmethod def _m_credsweeper(args) -> Tuple[str, str]: proc = subprocess.Popen( @@ -325,7 +328,6 @@ def test_help_p(self) -> None: else: text = ' '.join([text, line]) expected = " ".join(text.split()) - self.maxDiff = None self.assertEqual(expected, output) # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #