From 55394dac97d1f864b075d36085910934e4422b97 Mon Sep 17 00:00:00 2001
From: Roman Babenko <babenek@users.noreply.github.com>
Date: Tue, 3 Dec 2024 08:42:03 +0200
Subject: [PATCH] Python3.12 support (#523)

* Python3.12 support

* Python3.10 by default for workflows

* Update Sphinx to version 8

* Suppress onnxruntime UserWarning on windows-latest with Python 3.12
---
 .github/workflows/benchmark.yml               |  4 +--
 .github/workflows/check.yml                   | 13 ++++++--
 .github/workflows/fuzz.yml                    |  4 +--
 .github/workflows/pypi.yml                    |  5 ++--
 .github/workflows/test.yml                    | 15 +++++++---
 README.md                                     |  2 +-
 credsweeper/ml_model/features/word_in.py      |  4 +--
 credsweeper/ml_model/ml_validator.py          | 12 ++++----
 docs/howto/how-to-contribute.md               | 22 +++++++++-----
 docs/requirements.txt                         |  8 ++---
 docs/source/conf.py                           | 30 +++++++++++++++----
 docs/source/credsweeper.deep_scanner.rst      | 16 ++++++++++
 docs/source/credsweeper.filters.rst           |  8 +++++
 docs/source/credsweeper.ml_model.features.rst | 16 +++++-----
 docs/source/how-to-contribute.rst             |  1 -
 docs/source/how_to_contribute.rst             |  5 ++++
 docs/source/index.rst                         |  2 +-
 fuzz/requirements.txt                         |  2 +-
 pyproject.toml                                |  1 +
 requirements.txt                              |  5 ++--
 tests/test_app.py                             |  4 ++-
 21 files changed, 126 insertions(+), 53 deletions(-)
 delete mode 100644 docs/source/how-to-contribute.rst
 create mode 100644 docs/source/how_to_contribute.rst

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 6d9eb4ce6..11e71c1da 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -136,7 +136,7 @@ jobs:
 
       - name: Run CredSweeper tool
         run: |
-          credsweeper --banner --log info --jobs $(nproc) --subtext --path data --save-json report.${{ github.event.pull_request.head.sha }}.json | tee credsweeper.${{ github.event.pull_request.head.sha }}.log
+          credsweeper --banner --log info --jobs $(nproc) --sort --subtext --path data --save-json report.${{ github.event.pull_request.head.sha }}.json | tee credsweeper.${{ github.event.pull_request.head.sha }}.log
 
       - name: Run Benchmark
         run: |
@@ -177,7 +177,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: [ "3.9", "3.10", "3.11" ]
+        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
 
     steps:
 
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
index 7ca8080ce..0202e6049 100644
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@@ -76,7 +76,7 @@ jobs:
       id: setup_python
       uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1
       with:
-        python-version: "3.11"
+        python-version: "3.12"
 
     - name: Install CredSweeper and auxiliary packages
       id: setup_credsweeper
@@ -105,6 +105,10 @@ jobs:
       if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
       run: pylint --py-version=3.11 --errors-only credsweeper
 
+    - name: Analysing the code with pylint and minimum Python version 3.12
+      if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
+      run: pylint --py-version=3.12 --errors-only credsweeper
+
     # # # mypy
 
     - name: Analysing the code with mypy and minimum Python version 3.8
@@ -127,6 +131,11 @@ jobs:
       run: |
         mypy --config-file .mypy.ini --python-version=3.11 credsweeper
 
+    - name: Analysing the code with mypy and minimum Python version 3.12
+      if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
+      run: |
+        mypy --config-file .mypy.ini --python-version=3.12 credsweeper
+
     # # # documentation
 
     - name: Analysing the code with pylint for NEW missed docstrings of classes or functions
@@ -140,7 +149,7 @@ jobs:
       if: ${{ always() && steps.setup_credsweeper.conclusion == 'success' }}
       run: |
         cd docs
-        pip install -r requirements.txt
+        pip install --requirement requirements.txt
         make html
         cd source
         python -m sphinx -T -E -b html -d _build/doctrees -D language=en . ./_html
diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml
index 6b36c1b2b..89bc77089 100644
--- a/.github/workflows/fuzz.yml
+++ b/.github/workflows/fuzz.yml
@@ -44,8 +44,8 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        python -m pip install -r requirements.txt
-        python -m pip install -r fuzz/requirements.txt
+        python -m pip install --requirement requirements.txt
+        python -m pip install --requirement fuzz/requirements.txt
 
     - name: Run fuzzing test with COVERAGE
       id: run_fuzz
diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml
index af4357e97..31b778bf6 100644
--- a/.github/workflows/pypi.yml
+++ b/.github/workflows/pypi.yml
@@ -31,12 +31,13 @@ jobs:
     - name: Set up Python
       uses: actions/setup-python@65d7f2d534ac1bc67fcd62888c5f4f3d2cb2b236 # v4.7.1
       with:
-        python-version: "3.8"
+        python-version: "3.10"
 
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements.txt
+        pip install --requirement requirements.txt
+        pip freeze
         python -m build
 
     - name: Publish
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 0d2ffae81..48689d699 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -25,7 +25,7 @@ jobs:
       fail-fast: false
       matrix:
         os: [ ubuntu-latest, windows-latest, macos-latest ]
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
 
@@ -52,7 +52,8 @@ jobs:
 
     - name: Install application
       run: |
-        pip install .
+        python -m pip install .
+        python -m pip freeze
 
     - name: Remove sources dir to check installation
       if: runner.os != 'Windows'
@@ -70,6 +71,11 @@ jobs:
       run: |
         pip install pytest pytest-random-order deepdiff
 
+    - name: Suppress warning ``...Unsupported Windows version (2022server)...``
+      if: ${{ matrix.python-version == '3.12' && matrix.os == 'windows-latest' }}
+      run: |
+        echo "PYTHONWARNINGS=ignore::UserWarning:onnxruntime.capi.onnxruntime_validation:26" >> $env:GITHUB_ENV
+
     - name: UnitTest with pytest
       run: |
         # put the command into one line to use in various OS to avoid processing differences in new line char sequence
@@ -83,7 +89,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
 
     steps:
 
@@ -106,6 +112,7 @@ jobs:
       run: |
         python -m pip install --upgrade pip
         python -m pip install --requirement requirements.txt
+        python -m pip freeze
 
     - name: UnitTest with pytest and coverage
       run: |
@@ -159,7 +166,7 @@ jobs:
         path: coverage_html
 
     - name: Upload coverage reports to Codecov
-      if: ${{ matrix.python-version == '3.8' }}
+      if: ${{ matrix.python-version == '3.10' }}
       uses: codecov/codecov-action@ab904c41d6ece82784817410c45d8b8c02684457 # v3.1.6
       with:
         token: ${{ secrets.CODECOV_TOKEN }}
diff --git a/README.md b/README.md
index 5ed927c74..69a687094 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,7 @@ Full documentation can be found here: <https://credsweeper.readthedocs.io/>
 
 ### Main Requirements
 
-- Python 3.8, 3.9, 3.10, 3.11
+- Python 3.8, 3.9, 3.10, 3.11, 3.12
 
 ### Installation
 
diff --git a/credsweeper/ml_model/features/word_in.py b/credsweeper/ml_model/features/word_in.py
index 289c804c5..8c83b19b8 100644
--- a/credsweeper/ml_model/features/word_in.py
+++ b/credsweeper/ml_model/features/word_in.py
@@ -44,7 +44,7 @@ def extract(self, candidate: Candidate) -> Any:
 
     def word_in_str(self, a_string: str) -> np.ndarray:
         """Returns array with words included in a string"""
-        result = np.zeros(shape=[self.dimension], dtype=np.int8)
+        result: np.ndarray = np.zeros(shape=[self.dimension], dtype=np.int8)
         for i, word in self.enumerated_words:
             if word in a_string:
                 result[i] = 1
@@ -52,7 +52,7 @@ def word_in_str(self, a_string: str) -> np.ndarray:
 
     def word_in_set(self, a_strings_set: Set[str]) -> np.ndarray:
         """Returns array with words matches in a_strings_set"""
-        result = np.zeros(shape=[self.dimension], dtype=np.int8)
+        result: np.ndarray = np.zeros(shape=[self.dimension], dtype=np.int8)
         for i, word in self.enumerated_words:
             if word in a_strings_set:
                 result[i] = 1
diff --git a/credsweeper/ml_model/ml_validator.py b/credsweeper/ml_model/ml_validator.py
index e5f38f725..589a4bb60 100644
--- a/credsweeper/ml_model/ml_validator.py
+++ b/credsweeper/ml_model/ml_validator.py
@@ -2,7 +2,7 @@
 import logging
 import string
 from pathlib import Path
-from typing import List, Tuple, Union, Optional
+from typing import List, Tuple, Union, Optional, Dict
 
 import numpy as np
 import onnxruntime as ort
@@ -90,7 +90,7 @@ def __init__(
     @staticmethod
     def encode(text: str, limit: int) -> np.ndarray:
         """Encodes prepared text to array"""
-        result_array = np.zeros(shape=(limit, MlValidator.NUM_CLASSES), dtype=np.float32)
+        result_array: np.ndarray = np.zeros(shape=(limit, MlValidator.NUM_CLASSES), dtype=np.float32)
         if text is None:
             return result_array
         len_text = len(text)
@@ -122,7 +122,7 @@ def encode_value(text: str) -> np.ndarray:
 
     def _call_model(self, line_input: np.ndarray, variable_input: np.ndarray, value_input: np.ndarray,
                     feature_input: np.ndarray) -> np.ndarray:
-        input_feed = {
+        input_feed: Dict[str, np.ndarray] = {
             "line_input": line_input.astype(np.float32),
             "variable_input": variable_input.astype(np.float32),
             "value_input": value_input.astype(np.float32),
@@ -135,7 +135,7 @@ def _call_model(self, line_input: np.ndarray, variable_input: np.ndarray, value_
 
     def extract_common_features(self, candidates: List[Candidate]) -> np.ndarray:
         """Extract features that are guaranteed to be the same for all candidates on the same line with same value."""
-        feature_array = np.array([], dtype=np.float32)
+        feature_array: np.ndarray = np.array([], dtype=np.float32)
         # Extract features from credential candidate
         default_candidate = candidates[0]
         for feature in self.common_feature_list:
@@ -147,7 +147,7 @@ def extract_common_features(self, candidates: List[Candidate]) -> np.ndarray:
 
     def extract_unique_features(self, candidates: List[Candidate]) -> np.ndarray:
         """Extract features that can be different between candidates. Join them with or operator."""
-        feature_array = np.array([], dtype=np.int8)
+        feature_array: np.ndarray = np.array([], dtype=np.int8)
         default_candidate = candidates[0]
         for feature in self.unique_feature_list:
             new_feature = feature([default_candidate])[0]
@@ -220,7 +220,7 @@ def validate_groups(self, group_list: List[Tuple[CandidateKey, List[Candidate]]]
         variable_input_list = []
         value_input_list = []
         features_list = []
-        probability = np.zeros(len(group_list), dtype=np.float32)
+        probability: np.ndarray = np.zeros(len(group_list), dtype=np.float32)
         head = tail = 0
         for group_key, candidates in group_list:
             line_input, variable_input, value_input, feature_array = self.get_group_features(candidates)
diff --git a/docs/howto/how-to-contribute.md b/docs/howto/how-to-contribute.md
index 129a7f397..e36f206a1 100644
--- a/docs/howto/how-to-contribute.md
+++ b/docs/howto/how-to-contribute.md
@@ -4,13 +4,15 @@ Thank you for your interest in contributing to the CredSweeper tool!
 
 The document covers the process for contributing to the CredSweeper code and documentation. Contributions may be as simple as typo corrections or as complex as new features.
 
-1.  [Process for contributing](#process-for-contributing)
+1. [Process for contributing](#process-for-contributing)
+
     1. [Repository structure](#repository-structure)
-    1. [File Name](#file-name)
-    1. [Self Test & Verification](#self-test-and-verification)
-1.  [How to PR](#how-to-pr)
-1.  [DOs and DON'Ts](#dos-and-donts)
+    2. [File Name](#file-name)
+    3. [Self Test & Verification](#self-test-and-verification)
+    
+2. [How to PR](#how-to-pr)
 
+3. [DOs and DON'Ts](#dos-and-donts)
 
 ## Process for contributing
 
@@ -20,7 +22,7 @@ You need a basic understanding of [Git and GitHub.com](https://guides.github.com
 
 You can also look at our [issues](https://github.com/Samsung/CredSweeper/issues) list and volunteer to work on the ones you're interested in.
 
-**Step 2:** Fork the `/Samsung/CredSweeper` repo and create a branch for your changes.
+**Step 2:** Fork the [Samsung/CredSweeper](https://github.com/Samsung/CredSweeper/fork) repo and create a branch for your changes.
 
 For small changes, you can use GitHub's web interface. Simply click the **Edit the file in your fork of this project** on the file you'd like to change.
 GitHub creates the new branch for you when you submit the changes.
@@ -77,6 +79,7 @@ The maintainers will merge your PR into the master branch once feedback has been
    ```bash
    git clone https://github.com/YOUR-USERNAME/CredSweeper.git
    ```
+   
 3. Set to synchronize the original repository and the forked repository.
 
    ```bash
@@ -84,18 +87,21 @@ The maintainers will merge your PR into the master branch once feedback has been
    git remote add upstream https://github.com/Samsung/CredSweeper.git
    git remote -v
    ```
+   
 4. Create a new branch on the forked repository or the local repository,
    and switch to the new branch.
 
    ```bash
    git checkout -b <new branch name>
    ```
+   
 5. Install Yapf as a pre-commit hook with
 
    ``` bash
    pip install pre-commit
    pre-commit install
    ```
+   
 6. Create a local commit.
 
    ```bash
@@ -103,14 +109,16 @@ The maintainers will merge your PR into the master branch once feedback has been
    git add
    git commit -a
    ```
+
 7. Push the branch
 
    ```bash
    git push origin <new branch name>
    ```
+
 8. Open a pull request on https://github.com/Samsung/CredSweeper.
 
-    All tests and checks MUST be passed.
+   All tests and checks MUST pass:
    - Codestyle check
    - Static analysis
    - Unit tests
diff --git a/docs/requirements.txt b/docs/requirements.txt
index c3f037fa7..cfa90ead1 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -1,5 +1,3 @@
-m2r2==0.3.2
-# ^ the version is latest which has no conflicts
-sphinx_rtd_theme==1.3.0
-sphinx-autodoc-typehints==1.24.0
-autodocsumm==0.2.11
+myst_parser[linkify]==4.0.0
+sphinx==8.1.3
+sphinx_rtd_theme==3.0.2
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 8f707894a..8e7ccdcfb 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -5,7 +5,6 @@
 
 import os
 import sys
-import sphinx_rtd_theme
 
 sys.path.insert(0, os.path.abspath('../..'))
 di = os.path.abspath(os.pardir)
@@ -43,8 +42,7 @@
     'sphinx.ext.viewcode',
     'sphinx.ext.todo',
     'sphinx.ext.napoleon',
-    'sphinx_autodoc_typehints',
-    'm2r2',
+    'myst_parser',
 ]
 
 intersphinx_mapping = {
@@ -52,7 +50,30 @@
     'numpy': ('https://numpy.org/doc/stable/', None),
 }
 
-source_suffix = ['.rst', '.md']
+myst_enable_extensions = [
+    "amsmath",
+    "attrs_inline",
+    "colon_fence",
+    "deflist",
+    "dollarmath",
+    "fieldlist",
+    "html_admonition",
+    "html_image",
+    "linkify",
+    "replacements",
+    "smartquotes",
+    "strikethrough",
+    "substitution",
+    "tasklist",
+]
+
+source_suffix = {
+    '.rst': 'restructuredtext',
+    '.txt': 'markdown',
+    '.md': 'markdown',
+}
+
+myst_heading_anchors = 3
 
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
@@ -94,7 +115,6 @@
 html_logo = 'https://raw.githubusercontent.com/Samsung/CredSweeper/main/docs/images/Logo.png'
 
 html_scaled_image_link = False
-html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
 
 # -- Options for HTMLHelp output ------------------------------------------
 
diff --git a/docs/source/credsweeper.deep_scanner.rst b/docs/source/credsweeper.deep_scanner.rst
index e436610c5..a4821f22c 100644
--- a/docs/source/credsweeper.deep_scanner.rst
+++ b/docs/source/credsweeper.deep_scanner.rst
@@ -108,6 +108,14 @@ credsweeper.deep\_scanner.pkcs12\_scanner module
    :undoc-members:
    :show-inheritance:
 
+credsweeper.deep\_scanner.pptx\_scanner module
+----------------------------------------------
+
+.. automodule:: credsweeper.deep_scanner.pptx_scanner
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 credsweeper.deep\_scanner.tar\_scanner module
 ---------------------------------------------
 
@@ -116,6 +124,14 @@ credsweeper.deep\_scanner.tar\_scanner module
    :undoc-members:
    :show-inheritance:
 
+credsweeper.deep\_scanner.xlsx\_scanner module
+----------------------------------------------
+
+.. automodule:: credsweeper.deep_scanner.xlsx_scanner
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 credsweeper.deep\_scanner.xml\_scanner module
 ---------------------------------------------
 
diff --git a/docs/source/credsweeper.filters.rst b/docs/source/credsweeper.filters.rst
index 64dcca664..e3c324e76 100644
--- a/docs/source/credsweeper.filters.rst
+++ b/docs/source/credsweeper.filters.rst
@@ -36,6 +36,14 @@ credsweeper.filters.line\_specific\_key\_check module
    :undoc-members:
    :show-inheritance:
 
+credsweeper.filters.line\_uue\_part\_check module
+-------------------------------------------------
+
+.. automodule:: credsweeper.filters.line_uue_part_check
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 credsweeper.filters.value\_allowlist\_check module
 --------------------------------------------------
 
diff --git a/docs/source/credsweeper.ml_model.features.rst b/docs/source/credsweeper.ml_model.features.rst
index 3b82b3ec4..bcf548485 100644
--- a/docs/source/credsweeper.ml_model.features.rst
+++ b/docs/source/credsweeper.ml_model.features.rst
@@ -52,14 +52,6 @@ credsweeper.ml\_model.features.is\_secret\_numeric module
    :undoc-members:
    :show-inheritance:
 
-credsweeper.ml\_model.features.search\_in\_attribute module
------------------------------------------------------------
-
-.. automodule:: credsweeper.ml_model.features.search_in_attribute
-   :members:
-   :undoc-members:
-   :show-inheritance:
-
 credsweeper.ml\_model.features.reny\_entropy module
 ---------------------------------------------------
 
@@ -76,6 +68,14 @@ credsweeper.ml\_model.features.rule\_name module
    :undoc-members:
    :show-inheritance:
 
+credsweeper.ml\_model.features.search\_in\_attribute module
+-----------------------------------------------------------
+
+.. automodule:: credsweeper.ml_model.features.search_in_attribute
+   :members:
+   :undoc-members:
+   :show-inheritance:
+
 credsweeper.ml\_model.features.shannon\_entropy module
 ------------------------------------------------------
 
diff --git a/docs/source/how-to-contribute.rst b/docs/source/how-to-contribute.rst
deleted file mode 100644
index 168224e78..000000000
--- a/docs/source/how-to-contribute.rst
+++ /dev/null
@@ -1 +0,0 @@
-.. mdinclude:: ../howto/how-to-contribute.md
diff --git a/docs/source/how_to_contribute.rst b/docs/source/how_to_contribute.rst
new file mode 100644
index 000000000..71eb2f3ca
--- /dev/null
+++ b/docs/source/how_to_contribute.rst
@@ -0,0 +1,5 @@
+How To Contribute
+=================
+
+.. include:: ../howto/how-to-contribute.md
+   :parser: myst_parser.sphinx_
diff --git a/docs/source/index.rst b/docs/source/index.rst
index a1f05abcb..1fd98410b 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -43,7 +43,7 @@ How to Contribute
 .. toctree::
    :maxdepth: 2
 
-   how-to-contribute
+   how_to_contribute
 
 
 Overall architecture
diff --git a/fuzz/requirements.txt b/fuzz/requirements.txt
index 91f5329fc..83613f208 100644
--- a/fuzz/requirements.txt
+++ b/fuzz/requirements.txt
@@ -1,5 +1,5 @@
 # Fuzzing
-atheris==2.3.0
+atheris==2.3.0; python_version < '3.12'
 
 # auxiliary. should be installed from ../requirements.txt (pytest-cov)
 coverage
diff --git a/pyproject.toml b/pyproject.toml
index 9b23da1ee..f7adfec16 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ classifiers = [
     "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Topic :: Security",
diff --git a/requirements.txt b/requirements.txt
index 888ef4976..a503eb483 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,9 +13,8 @@ GitPython==3.1.43
 google-auth-oauthlib==1.2.1
 humanfriendly==10.0
 lxml==5.3.0
-numpy==1.24.4
-# ^ the version supports python 3.8-3.11
-# ^ todo: check for py3.12 later https://github.com/numpy/numpy/issues/23808
+numpy==1.24.4; python_version < '3.10'
+numpy==1.26.4; python_version >= '3.10'
 oauthlib==3.2.2
 odfpy==1.4.1
 onnxruntime==1.19.2
diff --git a/tests/test_app.py b/tests/test_app.py
index 030b235cf..1f1654936 100644
--- a/tests/test_app.py
+++ b/tests/test_app.py
@@ -20,6 +20,9 @@
 
 class TestApp(TestCase):
 
+    def setUp(self):
+        self.maxDiff = None
+
     @staticmethod
     def _m_credsweeper(args) -> Tuple[str, str]:
         proc = subprocess.Popen(
@@ -325,7 +328,6 @@ def test_help_p(self) -> None:
                     else:
                         text = ' '.join([text, line])
             expected = " ".join(text.split())
-            self.maxDiff = None
             self.assertEqual(expected, output)
 
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #