diff --git a/.mypy.ini b/.mypy.ini
index 38c842392..cf4fd1082 100644
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -45,3 +45,6 @@ ignore_missing_imports = True
 
 [mypy-password_strength.*]
 ignore_missing_imports = True
+
+[mypy-docx.*]
+ignore_missing_imports = True
diff --git a/credsweeper/config/config.py b/credsweeper/config/config.py
index 275b49107..f53d9b794 100644
--- a/credsweeper/config/config.py
+++ b/credsweeper/config/config.py
@@ -19,6 +19,7 @@ def __init__(self, config: Dict[str, Any]) -> None:
         self.exclude_patterns: List[re.Pattern] = [re.compile(pattern) for pattern in config["exclude"]["pattern"]]
         self.exclude_paths: List[str] = config["exclude"]["path"]
         self.exclude_containers: List[str] = config["exclude"]["containers"]
+        self.exclude_documents: List[str] = config["exclude"]["documents"]
         self.exclude_extensions: List[str] = config["exclude"]["extension"]
         self.exclude_lines: Set[str] = set(config["exclude"].get("lines", []))
         self.exclude_values: Set[str] = set(config["exclude"].get("values", []))
diff --git a/credsweeper/deep_scanner/deep_scanner.py b/credsweeper/deep_scanner/deep_scanner.py
index e5db0bd76..16838cccb 100644
--- a/credsweeper/deep_scanner/deep_scanner.py
+++ b/credsweeper/deep_scanner/deep_scanner.py
@@ -16,6 +16,7 @@
 from credsweeper.utils import Util
 from .byte_scanner import ByteScanner
 from .bzip2_scanner import Bzip2Scanner
+from .docx_scanner import DocxScanner
 from .encoder_scanner import EncoderScanner
 from .gzip_scanner import GzipScanner
 from .html_scanner import HtmlScanner
@@ -34,6 +35,7 @@
 class DeepScanner(
     ByteScanner,  #
     Bzip2Scanner,  #
+    DocxScanner,  #
     EncoderScanner,  #
     GzipScanner,  #
     HtmlScanner,  #
@@ -71,6 +73,7 @@ def get_deep_scanners(data: bytes) -> List[Any]:
             deep_scanners.append(ZipScanner)
             # probably, there might be a docx, xlxs and so on.
             # It might be scanned with text representation in third-party libraries.
+            deep_scanners.append(DocxScanner)
         elif Util.is_bzip2(data):
             deep_scanners.append(Bzip2Scanner)
         elif Util.is_tar(data):
diff --git a/credsweeper/deep_scanner/docx_scanner.py b/credsweeper/deep_scanner/docx_scanner.py
new file mode 100644
index 000000000..c40df37b7
--- /dev/null
+++ b/credsweeper/deep_scanner/docx_scanner.py
@@ -0,0 +1,43 @@
+import io
+import logging
+from abc import ABC
+from typing import List
+
+import docx
+
+from credsweeper.credentials import Candidate
+from credsweeper.deep_scanner.abstract_scanner import AbstractScanner
+from credsweeper.file_handler.data_content_provider import DataContentProvider
+from credsweeper.file_handler.string_content_provider import StringContentProvider
+
+logger = logging.getLogger(__name__)
+
+
+class DocxScanner(AbstractScanner, ABC):
+    """Deep-scanner mixin that extracts paragraph text from DOCX data and scans it as plain text lines"""
+
+    def data_scan(
+            self,  #
+            data_provider: DataContentProvider,  #
+            depth: int,  #
+            recursive_limit_size: int) -> List[Candidate]:
+        """Scans non-empty text lines of DOCX paragraphs; returns found candidates or [] when parsing fails"""
+        candidates: List[Candidate] = []
+        # `depth` and `recursive_limit_size` are not used here; any failure below is logged and yields []
+        try:
+            docx_lines: List[str] = []
+            # NOTE(review): `doc.paragraphs` presumably covers body paragraphs only (not tables/headers) - confirm
+            doc = docx.Document(io.BytesIO(data_provider.data))
+            for paragraph in doc.paragraphs:
+                for line in paragraph.text.splitlines():
+                    if line:  # empty lines are skipped
+                        docx_lines.append(line)
+            # the "|DOCX" suffix appended to info marks lines extracted by this scanner
+            string_data_provider = StringContentProvider(lines=docx_lines,
+                                                         file_path=data_provider.file_path,
+                                                         file_type=data_provider.file_type,
+                                                         info=f"{data_provider.info}|DOCX")
+            candidates = self.scanner.scan(string_data_provider)
+        except Exception as docx_exc:  # best-effort: data that cannot be processed as DOCX is only logged
+            logger.debug(f"{data_provider.file_path}:{docx_exc}")
+        return candidates
diff --git a/credsweeper/file_handler/file_path_extractor.py b/credsweeper/file_handler/file_path_extractor.py
index 84fa1213c..ba8dc6f5b 100644
--- a/credsweeper/file_handler/file_path_extractor.py
+++ b/credsweeper/file_handler/file_path_extractor.py
@@ -143,6 +143,9 @@ def check_exclude_file(config: Config, path: str) -> bool:
             return True
         if not config.depth and file_extension in config.exclude_containers:
             return True
+        # --depth or --doc enables scan for all documents extensions
+        if not (config.depth or config.doc) and file_extension in config.exclude_documents:
+            return True
         return False
 
     @staticmethod
diff --git a/credsweeper/secret/config.json b/credsweeper/secret/config.json
index 6914ac849..fa50bb5f5 100644
--- a/credsweeper/secret/config.json
+++ b/credsweeper/secret/config.json
@@ -4,13 +4,15 @@
         "containers": [
             ".apk",
             ".bz2",
-            ".docx",
             ".gz",
-            ".pdf",
             ".tar",
             ".xlsx",
             ".zip"
         ],
+        "documents": [
+            ".docx",
+            ".pdf"
+        ],
         "extension": [
             ".7z",
             ".aac",
@@ -71,6 +73,7 @@
             "/__pycache__/",
             "/node_modules/",
             "/target/",
+            "/.venv/",
             "/venv/"
         ],
         "lines": [],
diff --git a/docs/source/overall_architecture.rst b/docs/source/overall_architecture.rst
index 3344454a1..47412c496 100644
--- a/docs/source/overall_architecture.rst
+++ b/docs/source/overall_architecture.rst
@@ -15,6 +15,7 @@ When paths to scan are entered, get the files in that paths and the files are ex
 - exclude
    - pattern: Regex patterns to exclude scan.
    - containers: Extensions in lower case of container files which might be scan with --depth option
+   - documents: Extensions in lower case of document files which might be scanned with --doc and/or --depth option
    - extension: Extensions in lower case to exclude scan.
    - path: Paths to exclude scan.
 - source_ext: List of extensions for scanning categorized as source files.
@@ -36,6 +37,11 @@ When paths to scan are entered, get the files in that paths and the files are ex
             ".zip",
             ...
         ],
+        "documents": [
+            ".docx",
+            ".pdf",
+            ...
+        ],
         "extension": [
             ".7z",
             ".jpg",
diff --git a/requirements.txt b/requirements.txt
index 0e903ccea..bee75005f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,6 +9,7 @@ openpyxl==3.1.2
 pandas==2.0.3
 # ^ the version supports by python 3.8
 PyYAML==6.0.1
+python-docx==1.0.1
 requests==2.31.0
 schwifty==2023.9.0
 typing_extensions==4.8.0
@@ -48,3 +49,4 @@ types-python-dateutil
 types-regex
 types-humanfriendly
 yapf
+
diff --git a/setup.py b/setup.py
index 0353ce5e6..a5f591e8c 100644
--- a/setup.py
+++ b/setup.py
@@ -15,6 +15,7 @@
     "password-strength",  #
     "pdfminer.six",  #
     "PyYAML",  #
+    "python-docx",  #
     "requests",  #
     "scipy",  #
     "schwifty",  #
diff --git a/tests/__init__.py b/tests/__init__.py
index b62f5b743..7bdebc357 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 
 # total number of files in test samples
-SAMPLES_FILES_COUNT: int = 120
+SAMPLES_FILES_COUNT: int = 123
 
 # credentials count after scan
 SAMPLES_CRED_COUNT: int = 383
@@ -11,10 +11,10 @@
 SAMPLES_POST_CRED_COUNT: int = 296
 
 # with option --doc
-SAMPLES_IN_DOC = 422
+SAMPLES_IN_DOC = 427
 
 # archived credentials that are not found without --depth
-SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 16
+SAMPLES_IN_DEEP_1 = SAMPLES_POST_CRED_COUNT + 21
 SAMPLES_IN_DEEP_2 = SAMPLES_IN_DEEP_1 + 16
 SAMPLES_IN_DEEP_3 = SAMPLES_IN_DEEP_2 + 3
 
diff --git a/tests/conftest.py b/tests/conftest.py
index 49f685146..4f8da811b 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -38,6 +38,9 @@ def config() -> Config:
     config_dict["validation"]["api_validation"] = False
     config_dict["use_filters"] = True
     config_dict["find_by_ext"] = False
+    config_dict["exclude"]["containers"] = [".gz", ".zip"]
+    config_dict["exclude"]["documents"] = [".docx", ".pdf"]
+    config_dict["exclude"]["extension"] = [".jpg", ".bmp"]
     config_dict["depth"] = 0
     config_dict["doc"] = False
     config_dict["find_by_ext_list"] = [".txt", ".inf"]
diff --git a/tests/data/depth_3.json b/tests/data/depth_3.json
index a28445ab9..880f7a216 100644
--- a/tests/data/depth_3.json
+++ b/tests/data/depth_3.json
@@ -6338,30 +6338,6 @@
             }
         ]
     },
-    {
-        "api_validation": "NOT_AVAILABLE",
-        "ml_validation": "VALIDATED_KEY",
-        "ml_probability": 0.97709,
-        "rule": "Password",
-        "severity": "medium",
-        "line_data_list": [
-            {
-                "line": "password = Xdj@jcN834b.",
-                "line_num": 2,
-                "path": "tests/samples/password.docx",
-                "info": "tests/samples/password.docx|ZIP|word/document.xml|HTML",
-                "value": "Xdj@jcN834b.",
-                "value_start": 11,
-                "value_end": 23,
-                "variable": "password",
-                "entropy_validation": {
-                    "iterator": "BASE64_CHARS",
-                    "entropy": 2.8208020839342964,
-                    "valid": false
-                }
-            }
-        ]
-    },
     {
         "api_validation": "NOT_AVAILABLE",
         "ml_validation": "VALIDATED_KEY",
@@ -8216,6 +8192,102 @@
             }
         ]
     },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "VALIDATED_KEY",
+        "ml_probability": 0.94412,
+        "rule": "Password",
+        "severity": "medium",
+        "line_data_list": [
+            {
+                "line": "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t : Password = WeR15tr0n6",
+                "line_num": 1,
+                "path": "tests/samples/sample.docx",
+                "info": "tests/samples/sample.docx|ZIP|word/document.xml|XML",
+                "value": "WeR15tr0n6",
+                "value_start": 77,
+                "value_end": 87,
+                "variable": "Password",
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 3.321928094887362,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Github Token",
+        "severity": "high",
+        "line_data_list": [
+            {
+                "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf",
+                "line_num": 2,
+                "path": "tests/samples/sample.docx",
+                "info": "tests/samples/sample.docx|DOCX",
+                "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf",
+                "value_start": 0,
+                "value_end": 40,
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 4.632263329852917,
+                    "valid": true
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "VALIDATED_KEY",
+        "ml_probability": 0.94412,
+        "rule": "Password",
+        "severity": "medium",
+        "line_data_list": [
+            {
+                "line": "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}t : Password = WeR15tr0n6",
+                "line_num": 1,
+                "path": "tests/samples/sample.docx.gz",
+                "info": "tests/samples/sample.docx.gz|GZIP|tests/samples/sample.docx|ZIP|word/document.xml|XML",
+                "value": "WeR15tr0n6",
+                "value_start": 77,
+                "value_end": 87,
+                "variable": "Password",
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 3.321928094887362,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Github Token",
+        "severity": "high",
+        "line_data_list": [
+            {
+                "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf",
+                "line_num": 2,
+                "path": "tests/samples/sample.docx.gz",
+                "info": "tests/samples/sample.docx.gz|GZIP|tests/samples/sample.docx|DOCX",
+                "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf",
+                "value_start": 0,
+                "value_end": 40,
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 4.632263329852917,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "api_validation": "NOT_AVAILABLE",
         "ml_validation": "NOT_AVAILABLE",
@@ -8264,6 +8336,30 @@
             }
         ]
     },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Github Token",
+        "severity": "high",
+        "line_data_list": [
+            {
+                "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd",
+                "line_num": 1,
+                "path": "tests/samples/sample.pdf",
+                "info": "tests/samples/sample.pdf|PDF:1|RAW",
+                "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd",
+                "value_start": 0,
+                "value_end": 40,
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 4.732263329852917,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "api_validation": "NOT_AVAILABLE",
         "ml_validation": "VALIDATED_KEY",
@@ -8408,6 +8504,30 @@
             }
         ]
     },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Azure Secret Value",
+        "severity": "high",
+        "line_data_list": [
+            {
+                "line": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P",
+                "line_num": 1,
+                "path": "tests/samples/small.pdf",
+                "info": "tests/samples/small.pdf|PDF:1|RAW",
+                "value": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P",
+                "value_start": 0,
+                "value_end": 40,
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 4.620007704961091,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "api_validation": "NOT_AVAILABLE",
         "ml_validation": "NOT_AVAILABLE",
diff --git a/tests/data/doc.json b/tests/data/doc.json
index f505ae0ad..e88d39e40 100644
--- a/tests/data/doc.json
+++ b/tests/data/doc.json
@@ -11222,6 +11222,102 @@
             }
         ]
     },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "PASSWD_PAIR",
+        "severity": "medium",
+        "line_data_list": [
+            {
+                "line": "Password = WeR15tr0n6",
+                "line_num": 1,
+                "path": "tests/samples/sample.docx",
+                "info": "tests/samples/sample.docx|DOCX",
+                "value": "WeR15tr0n6",
+                "value_start": 11,
+                "value_end": 21,
+                "variable": "Password",
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 3.321928094887362,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Github Token",
+        "severity": "high",
+        "line_data_list": [
+            {
+                "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf",
+                "line_num": 2,
+                "path": "tests/samples/sample.docx",
+                "info": "tests/samples/sample.docx|DOCX",
+                "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf",
+                "value_start": 0,
+                "value_end": 40,
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 4.632263329852917,
+                    "valid": true
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "PASSWD_PAIR",
+        "severity": "medium",
+        "line_data_list": [
+            {
+                "line": "password = Xdj@jcN834b",
+                "line_num": 1,
+                "path": "tests/samples/sample.pdf",
+                "info": "tests/samples/sample.pdf|PDF",
+                "value": "Xdj@jcN834b",
+                "value_start": 11,
+                "value_end": 22,
+                "variable": "password",
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 2.963119653306635,
+                    "valid": false
+                }
+            }
+        ]
+    },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Github Token",
+        "severity": "high",
+        "line_data_list": [
+            {
+                "line": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd",
+                "line_num": 3,
+                "path": "tests/samples/sample.pdf",
+                "info": "tests/samples/sample.pdf|PDF",
+                "value": "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd",
+                "value_start": 0,
+                "value_end": 40,
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 4.732263329852917,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "api_validation": "NOT_AVAILABLE",
         "ml_validation": "NOT_AVAILABLE",
@@ -11342,6 +11438,30 @@
             }
         ]
     },
+    {
+        "api_validation": "NOT_AVAILABLE",
+        "ml_validation": "NOT_AVAILABLE",
+        "ml_probability": null,
+        "rule": "Azure Secret Value",
+        "severity": "high",
+        "line_data_list": [
+            {
+                "line": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P",
+                "line_num": 1,
+                "path": "tests/samples/small.pdf",
+                "info": "tests/samples/small.pdf|PDF",
+                "value": "qpF8Q~PCM5MhMoyTFc5TYEomnzRUKim9UJhe8a2P",
+                "value_start": 0,
+                "value_end": 40,
+                "variable": null,
+                "entropy_validation": {
+                    "iterator": "BASE64_CHARS",
+                    "entropy": 4.620007704961091,
+                    "valid": true
+                }
+            }
+        ]
+    },
     {
         "api_validation": "NOT_AVAILABLE",
         "ml_validation": "NOT_AVAILABLE",
diff --git a/tests/file_handler/test_file_path_extractor.py b/tests/file_handler/test_file_path_extractor.py
index 16fb5236c..49bcfaf13 100644
--- a/tests/file_handler/test_file_path_extractor.py
+++ b/tests/file_handler/test_file_path_extractor.py
@@ -1,9 +1,11 @@
 import os.path
+import re
 import tempfile
+import unittest
+from typing import List
 from unittest import mock
 
 import git
-import pytest
 from humanfriendly import parse_size
 
 from credsweeper.config import Config
@@ -11,20 +13,62 @@
 from tests import AZ_STRING
 
 
-class TestFilePathExtractor:
+class TestFilePathExtractor(unittest.TestCase):
+
+    def setUp(self):  # builds a Config with empty exclude lists and the path groups used by the tests
+        config_dict = {
+            "size_limit": None,
+            "find_by_ext": False,
+            "find_by_ext_list": [],
+            "doc": False,
+            "depth": 0,
+            "exclude": {
+                "path": [],
+                "pattern": [],
+                "containers": [],
+                "documents": [],
+                "extension": []
+            },
+            "source_ext": [],
+            "source_quote_ext": [],
+            "check_for_literals": [],
+            "validation": {
+                "api_validation": False
+            },
+            "use_filters": False,
+            "line_data_output": [],
+            "candidate_output": [],
+            "min_keyword_value_length": 0,
+            "min_pattern_value_length": 0,
+        }
+        self.config = Config(config_dict)
+
+        # always excluded, even with empty exclude lists (not_allowed_path_pattern)
+        self.paths_not = ["dummy.css", "tmp/dummy.css", "c:\\temp\\dummy.css"]
+        # excluded only when a matching exclude pattern is configured
+        self.paths_reg = ["tmp/Magic/dummy.Number", "/tmp/log/MagicNumber.txt"]
+        # excluded only when "/.git/" is configured in exclude paths
+        self.paths_git = ["C:\\.git\\dummy", "./.git/dummy.sample", "~/.git\\dummy.txt"]
+        # source files: expected to be not excluded in every scenario of these tests
+        self.paths_src = ["dummy.py", "/tmp/dummy.py", "tmp/dummy.py", "C:\\dummy.py", "temp\\dummy.py"]
+        # containers: not excluded when --depth is set
+        self.paths_pak = ["dummy.gz", "/tmp/dummy.gz", "tmp/dummy.gz", "C:\\dummy.gz", "temp\\dummy.gz"]
+        # documents: not excluded when --doc or --depth is set
+        self.paths_doc = ["dummy.pdf", "/tmp/dummy.pdf", "tmp/dummy.pdf", "C:\\dummy.pdf", "temp\\dummy.pdf"]
+        # excluded whenever the extension is listed in exclude extensions, regardless of --doc/--depth
+        self.paths_ext = ["dummy.so", "dummy.so", "/tmp/dummy.so", "tmp/dummy.so", "C:\\dummy.so", "temp\\dummy.so"]
+
+    def tearDown(self):  # drops the per-test Config created in setUp
+        del self.config
 
     def test_apply_gitignore_p(self) -> None:
         """Evaluate that code files would be included after filtering with .gitignore"""
-
         files = ["file.py", "src/file.py", "src/dir/file.py"]
-
         filtered_files = FilePathExtractor.apply_gitignore(files)
-
-        assert set(filtered_files) == set(files)
+        self.assertSetEqual(set(files), set(filtered_files))
 
     def test_apply_gitignore_n(self) -> None:
         """Evaluate that .gitignore correctly filters out files from project"""
-
         with tempfile.TemporaryDirectory() as tmp_dir:
             git.Repo.init(tmp_dir)
             with open(os.path.join(tmp_dir, ".gitignore"), "w") as f:
@@ -40,53 +84,97 @@ def test_apply_gitignore_n(self) -> None:
             ]
             filtered_files = FilePathExtractor.apply_gitignore(files)
 
-        assert len(filtered_files) == 1
-        assert filtered_files[0] == os.path.join(tmp_dir, "src", "dir", "file.cpp")
-
-    @pytest.mark.parametrize("file_path", [
-        "/tmp/test/dummy.p12",
-        "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\test\\dummy.p12",
-        "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\TarGet\\dummy.p12",
-    ])
-    def test_check_exclude_file_p(self, config: Config, file_path: pytest.fixture) -> None:
-        config.find_by_ext = True
-        assert not FilePathExtractor.check_exclude_file(config, file_path), f"{file_path}"
-
-    @pytest.mark.parametrize("file_path", [
-        "dummy.JPG",
-        "/tmp/target/dummy.p12",
-        "C:\\Users\\RUNNER~1\\AppData\\Local\\Temp\\tmptjz2p1zk\\target\\dummy.p12",
-    ])
-    def test_check_exclude_file_n(self, config: Config, file_path: pytest.fixture) -> None:
-        config.find_by_ext = True
-        assert FilePathExtractor.check_exclude_file(config, file_path)
-
-    @pytest.mark.parametrize("file_type", [".inf", ".txt"])
-    def test_find_by_ext_file_p(self, config: Config, file_type: pytest.fixture) -> None:
-        config.find_by_ext = True
-        assert FilePathExtractor.is_find_by_ext_file(config, file_type)
-
-    @pytest.mark.parametrize("file_type", [".bmp", ".doc"])
-    def test_find_by_ext_file_n(self, config: Config, file_type: pytest.fixture) -> None:
-        assert not FilePathExtractor.is_find_by_ext_file(config, file_type)
-        config.find_by_ext = False
-        assert not FilePathExtractor.is_find_by_ext_file(config, file_type)
+        self.assertEqual(1, len(filtered_files))
+        expected_path = os.path.join(tmp_dir, "src", "dir", "file.cpp")
+        self.assertEqual(expected_path, filtered_files[0])
+
+    def assert_true_check_exclude_file(self, paths: List[str]):  # every path must be excluded by self.config
+        for i in paths:
+            self.assertTrue(FilePathExtractor.check_exclude_file(self.config, i), i)  # path is the failure message
+
+    def assert_false_check_exclude_file(self, paths: List[str]):  # no path may be excluded by self.config
+        for i in paths:
+            self.assertFalse(FilePathExtractor.check_exclude_file(self.config, i), i)  # path is the failure message
+
+    def test_check_exclude_file_p(self) -> None:
+        # all exclude lists are configured; with depth set, containers and documents must not be excluded
+        self.config.exclude_containers = [".gz"]
+        self.config.exclude_documents = [".pdf"]
+        self.config.exclude_extensions = [".so"]
+        self.config.exclude_paths = ["/.git/"]
+        self.config.exclude_patterns = [re.compile(r".*magic.*number.*")]
+        self.config.depth = 1
+        self.config.doc = False
+        self.assert_true_check_exclude_file(self.paths_not)
+        self.assert_true_check_exclude_file(self.paths_reg)
+        self.assert_true_check_exclude_file(self.paths_git)
+        self.assert_false_check_exclude_file(self.paths_src)
+        self.assert_false_check_exclude_file(self.paths_pak)
+        self.assert_false_check_exclude_file(self.paths_doc)
+        self.assert_true_check_exclude_file(self.paths_ext)
+
+        # only doc is set (depth=0): documents are still not excluded, but containers are
+        self.config.depth = 0
+        self.config.doc = True
+        self.assert_true_check_exclude_file(self.paths_not)
+        self.assert_true_check_exclude_file(self.paths_reg)
+        self.assert_true_check_exclude_file(self.paths_git)
+        self.assert_false_check_exclude_file(self.paths_src)
+        self.assert_true_check_exclude_file(self.paths_pak)
+        self.assert_false_check_exclude_file(self.paths_doc)
+        self.assert_true_check_exclude_file(self.paths_ext)
+
+    def test_check_exclude_file_n(self) -> None:
+        # no exclude lists are configured: only not_allowed_path_pattern matches
+        self.assert_true_check_exclude_file(self.paths_not)
+        self.assert_false_check_exclude_file(self.paths_reg)
+        self.assert_false_check_exclude_file(self.paths_git)
+        self.assert_false_check_exclude_file(self.paths_src)
+        self.assert_false_check_exclude_file(self.paths_pak)
+        self.assert_false_check_exclude_file(self.paths_doc)
+        self.assert_false_check_exclude_file(self.paths_ext)
+
+        # containers, documents and extensions are configured while depth=0 and doc=False: all three are excluded
+        self.config.exclude_containers = [".gz"]
+        self.config.exclude_documents = [".pdf"]
+        self.config.exclude_extensions = [".so"]
+        self.assert_true_check_exclude_file(self.paths_not)
+        self.assert_false_check_exclude_file(self.paths_reg)
+        self.assert_false_check_exclude_file(self.paths_git)
+        self.assert_false_check_exclude_file(self.paths_src)
+        self.assert_true_check_exclude_file(self.paths_pak)
+        self.assert_true_check_exclude_file(self.paths_doc)
+        self.assert_true_check_exclude_file(self.paths_ext)
+
+    def test_find_by_ext_file_p(self) -> None:  # find_by_ext enabled: only listed extensions are reported
+        self.config.find_by_ext = True
+        self.config.find_by_ext_list = [".p12", ".jpg"]
+        self.assertTrue(FilePathExtractor.is_find_by_ext_file(self.config, ".p12"))
+        self.assertTrue(FilePathExtractor.is_find_by_ext_file(self.config, ".jpg"))
+        self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".bmp"))  # not in find_by_ext_list
+
+    def test_find_by_ext_file_n(self) -> None:  # find_by_ext disabled: nothing is reported, listed or not
+        self.config.find_by_ext = False
+        self.config.find_by_ext_list = [".p12", ".bmp"]
+        self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".p12"))
+        self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".bmp"))
+        self.assertFalse(FilePathExtractor.is_find_by_ext_file(self.config, ".jpg"))  # not in find_by_ext_list
 
     @mock.patch("os.path.getsize")
-    def test_check_file_size_p(self, mock_getsize, config: Config) -> None:
+    def test_check_file_size_p(self, mock_getsize) -> None:
         mock_getsize.return_value = parse_size("11MiB")
-        config.size_limit = parse_size("10MiB")
-        assert FilePathExtractor.check_file_size(config, "")
+        self.config.size_limit = parse_size("10MiB")
+        self.assertTrue(FilePathExtractor.check_file_size(self.config, ""))
 
     @mock.patch("os.path.getsize")
-    def test_check_file_size_n(self, mock_getsize, config: Config) -> None:
+    def test_check_file_size_n(self, mock_getsize) -> None:
         mock_getsize.return_value = parse_size("11MiB")
-        config.size_limit = None
-        assert not FilePathExtractor.check_file_size(config, "")
-        config.size_limit = parse_size("11MiB")
-        assert not FilePathExtractor.check_file_size(config, "")
+        self.config.size_limit = None
+        self.assertFalse(FilePathExtractor.check_file_size(self.config, ""))
+        self.config.size_limit = parse_size("11MiB")
+        self.assertFalse(FilePathExtractor.check_file_size(self.config, ""))
 
-    def test_skip_symlink_n(self, config: Config) -> None:
+    def test_skip_symlink_n(self) -> None:
         with tempfile.TemporaryDirectory() as tmp_dir:
             sub_dir = os.path.join(tmp_dir, "sub_dir")
             os.mkdir(sub_dir)
@@ -103,9 +191,9 @@ def test_skip_symlink_n(self, config: Config) -> None:
             for root, dirs, files in os.walk(tmp_dir):
                 files_walked.update(files)
                 dirs_walked.update(dirs)
-            assert dirs_walked == {"sub_dir", "s_dir_link"}
-            assert files_walked == {"target", "s_link"}
+            self.assertEqual({"sub_dir", "s_dir_link"}, dirs_walked)
+            self.assertEqual({"target", "s_link"}, files_walked)
 
-            paths = FilePathExtractor.get_file_paths(config, tmp_dir)
-            assert len(paths) == 1
-            assert paths[0] == target_path
+            paths = FilePathExtractor.get_file_paths(self.config, tmp_dir)
+            self.assertEqual(1, len(paths))
+            self.assertEqual(target_path, paths[0])
diff --git a/tests/samples/password.docx b/tests/samples/password.docx
deleted file mode 100644
index 6d6db3a52..000000000
Binary files a/tests/samples/password.docx and /dev/null differ
diff --git a/tests/samples/sample.docx b/tests/samples/sample.docx
new file mode 100644
index 000000000..a8762b33f
Binary files /dev/null and b/tests/samples/sample.docx differ
diff --git a/tests/samples/sample.docx.gz b/tests/samples/sample.docx.gz
new file mode 100644
index 000000000..7c4c56012
Binary files /dev/null and b/tests/samples/sample.docx.gz differ
diff --git a/tests/samples/sample.pdf b/tests/samples/sample.pdf
index e4b0a8229..d4a1c8acf 100644
Binary files a/tests/samples/sample.pdf and b/tests/samples/sample.pdf differ
diff --git a/tests/samples/sample_bad_empty.docx b/tests/samples/sample_bad_empty.docx
new file mode 100644
index 000000000..3b60e1683
Binary files /dev/null and b/tests/samples/sample_bad_empty.docx differ
diff --git a/tests/samples/small.pdf b/tests/samples/small.pdf
new file mode 100644
index 000000000..7b6e0bf5f
Binary files /dev/null and b/tests/samples/small.pdf differ
diff --git a/tests/test_main.py b/tests/test_main.py
index 6619cc1a8..22412c89a 100644
--- a/tests/test_main.py
+++ b/tests/test_main.py
@@ -463,9 +463,9 @@ def test_pdf_p(self) -> None:
         cred_sweeper = CredSweeper(depth=33)
         cred_sweeper.run(content_provider=content_provider)
         found_credentials = cred_sweeper.credential_manager.get_credentials()
-        self.assertEqual(2, len(found_credentials))
-        self.assertSetEqual({"AWS Client ID", "Password"}, set(i.rule_name for i in found_credentials))
-        self.assertSetEqual({"Xdj@jcN834b", "AKIAGIREOGIAWSKEY123"},
+        self.assertEqual(3, len(found_credentials))
+        self.assertSetEqual({"AWS Client ID", "Password", "Github Token"}, set(i.rule_name for i in found_credentials))
+        self.assertSetEqual({"Xdj@jcN834b", "AKIAGIREOGIAWSKEY123", "ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2CLN230RP1I8Vd"},
                             set(i.line_data_list[0].value for i in found_credentials))
 
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
@@ -556,19 +556,20 @@ def test_encoded_p(self) -> None:
 
     def test_docx_p(self) -> None:
         # test for finding credentials in docx
-        content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "password.docx"])
-        cred_sweeper = CredSweeper(depth=5)
+        content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "sample.docx"])
+        cred_sweeper = CredSweeper(doc=True)
         cred_sweeper.run(content_provider=content_provider)
         found_credentials = cred_sweeper.credential_manager.get_credentials()
-        self.assertEqual(1, len(found_credentials))
-        self.assertEqual("Xdj@jcN834b.", found_credentials[0].line_data_list[0].value)
+        self.assertEqual(2, len(found_credentials))
+        self.assertEqual("WeR15tr0n6", found_credentials[0].line_data_list[0].value)
+        self.assertEqual("ghr_Ku7ikDwqD1Ge2u3Wf1UM3z2SLN230RP1I8Wf", found_credentials[1].line_data_list[0].value)
 
     # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
 
     def test_docx_n(self) -> None:
-        # test docx  - no credential should be found without 'depth'
-        content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "password.docx"])
-        cred_sweeper = CredSweeper()
+        # test docx  - no credential should be found without 'doc'
+        content_provider: FilesProvider = TextProvider([SAMPLES_PATH / "sample.docx"])
+        cred_sweeper = CredSweeper(doc=False)
         cred_sweeper.run(content_provider=content_provider)
         found_credentials = cred_sweeper.credential_manager.get_credentials()
         self.assertEqual(0, len(found_credentials))