From 8635eff81a89a3ada51441188848b71b530fe828 Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Tue, 22 Oct 2024 15:46:10 +0700 Subject: [PATCH 1/5] Optimize wildcard filter --- CONTRIBUTORS.md | 1 - lib/core/settings.py | 2 -- lib/utils/diff.py | 35 ++++++++++++++++++++++------------- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 5391c4f4a..ab2c275a3 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -71,7 +71,6 @@ - [at0m](https://github.com/atomiczsec/) - [junmoka](https://github.com/junmoka) - [Akshay Ravi](https://www.linkedin.com/in/c09yc47/) -- [kosyan62](https://https://github.com/kosyan62) - [Maxence Zolnieurck](https://github.com/mxcezl) - [Giorgos Drosos](https://github.com/gdrosos) - [huyphan](https://github.com/huyphan) diff --git a/lib/core/settings.py b/lib/core/settings.py index 7d663b107..e24216906 100755 --- a/lib/core/settings.py +++ b/lib/core/settings.py @@ -115,8 +115,6 @@ RATE_UPDATE_DELAY = 0.15 -MAX_MATCH_RATIO = 0.98 - ITER_CHUNK_SIZE = 1024 * 1024 MAX_RESPONSE_SIZE = 80 * 1024 * 1024 diff --git a/lib/utils/diff.py b/lib/utils/diff.py index f78f6d7be..aba450221 100755 --- a/lib/utils/diff.py +++ b/lib/utils/diff.py @@ -19,7 +19,7 @@ import difflib import re -from lib.core.settings import MAX_MATCH_RATIO +from lib.utils.common import lstrip_once class DynamicContentParser: @@ -38,20 +38,29 @@ def compare_to(self, content): """ DynamicContentParser.compare_to() workflow - 1. Check if the wildcard response is static or not, if yes, compare 2 responses - 2. If it's not static, get static patterns (splitting by space) in both responses - and check if they match - 3. In some rare cases, checking static patterns fails, so make a final confirmation - if the similarity ratio of 2 responses is not high enough to prove they are the same + 1. Check if the wildcard response is static or not, if yes, compare two responses. + 2. 
If it's not static, get static patterns (splitted by space) and check if the response + has all of them. + 3. In some cases, checking static patterns isn't reliable enough, so we check the similarity + ratio of the two responses. """ - if self._is_static and content == self._base_content: - return True + if self._is_static: + return content == self._base_content - diff = self._differ.compare(self._base_content.split(), content.split()) - static_patterns_are_matched = self._static_patterns == self.get_static_patterns(diff) - match_ratio = difflib.SequenceMatcher(None, self._base_content, content).ratio() - return static_patterns_are_matched or match_ratio > MAX_MATCH_RATIO + i = -1 + splitted_content = content.split() + for pattern in self._static_patterns: + try: + i = splitted_content.index(pattern, i + 1) + except IndexError: + return False + + # The number of static patterns is not big enough to say it's a reliable method + if len(self._static_patterns) < 20: + return difflib.SequenceMatcher(None, self._base_content, content).ratio() > 0.75 + + return True @staticmethod def get_static_patterns(patterns): @@ -59,7 +68,7 @@ def get_static_patterns(patterns): # [" str1", "- str2", "+ str3", " str4"] # # Get only stable patterns in the contents - return [pattern for pattern in patterns if pattern.startswith(" ")] + return [lstrip_once(pattern, " ") for pattern in patterns if pattern.startswith(" ")] def generate_matching_regex(string1: str, string2: str) -> str: From 9f4412c06d53e23e8e6cc32c2e4ce2801192ede7 Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Tue, 22 Oct 2024 16:24:43 +0700 Subject: [PATCH 2/5] Fix --- .github/workflows/ci.yml | 2 +- config.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a0642d664..da398e833 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: pip install codespell flake8 -r 
requirements.txt - name: Test run: | - python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o "tmp_report.${extension}" -O json,xml,plain,csv,md,sqlite,html --force-recursive -R 3 --full-url -q + python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o "tmp_report.{extension}" -O json,xml,plain,csv,md,sqlite,html --force-recursive -R 3 --full-url -q python3 dirsearch.py -w ./tests/static/wordlist.txt -l ./tests/static/targets.txt --subdirs /,admin/ --exclude-extensions conf -q -L -f -i 200 --user-agent a --log tmp_log.log python3 dirsearch.py -w ./tests/static/wordlist.txt --nmap-report ./tests/static/nmap.xml --max-rate 2 -H K:V --random-agent --overwrite-extensions --no-color python3 dirsearch.py -w ./tests/static/wordlist.txt --raw ./tests/static/raw.txt --prefixes . --suffixes ~ --skip-on-status 404 -m POST -d test=1 --crawl --min-response-size 9 diff --git a/config.ini b/config.ini index afafe4e0f..d2ebc13d6 100644 --- a/config.ini +++ b/config.ini @@ -28,7 +28,7 @@ exit-on-error = False #skip-on-status = 429,999 [dictionary] -default-extensions = php,aspx,jsp,html,js +default-extensions = php,asp,aspx,jsp,html,htm force-extensions = False overwrite-extensions = False lowercase = False From 68aa9944262ef8d9cead6ecd9ae374cfe476c292 Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Tue, 22 Oct 2024 16:30:09 +0700 Subject: [PATCH 3/5] Switch back to xml.etree instead of defusedxml --- lib/report/xml_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/report/xml_report.py b/lib/report/xml_report.py index 7a7ea1e0e..c540f4d55 100755 --- a/lib/report/xml_report.py +++ b/lib/report/xml_report.py @@ -16,7 +16,7 @@ # # Author: Mauro Soria -from defusedxml import ElementTree as ET +from xml.etree import ElementTree as ET from lib.core.decorators import locked from lib.core.settings import ( From 2a049fb412c575a5260a491cdefa60f580e32366 Mon Sep 17 00:00:00 
2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Tue, 22 Oct 2024 18:28:00 +0700 Subject: [PATCH 4/5] Fix and bump minimum python version to 3.9 --- .github/workflows/ci.yml | 2 +- README.md | 2 +- dirsearch.py | 2 +- lib/utils/diff.py | 2 +- setup.py | 4 ++-- tests/utils/test_diff.py | 5 +++-- 6 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index da398e833..3b01f1500 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -9,7 +9,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [3.8, 3.9] + python-version: [3.9, 3.11] os: ['ubuntu-latest', 'windows-latest'] steps: diff --git a/README.md b/README.md index e1dc40796..f85cd6e99 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ Table of Contents Installation & Usage ------------ -**Requirement: python 3.8 or higher** +**Requirement: python 3.9 or higher** Choose one of these installation options: diff --git a/dirsearch.py b/dirsearch.py index 05af81c1f..83233625d 100755 --- a/dirsearch.py +++ b/dirsearch.py @@ -27,7 +27,7 @@ from lib.core.settings import OPTIONS_FILE from lib.parse.config import ConfigParser -if sys.version_info < (3, 8): +if sys.version_info < (3, 9): sys.stderr.write("Sorry, dirsearch requires Python 3.8 or higher\n") sys.exit(1) diff --git a/lib/utils/diff.py b/lib/utils/diff.py index aba450221..6621dfd39 100755 --- a/lib/utils/diff.py +++ b/lib/utils/diff.py @@ -57,7 +57,7 @@ def compare_to(self, content): return False # The number of static patterns is not big enough to say it's a reliable method - if len(self._static_patterns) < 20: + if len(self._static_patterns) < 20 and len(content.split()) > len(self._base_content.split()): return difflib.SequenceMatcher(None, self._base_content, content).ratio() > 0.75 return True diff --git a/setup.py b/setup.py index bc7cfc127..99ec69137 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ entry_points={"console_scripts": 
["dirsearch=dirsearch.dirsearch:main"]}, package_data={"dirsearch": ["*", "db/*"]}, include_package_data=True, - python_requires=">=3.8", + python_requires=">=3.9", install_requires=get_dependencies(), classifiers=[ "Programming Language :: Python", @@ -39,7 +39,7 @@ "License :: OSI Approved :: GNU General Public License v2 (GPLv2)", "Operating System :: OS Independent", "Topic :: Security", - "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", ], keywords=["infosec", "bug bounty", "pentesting", "security"], ) diff --git a/tests/utils/test_diff.py b/tests/utils/test_diff.py index 769b87f63..c21e1a004 100755 --- a/tests/utils/test_diff.py +++ b/tests/utils/test_diff.py @@ -26,5 +26,6 @@ def test_generate_matching_regex(self): self.assertEqual(generate_matching_regex("add.php", "abc.php"), "^a.*\\.php$", "Matching regex isn't correct") def test_dynamic_content_parser(self): - self.assertEqual(DynamicContentParser("a b c", "a b d")._static_patterns, [" a", " b"], "Static patterns are not right") - self.assertTrue(DynamicContentParser("a b c", "a b d").compare_to("a b ef")) + self.assertEqual(DynamicContentParser("a b c", "a b d")._static_patterns, ["a", "b"], "Static patterns are not right") + self.assertTrue(DynamicContentParser("abc.php not found", "def.php not found").compare_to("nothing.php not found")) + self.assertTrue(DynamicContentParser("abc.php not found", "def.php not found").compare_to("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.php not found")) From 7401e997276aba7312533260535063744b37b704 Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Tue, 22 Oct 2024 18:33:25 +0700 Subject: [PATCH 5/5] Codespell --- lib/utils/diff.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/utils/diff.py b/lib/utils/diff.py index 6621dfd39..d0249df73 100755 --- a/lib/utils/diff.py +++ b/lib/utils/diff.py @@ -39,7 +39,7 @@ def compare_to(self, content): DynamicContentParser.compare_to() workflow 
1. Check if the wildcard response is static or not, if yes, compare two responses. - 2. If it's not static, get static patterns (splitted by space) and check if the response + 2. If it's not static, get static patterns (split by space) and check if the response has all of them. 3. In some cases, checking static patterns isn't reliable enough, so we check the similarity ratio of the two responses.