Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize wildcard filter #1421

Merged
merged 5 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.8, 3.9]
python-version: [3.9, 3.11]
os: ['ubuntu-latest', 'windows-latest']

steps:
Expand All @@ -24,7 +24,7 @@ jobs:
pip install codespell flake8 -r requirements.txt
- name: Test
run: |
python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o "tmp_report.${extension}" -O json,xml,plain,csv,md,sqlite,html --force-recursive -R 3 --full-url -q
python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o "tmp_report.{extension}" -O json,xml,plain,csv,md,sqlite,html --force-recursive -R 3 --full-url -q
python3 dirsearch.py -w ./tests/static/wordlist.txt -l ./tests/static/targets.txt --subdirs /,admin/ --exclude-extensions conf -q -L -f -i 200 --user-agent a --log tmp_log.log
python3 dirsearch.py -w ./tests/static/wordlist.txt --nmap-report ./tests/static/nmap.xml --max-rate 2 -H K:V --random-agent --overwrite-extensions --no-color
python3 dirsearch.py -w ./tests/static/wordlist.txt --raw ./tests/static/raw.txt --prefixes . --suffixes ~ --skip-on-status 404 -m POST -d test=1 --crawl --min-response-size 9
Expand Down
1 change: 0 additions & 1 deletion CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
- [at0m](https://github.com/atomiczsec/)
- [junmoka](https://github.com/junmoka)
- [Akshay Ravi](https://www.linkedin.com/in/c09yc47/)
- [kosyan62](https://https://github.com/kosyan62)
- [Maxence Zolnieurck](https://github.com/mxcezl)
- [Giorgos Drosos](https://github.com/gdrosos)
- [huyphan](https://github.com/huyphan)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Table of Contents
Installation & Usage
------------

**Requirement: python 3.8 or higher**
**Requirement: python 3.9 or higher**

Choose one of these installation options:

Expand Down
2 changes: 1 addition & 1 deletion config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ exit-on-error = False
#skip-on-status = 429,999

[dictionary]
default-extensions = php,aspx,jsp,html,js
default-extensions = php,asp,aspx,jsp,html,htm
force-extensions = False
overwrite-extensions = False
lowercase = False
Expand Down
2 changes: 1 addition & 1 deletion dirsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from lib.core.settings import OPTIONS_FILE
from lib.parse.config import ConfigParser

if sys.version_info < (3, 8):
if sys.version_info < (3, 9):
sys.stderr.write("Sorry, dirsearch requires Python 3.9 or higher\n")
sys.exit(1)

Expand Down
2 changes: 0 additions & 2 deletions lib/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@

RATE_UPDATE_DELAY = 0.15

MAX_MATCH_RATIO = 0.98

ITER_CHUNK_SIZE = 1024 * 1024

MAX_RESPONSE_SIZE = 80 * 1024 * 1024
Expand Down
2 changes: 1 addition & 1 deletion lib/report/xml_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#
# Author: Mauro Soria

from defusedxml import ElementTree as ET
from xml.etree import ElementTree as ET

from lib.core.decorators import locked
from lib.core.settings import (
Expand Down
35 changes: 22 additions & 13 deletions lib/utils/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import difflib
import re

from lib.core.settings import MAX_MATCH_RATIO
from lib.utils.common import lstrip_once


class DynamicContentParser:
Expand All @@ -38,28 +38,37 @@ def compare_to(self, content):
"""
DynamicContentParser.compare_to() workflow

1. Check if the wildcard response is static or not, if yes, compare 2 responses
2. If it's not static, get static patterns (splitting by space) in both responses
and check if they match
3. In some rare cases, checking static patterns fails, so make a final confirmation
if the similarity ratio of 2 responses is not high enough to prove they are the same
1. Check if the wildcard response is static or not, if yes, compare two responses.
2. If it's not static, get static patterns (split by space) and check if the response
has all of them.
3. In some cases, checking static patterns isn't reliable enough, so we check the similarity
ratio of the two responses.
"""

if self._is_static and content == self._base_content:
return True
if self._is_static:
return content == self._base_content

diff = self._differ.compare(self._base_content.split(), content.split())
static_patterns_are_matched = self._static_patterns == self.get_static_patterns(diff)
match_ratio = difflib.SequenceMatcher(None, self._base_content, content).ratio()
return static_patterns_are_matched or match_ratio > MAX_MATCH_RATIO
i = -1
splitted_content = content.split()
for pattern in self._static_patterns:
try:
i = splitted_content.index(pattern, i + 1)
except ValueError:
return False

# The number of static patterns is not big enough to say it's a reliable method
if len(self._static_patterns) < 20 and len(content.split()) > len(self._base_content.split()):
return difflib.SequenceMatcher(None, self._base_content, content).ratio() > 0.75

return True

@staticmethod
def get_static_patterns(patterns):
# difflib.Differ.compare returns something like below:
# [" str1", "- str2", "+ str3", " str4"]
#
# Get only stable patterns in the contents
return [pattern for pattern in patterns if pattern.startswith(" ")]
return [lstrip_once(pattern, " ") for pattern in patterns if pattern.startswith(" ")]


def generate_matching_regex(string1: str, string2: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
entry_points={"console_scripts": ["dirsearch=dirsearch.dirsearch:main"]},
package_data={"dirsearch": ["*", "db/*"]},
include_package_data=True,
python_requires=">=3.8",
python_requires=">=3.9",
install_requires=get_dependencies(),
classifiers=[
"Programming Language :: Python",
Expand All @@ -39,7 +39,7 @@
"License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
"Operating System :: OS Independent",
"Topic :: Security",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
],
keywords=["infosec", "bug bounty", "pentesting", "security"],
)
5 changes: 3 additions & 2 deletions tests/utils/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ def test_generate_matching_regex(self):
self.assertEqual(generate_matching_regex("add.php", "abc.php"), "^a.*\\.php$", "Matching regex isn't correct")

def test_dynamic_content_parser(self):
self.assertEqual(DynamicContentParser("a b c", "a b d")._static_patterns, [" a", " b"], "Static patterns are not right")
self.assertTrue(DynamicContentParser("a b c", "a b d").compare_to("a b ef"))
self.assertEqual(DynamicContentParser("a b c", "a b d")._static_patterns, ["a", "b"], "Static patterns are not right")
self.assertTrue(DynamicContentParser("abc.php not found", "def.php not found").compare_to("nothing.php not found"))
self.assertTrue(DynamicContentParser("abc.php not found", "def.php not found").compare_to("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.php not found"))
Loading