Skip to content

Commit

Permalink
Merge pull request #1421 from maurosoria/wildcard
Browse files Browse the repository at this point in the history
Optimize wildcard filter
  • Loading branch information
maurosoria authored Oct 23, 2024
2 parents 3446e17 + 7401e99 commit 4f797a2
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 26 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
strategy:
fail-fast: false
matrix:
python-version: [3.8, 3.9]
python-version: [3.9, 3.11]
os: ['ubuntu-latest', 'windows-latest']

steps:
Expand All @@ -24,7 +24,7 @@ jobs:
pip install codespell flake8 -r requirements.txt
- name: Test
run: |
python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o "tmp_report.${extension}" -O json,xml,plain,csv,md,sqlite,html --force-recursive -R 3 --full-url -q
python3 dirsearch.py -w ./tests/static/wordlist.txt -u https://example.com -o "tmp_report.{extension}" -O json,xml,plain,csv,md,sqlite,html --force-recursive -R 3 --full-url -q
python3 dirsearch.py -w ./tests/static/wordlist.txt -l ./tests/static/targets.txt --subdirs /,admin/ --exclude-extensions conf -q -L -f -i 200 --user-agent a --log tmp_log.log
python3 dirsearch.py -w ./tests/static/wordlist.txt --nmap-report ./tests/static/nmap.xml --max-rate 2 -H K:V --random-agent --overwrite-extensions --no-color
python3 dirsearch.py -w ./tests/static/wordlist.txt --raw ./tests/static/raw.txt --prefixes . --suffixes ~ --skip-on-status 404 -m POST -d test=1 --crawl --min-response-size 9
Expand Down
1 change: 0 additions & 1 deletion CONTRIBUTORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@
- [at0m](https://github.com/atomiczsec/)
- [junmoka](https://github.com/junmoka)
- [Akshay Ravi](https://www.linkedin.com/in/c09yc47/)
- [kosyan62](https://github.com/kosyan62)
- [Maxence Zolnieurck](https://github.com/mxcezl)
- [Giorgos Drosos](https://github.com/gdrosos)
- [huyphan](https://github.com/huyphan)
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Table of Contents
Installation & Usage
------------

**Requirement: python 3.8 or higher**
**Requirement: python 3.9 or higher**

Choose one of these installation options:

Expand Down
2 changes: 1 addition & 1 deletion config.ini
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ exit-on-error = False
#skip-on-status = 429,999

[dictionary]
default-extensions = php,aspx,jsp,html,js
default-extensions = php,asp,aspx,jsp,html,htm
force-extensions = False
overwrite-extensions = False
lowercase = False
Expand Down
2 changes: 1 addition & 1 deletion dirsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from lib.core.settings import OPTIONS_FILE
from lib.parse.config import ConfigParser

if sys.version_info < (3, 8):
if sys.version_info < (3, 9):
sys.stderr.write("Sorry, dirsearch requires Python 3.9 or higher\n")
sys.exit(1)

Expand Down
2 changes: 0 additions & 2 deletions lib/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,6 @@

RATE_UPDATE_DELAY = 0.15

MAX_MATCH_RATIO = 0.98

ITER_CHUNK_SIZE = 1024 * 1024

MAX_RESPONSE_SIZE = 80 * 1024 * 1024
Expand Down
2 changes: 1 addition & 1 deletion lib/report/xml_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#
# Author: Mauro Soria

from defusedxml import ElementTree as ET
from xml.etree import ElementTree as ET

from lib.core.decorators import locked
from lib.core.settings import (
Expand Down
35 changes: 22 additions & 13 deletions lib/utils/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import difflib
import re

from lib.core.settings import MAX_MATCH_RATIO
from lib.utils.common import lstrip_once


class DynamicContentParser:
Expand All @@ -38,28 +38,37 @@ def compare_to(self, content):
"""
DynamicContentParser.compare_to() workflow
1. Check if the wildcard response is static or not, if yes, compare 2 responses
2. If it's not static, get static patterns (splitting by space) in both responses
and check if they match
3. In some rare cases, checking static patterns fails, so make a final confirmation
if the similarity ratio of 2 responses is not high enough to prove they are the same
1. Check if the wildcard response is static or not, if yes, compare two responses.
2. If it's not static, get static patterns (split by space) and check if the response
has all of them.
3. In some cases, checking static patterns isn't reliable enough, so we check the similarity
ratio of the two responses.
"""

if self._is_static and content == self._base_content:
return True
if self._is_static:
return content == self._base_content

diff = self._differ.compare(self._base_content.split(), content.split())
static_patterns_are_matched = self._static_patterns == self.get_static_patterns(diff)
match_ratio = difflib.SequenceMatcher(None, self._base_content, content).ratio()
return static_patterns_are_matched or match_ratio > MAX_MATCH_RATIO
i = -1
splitted_content = content.split()
for pattern in self._static_patterns:
try:
i = splitted_content.index(pattern, i + 1)
except ValueError:
return False

# The number of static patterns is not big enough to say it's a reliable method
if len(self._static_patterns) < 20 and len(content.split()) > len(self._base_content.split()):
return difflib.SequenceMatcher(None, self._base_content, content).ratio() > 0.75

return True

@staticmethod
def get_static_patterns(patterns):
# difflib.Differ.compare returns something like below:
# [" str1", "- str2", "+ str3", " str4"]
#
# Get only stable patterns in the contents
return [pattern for pattern in patterns if pattern.startswith(" ")]
return [lstrip_once(pattern, " ") for pattern in patterns if pattern.startswith(" ")]


def generate_matching_regex(string1: str, string2: str) -> str:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
entry_points={"console_scripts": ["dirsearch=dirsearch.dirsearch:main"]},
package_data={"dirsearch": ["*", "db/*"]},
include_package_data=True,
python_requires=">=3.8",
python_requires=">=3.9",
install_requires=get_dependencies(),
classifiers=[
"Programming Language :: Python",
Expand All @@ -39,7 +39,7 @@
"License :: OSI Approved :: GNU General Public License v2 (GPLv2)",
"Operating System :: OS Independent",
"Topic :: Security",
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
],
keywords=["infosec", "bug bounty", "pentesting", "security"],
)
5 changes: 3 additions & 2 deletions tests/utils/test_diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,6 @@ def test_generate_matching_regex(self):
self.assertEqual(generate_matching_regex("add.php", "abc.php"), "^a.*\\.php$", "Matching regex isn't correct")

def test_dynamic_content_parser(self):
self.assertEqual(DynamicContentParser("a b c", "a b d")._static_patterns, [" a", " b"], "Static patterns are not right")
self.assertTrue(DynamicContentParser("a b c", "a b d").compare_to("a b ef"))
self.assertEqual(DynamicContentParser("a b c", "a b d")._static_patterns, ["a", "b"], "Static patterns are not right")
self.assertTrue(DynamicContentParser("abc.php not found", "def.php not found").compare_to("nothing.php not found"))
self.assertTrue(DynamicContentParser("abc.php not found", "def.php not found").compare_to("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz.php not found"))

0 comments on commit 4f797a2

Please sign in to comment.