From 37a510cc0708ed908313c96f8ade8c490a205224 Mon Sep 17 00:00:00 2001 From: valentijnscholten Date: Sat, 16 Dec 2023 19:01:41 +0100 Subject: [PATCH 1/6] Allow commenting out of urls in url file --- lib/controller/controller.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 8fe26f459..1174d199d 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -211,6 +211,10 @@ def run(self): while options["urls"]: url = options["urls"][0] + if url.startswith('#'): + options["urls"].pop(0) + continue + self.fuzzer = Fuzzer( self.requester, self.dictionary, From 6549d303e404c3fda957c8f3ed7eb5fbe7830c44 Mon Sep 17 00:00:00 2001 From: valentijnscholten Date: Sat, 16 Dec 2023 19:03:21 +0100 Subject: [PATCH 2/6] Update CONTRIBUTORS.md --- CONTRIBUTORS.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index 9945012ea..719507311 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -77,6 +77,7 @@ - [huyphan](https://github.com/huyphan) - [Sean Wei](https://www.sean.taipei/about-en) - [FantasqueX](https://www.github.com/FantasqueX) +- [Valentijn Scholten](https://www.github.com/valentijnscholten) Special thanks to all the people who are named here! From c81ab2e628bfa91e72ab9e8c10dbd11ace7db14f Mon Sep 17 00:00:00 2001 From: valentijnscholten Date: Sat, 16 Dec 2023 19:03:46 +0100 Subject: [PATCH 3/6] Update CHANGELOG.md --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9d7947406..7fbae2559 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,8 @@ ## [Unreleased] - Support non-default network interface - +- Allow commenting out of urls in url file +- - Remove unused dependencies (urllib3, cryptography, cffi, idna, chardet) ## [0.4.3] - October 2nd, 2022 From 3a450ad7c350be5994c605c377eb78648113aede Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Sat, 30 Dec 2023 17:12:10 +0700 Subject: [PATCH 4/6] Handle comments in URL list --- CHANGELOG.md | 2 -- lib/controller/controller.py | 4 ---- lib/core/options.py | 19 +++++++++++-------- lib/utils/common.py | 14 ++++++++++++-- tests/utils/test_common.py | 6 +++--- 5 files changed, 26 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7fbae2559..c7eaf835e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,8 +2,6 @@ ## [Unreleased] - Support non-default network interface -- Allow commenting out of urls in url file -- - Remove unused dependencies (urllib3, cryptography, cffi, idna, chardet) ## [0.4.3] - October 2nd, 2022 diff --git a/lib/controller/controller.py b/lib/controller/controller.py index 1174d199d..8fe26f459 100755 --- a/lib/controller/controller.py +++ b/lib/controller/controller.py @@ -211,10 +211,6 @@ def run(self): while options["urls"]: url = options["urls"][0] - if url.startswith('#'): - options["urls"].pop(0) - continue - self.fuzzer = Fuzzer( self.requester, self.dictionary, diff --git a/lib/core/options.py b/lib/core/options.py index 16c3c2c08..28362719a 100755 --- a/lib/core/options.py +++ b/lib/core/options.py @@ -26,7 +26,7 @@ from lib.parse.cmdline import parse_arguments from lib.parse.config import ConfigParser from lib.parse.headers import HeadersParser -from lib.utils.common import iprange, read_stdin, uniq +from lib.utils.common import iprange, read_stdin, strip_and_uniquify from lib.utils.file import File, FileUtils @@ -52,7 +52,10 @@ def parse_options(): exit(1) if not opt.raw_file: - opt.urls = uniq(opt.urls) + opt.urls = filter( + lambda url: not url.startswith("#"), + strip_and_uniquify(opt.urls), + ) if not opt.extensions and not opt.remove_extensions: print("WARNING: No extension was specified!") @@ -113,8 +116,8 @@ def parse_options(): opt.exclude_status_codes = _parse_status_codes(opt.exclude_status_codes) opt.recursion_status_codes = _parse_status_codes(opt.recursion_status_codes) opt.skip_on_status = _parse_status_codes(opt.skip_on_status) - opt.prefixes = uniq([prefix.strip() for prefix in opt.prefixes.split(",") if prefix], tuple) - opt.suffixes = uniq([suffix.strip() for suffix in opt.suffixes.split(",") if suffix], tuple) + opt.prefixes = strip_and_uniquify(opt.prefixes.split(","), tuple) + opt.suffixes = strip_and_uniquify(opt.suffixes.split(","), tuple) opt.subdirs = [ subdir.lstrip(" /") + ("" if not subdir or subdir.endswith("/") else "/") for subdir in opt.subdirs.split(",") @@ -134,14 +137,14 @@ def parse_options(): "extension or enclose it in double quotes") exit(0) else: - opt.extensions = uniq( - [extension.lstrip(" .") for extension in opt.extensions.split(",")], + opt.extensions = strip_and_uniquify( + [extension.lstrip(".") for extension in opt.extensions.split(",")], tuple, ) - opt.exclude_extensions = uniq( + opt.exclude_extensions = strip_and_uniquify( [ - exclude_extension.lstrip(" .") + exclude_extension.lstrip(".") for exclude_extension in opt.exclude_extensions.split(",") ], tuple ) diff --git a/lib/utils/common.py b/lib/utils/common.py index 28dc2e6b2..3648569e2 100644 --- a/lib/utils/common.py +++ b/lib/utils/common.py @@ -19,6 +19,7 @@ import os import sys +from functools import reduce from ipaddress import IPv4Network, IPv6Network from urllib.parse import quote, urljoin @@ -42,8 +43,17 @@ def safequote(string_): return quote(string_, safe=URL_SAFE_CHARS) -def uniq(array, type_=list): - return type_(filter(None, dict.fromkeys(array))) +def _strip_and_uniquify_callback(array, item): + item = item.strip() + if not item or item in array: + return array + + return array + [item] + + +# Strip values and remove duplicates from a list, respect the order +def strip_and_uniquify(array, type_=list): + return type_(reduce(_strip_and_uniquify_callback, array, [])) def lstrip_once(string, pattern): diff --git a/tests/utils/test_common.py b/tests/utils/test_common.py index d91aee1e2..82a3dc7d7 100755 --- a/tests/utils/test_common.py +++ b/tests/utils/test_common.py @@ -18,12 +18,12 @@ from unittest import TestCase -from lib.utils.common import merge_path, uniq, get_valid_filename +from lib.utils.common import merge_path, strip_and_uniquify, get_valid_filename class TestCommonUtils(TestCase): - def test_uniq(self): - self.assertEqual(uniq(["foo", "bar", "foo"]), ["foo", "bar"], "The result is not unique or in wrong order") + def test_strip_and_uniquify(self): + self.assertEqual(strip_and_uniquify(["foo", "bar", " bar ", "foo"]), ["foo", "bar"], "The results are not stripped or contain duplicates or in wrong order") def test_get_valid_filename(self): self.assertEqual(get_valid_filename("http://example.com:80/foobar"), "http___example.com_80_foobar", "Invalid filename for Windows") From a4cb38832939bcd979b26225dd6c733d11a7ab6a Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Sat, 30 Dec 2023 17:37:48 +0700 Subject: [PATCH 5/6] Fixes --- lib/core/options.py | 99 ++++++++++++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 37 deletions(-) diff --git a/lib/core/options.py b/lib/core/options.py index 28362719a..37275f2d9 100755 --- a/lib/core/options.py +++ b/lib/core/options.py @@ -52,9 +52,11 @@ def parse_options(): exit(1) if not opt.raw_file: - opt.urls = filter( - lambda url: not url.startswith("#"), - strip_and_uniquify(opt.urls), + opt.urls = strip_and_uniquify( + filter( + lambda url: not url.startswith("#"), + opt.urls, + ) ) if not opt.extensions and not opt.remove_extensions: @@ -116,16 +118,20 @@ def parse_options(): opt.exclude_status_codes = _parse_status_codes(opt.exclude_status_codes) opt.recursion_status_codes = _parse_status_codes(opt.recursion_status_codes) opt.skip_on_status = _parse_status_codes(opt.skip_on_status) - opt.prefixes = strip_and_uniquify(opt.prefixes.split(","), tuple) - opt.suffixes = strip_and_uniquify(opt.suffixes.split(","), tuple) - opt.subdirs = [ - subdir.lstrip(" /") + ("" if not subdir or subdir.endswith("/") else "/") - for subdir in opt.subdirs.split(",") - ] - opt.exclude_subdirs = [ - subdir.lstrip(" /") + ("" if not subdir or subdir.endswith("/") else "/") - for subdir in opt.exclude_subdirs.split(",") - ] + opt.prefixes = tuple(strip_and_uniquify(opt.prefixes.split(","))) + opt.suffixes = tuple(strip_and_uniquify(opt.suffixes.split(","))) + opt.subdirs = strip_and_uniquify( + [ + subdir.lstrip("/") + ("" if not subdir or subdir.endswith("/") else "/") + for subdir in opt.subdirs.split(",") + ] + ) + opt.exclude_subdirs = strip_and_uniquify( + [ + subdir.lstrip("/") + ("" if not subdir or subdir.endswith("/") else "/") + for subdir in opt.exclude_subdirs.split(",") + ] + ) opt.exclude_sizes = {size.strip().upper() for size in opt.exclude_sizes.split(",")} if opt.remove_extensions: @@ -133,20 +139,25 @@ def parse_options(): elif opt.extensions == "*": opt.extensions = COMMON_EXTENSIONS elif opt.extensions == "CHANGELOG.md": - print("A weird extension was provided: 'CHANGELOG.md'. Please do not use * as the " - "extension or enclose it in double quotes") + print( + "A weird extension was provided: 'CHANGELOG.md'. Please do not use * as the " + "extension or enclose it in double quotes" + ) exit(0) else: - opt.extensions = strip_and_uniquify( - [extension.lstrip(".") for extension in opt.extensions.split(",")], - tuple, + opt.extensions = tuple( + strip_and_uniquify( + [extension.lstrip(".") for extension in opt.extensions.split(",")] + ) ) - opt.exclude_extensions = strip_and_uniquify( - [ - exclude_extension.lstrip(".") - for exclude_extension in opt.exclude_extensions.split(",") - ], tuple + opt.exclude_extensions = tuple( + strip_and_uniquify( + [ + exclude_extension.lstrip(".") + for exclude_extension in opt.exclude_extensions.split(",") + ] + ) ) if opt.auth and not opt.auth_type: @@ -156,18 +167,24 @@ def parse_options(): print("No authentication credential found") exit(1) elif opt.auth and opt.auth_type not in AUTHENTICATION_TYPES: - print(f"'{opt.auth_type}' is not in available authentication " - f"types: {', '.join(AUTHENTICATION_TYPES)}") + print( + f"'{opt.auth_type}' is not in available authentication " + f"types: {', '.join(AUTHENTICATION_TYPES)}" + ) exit(1) if set(opt.extensions).intersection(opt.exclude_extensions): - print("Exclude extension list can not contain any extension " - "that has already in the extension list") + print( + "Exclude extension list can not contain any extension " + "that has already in the extension list" + ) exit(1) if opt.output_format not in OUTPUT_FORMATS: - print("Select one of the following output formats: " - f"{', '.join(OUTPUT_FORMATS)}") + print( + "Select one of the following output formats: " + f"{', '.join(OUTPUT_FORMATS)}" + ) exit(1) return vars(opt) @@ -215,17 +232,19 @@ def parse_config(opt): config.read(opt.config) # General - opt.thread_count = opt.thread_count or config.safe_getint( - "general", "threads", 25 - ) + opt.thread_count = opt.thread_count or config.safe_getint("general", "threads", 25) opt.include_status_codes = opt.include_status_codes or config.safe_get( "general", "include-status" ) opt.exclude_status_codes = opt.exclude_status_codes or config.safe_get( "general", "exclude-status" ) - opt.exclude_sizes = opt.exclude_sizes or config.safe_get("general", "exclude-sizes", "") - opt.exclude_texts = opt.exclude_texts or config.safe_getlist("general", "exclude-texts") + opt.exclude_sizes = opt.exclude_sizes or config.safe_get( + "general", "exclude-sizes", "" + ) + opt.exclude_texts = opt.exclude_texts or config.safe_getlist( + "general", "exclude-texts" + ) opt.exclude_regex = opt.exclude_regex or config.safe_get("general", "exclude-regex") opt.exclude_redirect = opt.exclude_redirect or config.safe_get( "general", "exclude-redirect" @@ -285,7 +304,9 @@ def parse_config(opt): ) # Request - opt.http_method = opt.http_method or config.safe_get("request", "http-method", "get") + opt.http_method = opt.http_method or config.safe_get( + "request", "http-method", "get" + ) opt.headers = opt.headers or config.safe_getlist("request", "headers") opt.headers_file = opt.headers_file or config.safe_get("request", "headers-file") opt.follow_redirects = opt.follow_redirects or config.safe_getboolean( @@ -300,7 +321,9 @@ def parse_config(opt): # Connection opt.delay = opt.delay or config.safe_getfloat("connection", "delay") opt.timeout = opt.timeout or config.safe_getfloat("connection", "timeout", 7.5) - opt.max_retries = opt.max_retries or config.safe_getint("connection", "max-retries", 1) + opt.max_retries = opt.max_retries or config.safe_getint( + "connection", "max-retries", 1 + ) opt.max_rate = opt.max_rate or config.safe_getint("connection", "max-rate") opt.proxies = opt.proxies or config.safe_getlist("connection", "proxies") opt.proxies_file = opt.proxies_file or config.safe_get("connection", "proxies-file") @@ -308,7 +331,9 @@ def parse_config(opt): "connection", "scheme", None, ("http", "https") ) opt.replay_proxy = opt.replay_proxy or config.safe_get("connection", "replay-proxy") - opt.network_interface = opt.network_interface or config.safe_get("connection", "network-interface") + opt.network_interface = opt.network_interface or config.safe_get( + "connection", "network-interface" + ) # Advanced opt.crawl = opt.crawl or config.safe_getboolean("advanced", "crawl") From 15c859c6246cc42dfb6875dcd9c264fdf0146c50 Mon Sep 17 00:00:00 2001 From: shelld3v <59408894+shelld3v@users.noreply.github.com> Date: Sat, 30 Dec 2023 17:42:26 +0700 Subject: [PATCH 6/6] Fix codespell command (for GitHub Actions) --- .github/workflows/ci.yml | 2 +- CHANGELOG.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 15befeaed..4e53fcef3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,4 +44,4 @@ jobs: run: | flake8 . - name: Codespell - run: codespell + run: codespell -S CONTRIBUTORS.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c7eaf835e..16515b252 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -122,7 +122,7 @@ - Exclude status switch - Pause/next directory feature - Changed help structure -- Expaded default dictionary +- Expanded default dictionary ## 0.2.2 - July 2, 2014 - Fixed some bugs