diff --git a/pydoctor/astbuilder.py b/pydoctor/astbuilder.py index d350aa908..f80acdcc0 100644 --- a/pydoctor/astbuilder.py +++ b/pydoctor/astbuilder.py @@ -26,10 +26,7 @@ def parseFile(path: Path) -> ast.Module: src = f.read() + b'\n' return _parse(src, filename=str(path)) -if sys.version_info >= (3,8): - _parse = partial(ast.parse, type_comments=True) -else: - _parse = ast.parse +_parse = partial(ast.parse, type_comments=True) def _maybeAttribute(cls: model.Class, name: str) -> bool: """Check whether a name is a potential attribute of the given class. diff --git a/pydoctor/astutils.py b/pydoctor/astutils.py index d8e14f1b7..2163c841b 100644 --- a/pydoctor/astutils.py +++ b/pydoctor/astutils.py @@ -4,7 +4,6 @@ from __future__ import annotations import inspect -import platform import sys from numbers import Number from typing import Any, Callable, Collection, Iterator, Optional, List, Iterable, Sequence, TYPE_CHECKING, Tuple, Union, cast @@ -232,11 +231,7 @@ def get_assign_docstring_node(assign:ast.Assign | ast.AnnAssign) -> Str | None: def is_none_literal(node: ast.expr) -> bool: """Does this AST node represent the literal constant None?""" - if sys.version_info >= (3,8): - return isinstance(node, ast.Constant) and node.value is None - else: - # TODO: remove me when python3.7 is not supported anymore - return isinstance(node, (ast.Constant, ast.NameConstant)) and node.value is None + return isinstance(node, ast.Constant) and node.value is None def unstring_annotation(node: ast.expr, ctx:'model.Documentable', section:str='annotation') -> ast.expr: """Replace all strings in the given expression by parsed versions. @@ -489,23 +484,11 @@ def get_docstring_node(node: ast.AST) -> Str | None: return node.value return None -_string_lineno_is_end = sys.version_info < (3,8) \ - and platform.python_implementation() != 'PyPy' -"""True iff the 'lineno' attribute of an AST string node points to the last -line in the string, rather than the first line. -""" - - class _StrMeta(type): - if sys.version_info >= (3,8): - def __instancecheck__(self, instance: object) -> bool: - if isinstance(instance, ast.expr): - return get_str_value(instance) is not None - return False - else: - # TODO: remove me when python3.7 is not supported - def __instancecheck__(self, instance: object) -> bool: - return isinstance(instance, ast.Str) + def __instancecheck__(self, instance: object) -> bool: + if isinstance(instance, ast.expr): + return get_str_value(instance) is not None + return False class Str(ast.expr, metaclass=_StrMeta): """ @@ -514,15 +497,11 @@ class Str(ast.expr, metaclass=_StrMeta): Do not try to instanciate this class. """ + value: str + def __init__(self, *args: Any, **kwargs: Any) -> None: raise TypeError(f'{Str.__qualname__} cannot be instanciated') - if sys.version_info >= (3,8): - value: str - else: - # TODO: remove me when python3.7 is not supported - s: str - def extract_docstring_linenum(node: Str) -> int: r""" In older CPython versions, the AST only tells us the end line @@ -533,18 +512,8 @@ def extract_docstring_linenum(node: Str) -> int: Leading blank lines are stripped by cleandoc(), so we must return the line number of the first non-blank line. """ - if sys.version_info >= (3,8): - doc = node.value - else: - # TODO: remove me when python3.7 is not supported - doc = node.s + doc = node.value lineno = node.lineno - if _string_lineno_is_end: - # In older CPython versions, the AST only tells us the end line - # number and we must approximate the start line number. - # This approximation is correct if the docstring does not contain - # explicit newlines ('\n') or joined lines ('\' at end of line). - lineno -= doc.count('\n') # Leading blank lines are stripped by cleandoc(), so we must # return the line number of the first non-blank line. @@ -564,11 +533,7 @@ def extract_docstring(node: Str) -> Tuple[int, str]: - The line number of the first non-blank line of the docsring. See L{extract_docstring_linenum}. - The docstring to be parsed, cleaned by L{inspect.cleandoc}. """ - if sys.version_info >= (3,8): - value = node.value - else: - # TODO: remove me when python3.7 is not supported - value = node.s + value = node.value lineno = extract_docstring_linenum(node) return lineno, inspect.cleandoc(value) diff --git a/pydoctor/model.py b/pydoctor/model.py index fbae88004..a92b01071 100644 --- a/pydoctor/model.py +++ b/pydoctor/model.py @@ -13,7 +13,6 @@ from collections import defaultdict import datetime import importlib -import platform import sys import textwrap import types @@ -57,12 +56,6 @@ # Functions can't contain anything. -_string_lineno_is_end = sys.version_info < (3,8) \ - and platform.python_implementation() != 'PyPy' -"""True iff the 'lineno' attribute of an AST string node points to the last -line in the string, rather than the first line. -""" - class LineFromAst(int): "Simple L{int} wrapper for linenumbers coming from ast analysis." @@ -707,7 +700,7 @@ def _compute_mro(self, cls: Class) -> list[ClassOrStr]: # support documenting typing.py module by using allobject.get. generic = cls.system.allobjects.get(_d:='typing.Generic', _d) if generic in bases and any(generic in _mro for _mro in bases_mros): - # this is cafe since we checked 'generic in bases'. + # this is safe since we checked 'generic in bases'. bases.remove(generic) # type: ignore[arg-type] try: diff --git a/pydoctor/test/test_configparser.py b/pydoctor/test/test_configparser.py index f0162a8bb..d2a5d3b82 100644 --- a/pydoctor/test/test_configparser.py +++ b/pydoctor/test/test_configparser.py @@ -1,6 +1,6 @@ from io import StringIO from typing import Any, Dict, List -import requests +from pathlib import Path from pydoctor._configparser import parse_toml_section_name, is_quoted, unquote_str, IniConfigParser, TomlConfigParser @@ -32,9 +32,11 @@ def test_unquote_str() -> None: assert unquote_str('""""value""""') == '""""value""""' def test_unquote_naughty_quoted_strings() -> None: - # See https://github.com/minimaxir/big-list-of-naughty-strings/blob/master/blns.txt - res = requests.get('https://raw.githubusercontent.com/minimaxir/big-list-of-naughty-strings/master/blns.txt') - text = res.text + # See https://github.com/minimaxir/big-list-of-naughty-strings + + text = Path(__file__).parent.joinpath('unquote_test_strings.txt' + ).read_text(encoding='utf-8', errors='replace') + for i, string in enumerate(text.split('\n')): if string.strip().startswith('#'): continue diff --git a/pydoctor/test/test_mro.py b/pydoctor/test/test_mro.py index 17b910e55..1f2e5a41a 100644 --- a/pydoctor/test/test_mro.py +++ b/pydoctor/test/test_mro.py @@ -141,6 +141,21 @@ class C(A, Generic[T], B[T]): ... assert_mro_equals(mod.contents['C'], ["t.C", "t.A", "t.B", "typing.Generic"]) +def test_mro_generic_in_system(capsys:CapSys) -> None: + src = ''' + class TypeVar:... + class Generic: ... + T = TypeVar('T') + class A: ... + class B(Generic[T]): ... + class C(A, Generic[T], B[T]): ... + ''' + mod = fromText(src, modname='typing') + assert not capsys.readouterr().out + assert_mro_equals(mod.contents['C'], + ["typing.C", "typing.A", "typing.B", "typing.Generic"]) + + def test_mro_generic_4(capsys:CapSys) -> None: src = ''' from typing import Generic, TypeVar diff --git a/pydoctor/test/unquote_test_strings.txt b/pydoctor/test/unquote_test_strings.txt new file mode 100644 index 000000000..e459dde6d --- /dev/null +++ b/pydoctor/test/unquote_test_strings.txt @@ -0,0 +1,742 @@ +# Reserved Strings +# +# Strings which may be used elsewhere in code + +undefined +undef +null +NULL +(null) +nil +NIL +true +false +True +False +TRUE +FALSE +None +hasOwnProperty +then +constructor +\ +\\ + +# Numeric Strings +# +# Strings which can be interpreted as numeric + +0 +1 +1.00 +$1.00 +1/2 +1E2 +1E02 +1E+02 +-1 +-1.00 +-$1.00 +-1/2 +-1E2 +-1E02 +-1E+02 +1/0 +0/0 +-2147483648/-1 +-9223372036854775808/-1 +-0 +-0.0 ++0 ++0.0 +0.00 +0..0 +. +0.0.0 +0,00 +0,,0 +, +0,0,0 +0.0/0 +1.0/0.0 +0.0/0.0 +1,0/0,0 +0,0/0,0 +--1 +- +-. +-, +999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999 +NaN +Infinity +-Infinity +INF +1#INF +-1#IND +1#QNAN +1#SNAN +1#IND +0x0 +0xffffffff +0xffffffffffffffff +0xabad1dea +123456789012345678901234567890123456789 +1,000.00 +1 000.00 +1'000.00 +1,000,000.00 +1 000 000.00 +1'000'000.00 +1.000,00 +1 000,00 +1'000,00 +1.000.000,00 +1 000 000,00 +1'000'000,00 +01000 +08 +09 +2.2250738585072011e-308 + +# Special Characters +# +# ASCII punctuation. All of these characters may need to be escaped in some +# contexts. Divided into three groups based on (US-layout) keyboard position. + +,./;'[]\-= +<>?:"{}|_+ +!@#$%^&*()`~ + +# Non-whitespace C0 controls: U+0001 through U+0008, U+000E through U+001F, +# and U+007F (DEL) +# Often forbidden to appear in various text-based file formats (e.g. XML), +# or reused for internal delimiters on the theory that they should never +# appear in input. +# The next line may appear to be blank or mojibake in some viewers. + + +# Non-whitespace C1 controls: U+0080 through U+0084 and U+0086 through U+009F. +# Commonly misinterpreted as additional graphic characters. +# The next line may appear to be blank, mojibake, or dingbats in some viewers. +€‚ƒ„†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ + +# Whitespace: all of the characters with category Zs, Zl, or Zp (in Unicode +# version 8.0.0), plus U+0009 (HT), U+000B (VT), U+000C (FF), U+0085 (NEL), +# and U+200B (ZERO WIDTH SPACE), which are in the C categories but are often +# treated as whitespace in some contexts. +# This file unfortunately cannot express strings containing +# U+0000, U+000A, or U+000D (NUL, LF, CR). +# The next line may appear to be blank or mojibake in some viewers. +# The next line may be flagged for "trailing whitespace" in some viewers. + …             ​

    + +# Unicode additional control characters: all of the characters with +# general category Cf (in Unicode 8.0.0). +# The next line may appear to be blank or mojibake in some viewers. +­؀؁؂؃؄؅؜۝܏᠎​‌‍‎‏‪‫‬‭‮⁠⁡⁢⁣⁤⁦⁧⁨⁩𑂽𛲠𛲡𛲢𛲣𝅳𝅴𝅵𝅶𝅷𝅸𝅹𝅺󠀁󠀠󠀡󠀢󠀣󠀤󠀥󠀦󠀧󠀨󠀩󠀪󠀫󠀬󠀭󠀮󠀯󠀰󠀱󠀲󠀳󠀴󠀵󠀶󠀷󠀸󠀹󠀺󠀻󠀼󠀽󠀾󠀿󠁀󠁁󠁂󠁃󠁄󠁅󠁆󠁇󠁈󠁉󠁊󠁋󠁌󠁍󠁎󠁏󠁐󠁑󠁒󠁓󠁔󠁕󠁖󠁗󠁘󠁙󠁚󠁛󠁜󠁝󠁞󠁟󠁠󠁡󠁢󠁣󠁤󠁥󠁦󠁧󠁨󠁩󠁪󠁫󠁬󠁭󠁮󠁯󠁰󠁱󠁲󠁳󠁴󠁵󠁶󠁷󠁸󠁹󠁺󠁻󠁼󠁽󠁾󠁿 + +# "Byte order marks", U+FEFF and U+FFFE, each on its own line. +# The next two lines may appear to be blank or mojibake in some viewers. + +￾ + +# Unicode Symbols +# +# Strings which contain common unicode symbols (e.g. smart quotes) + +Ω≈ç√∫˜µ≤≥÷ +åß∂ƒ©˙∆˚¬…æ +œ∑´®†¥¨ˆøπ“‘ +¡™£¢∞§¶•ªº–≠ +¸˛Ç◊ı˜Â¯˘¿ +ÅÍÎÏ˝ÓÔÒÚÆ☃ +Œ„´‰ˇÁ¨ˆØ∏”’ +`⁄€‹›fifl‡°·‚—± +⅛⅜⅝⅞ +ЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюя +٠١٢٣٤٥٦٧٨٩ + +# Unicode Subscript/Superscript/Accents +# +# Strings which contain unicode subscripts/superscripts; can cause rendering issues + +⁰⁴⁵ +₀₁₂ +⁰⁴⁵₀₁₂ +ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ ด้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็็้้้้้้้้็็็็็้้้้้็็็็ + +# Quotation Marks +# +# Strings which contain misplaced quotation marks; can cause encoding errors + +' +" +'' +"" +'"' +"''''"'" +"'"'"''''" + + + + + +# Two-Byte Characters +# +# Strings which contain two-byte characters: can cause rendering issues or character-length issues + +田中さんにあげて下さい +パーティーへ行かないか +和製漢語 +部落格 +사회과학원 어학연구소 +찦차를 타고 온 펲시맨과 쑛다리 똠방각하 +社會科學院語學研究所 +울란바토르 +𠜎𠜱𠝹𠱓𠱸𠲖𠳏 + +# Strings which contain two-byte letters: can cause issues with naïve UTF-16 capitalizers which think that 16 bits == 1 character + +𐐜 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐙𐐊𐐡𐐝𐐓/𐐝𐐇𐐗𐐊𐐤𐐔 𐐒𐐋𐐗 𐐒𐐌 𐐜 𐐡𐐀𐐖𐐇𐐤𐐓𐐝 𐐱𐑂 𐑄 𐐔𐐇𐐝𐐀𐐡𐐇𐐓 𐐏𐐆𐐅𐐤𐐆𐐚𐐊𐐡𐐝𐐆𐐓𐐆 + +# Special Unicode Characters Union +# +# A super string recommended by VMware Inc. Globalization Team: can effectively cause rendering issues or character-length issues to validate product globalization readiness. +# +# 表 CJK_UNIFIED_IDEOGRAPHS (U+8868) +# ポ KATAKANA LETTER PO (U+30DD) +# あ HIRAGANA LETTER A (U+3042) +# A LATIN CAPITAL LETTER A (U+0041) +# 鷗 CJK_UNIFIED_IDEOGRAPHS (U+9DD7) +# Œ LATIN SMALL LIGATURE OE (U+0153) +# é LATIN SMALL LETTER E WITH ACUTE (U+00E9) +# B FULLWIDTH LATIN CAPITAL LETTER B (U+FF22) +# 逍 CJK_UNIFIED_IDEOGRAPHS (U+900D) +# Ü LATIN SMALL LETTER U WITH DIAERESIS (U+00FC) +# ß LATIN SMALL LETTER SHARP S (U+00DF) +# ª FEMININE ORDINAL INDICATOR (U+00AA) +# ą LATIN SMALL LETTER A WITH OGONEK (U+0105) +# ñ LATIN SMALL LETTER N WITH TILDE (U+00F1) +# 丂 CJK_UNIFIED_IDEOGRAPHS (U+4E02) +# 㐀 CJK Ideograph Extension A, First (U+3400) +# 𠀀 CJK Ideograph Extension B, First (U+20000) + +表ポあA鷗ŒéB逍Üߪąñ丂㐀𠀀 + +# Changing length when lowercased +# +# Characters which increase in length (2 to 3 bytes) when lowercased +# Credit: https://twitter.com/jifa/status/625776454479970304 + +Ⱥ +Ⱦ + +# Japanese Emoticons +# +# Strings which consists of Japanese-style emoticons which are popular on the web + +ヽ༼ຈل͜ຈ༽ノ ヽ༼ຈل͜ຈ༽ノ +(。◕ ∀ ◕。) +`ィ(´∀`∩ +__ロ(,_,*) +・( ̄∀ ̄)・:*: +゚・✿ヾ╲(。◕‿◕。)╱✿・゚ +,。・:*:・゜’( ☻ ω ☻ )。・:*:・゜’ +(╯°□°)╯︵ ┻━┻) +(ノಥ益ಥ)ノ ┻━┻ +┬─┬ノ( º _ ºノ) +( ͡° ͜ʖ ͡°) +¯\_(ツ)_/¯ + +# Emoji +# +# Strings which contain Emoji; should be the same behavior as two-byte characters, but not always + +😍 +👩🏽 +👨‍🦰 👨🏿‍🦰 👨‍🦱 👨🏿‍🦱 🦹🏿‍♂️ +👾 🙇 💁 🙅 🙆 🙋 🙎 🙍 +🐵 🙈 🙉 🙊 +❤️ 💔 💌 💕 💞 💓 💗 💖 💘 💝 💟 💜 💛 💚 💙 +✋🏿 💪🏿 👐🏿 🙌🏿 👏🏿 🙏🏿 +👨‍👩‍👦 👨‍👩‍👧‍👦 👨‍👨‍👦 👩‍👩‍👧 👨‍👦 👨‍👧‍👦 👩‍👦 👩‍👧‍👦 +🚾 🆒 🆓 🆕 🆖 🆗 🆙 🏧 +0️⃣ 1️⃣ 2️⃣ 3️⃣ 4️⃣ 5️⃣ 6️⃣ 7️⃣ 8️⃣ 9️⃣ 🔟 + +# Regional Indicator Symbols +# +# Regional Indicator Symbols can be displayed differently across +# fonts, and have a number of special behaviors + +🇺🇸🇷🇺🇸 🇦🇫🇦🇲🇸 +🇺🇸🇷🇺🇸🇦🇫🇦🇲 +🇺🇸🇷🇺🇸🇦 + +# Unicode Numbers +# +# Strings which contain unicode numbers; if the code is localized, it should see the input as numeric + +123 +١٢٣ + +# Right-To-Left Strings +# +# Strings which contain text that should be rendered RTL if possible (e.g. Arabic, Hebrew) + +ثم نفس سقطت وبالتحديد،, جزيرتي باستخدام أن دنو. إذ هنا؟ الستار وتنصيب كان. أهّل ايطاليا، بريطانيا-فرنسا قد أخذ. سليمان، إتفاقية بين ما, يذكر الحدود أي بعد, معاملة بولندا، الإطلاق عل إيو. +בְּרֵאשִׁית, בָּרָא אֱלֹהִים, אֵת הַשָּׁמַיִם, וְאֵת הָאָרֶץ +הָיְתָהtestالصفحات التّحول +﷽ +ﷺ +مُنَاقَشَةُ سُبُلِ اِسْتِخْدَامِ اللُّغَةِ فِي النُّظُمِ الْقَائِمَةِ وَفِيم يَخُصَّ التَّطْبِيقَاتُ الْحاسُوبِيَّةُ، +الكل في المجمو عة (5) + +# Ogham Text +# +# The only unicode alphabet to use a space which isn't empty but should still act like a space. + +᚛ᚄᚓᚐᚋᚒᚄ ᚑᚄᚂᚑᚏᚅ᚜ +᚛                 ᚜ + +# Trick Unicode +# +# Strings which contain unicode with unusual properties (e.g. Right-to-left override) (c.f. http://www.unicode.org/charts/PDF/U2000.pdf) + +‪‪test‪ +‫test‫ +
test
 +test⁠test‫ +⁦test⁧ + +# Zalgo Text +# +# Strings which contain "corrupted" text. The corruption will not appear in non-HTML text, however. (via http://www.eeemo.net) + +Ṱ̺̺̕o͞ ̷i̲̬͇̪͙n̝̗͕v̟̜̘̦͟o̶̙̰̠kè͚̮̺̪̹̱̤ ̖t̝͕̳̣̻̪͞h̼͓̲̦̳̘̲e͇̣̰̦̬͎ ̢̼̻̱̘h͚͎͙̜̣̲ͅi̦̲̣̰̤v̻͍e̺̭̳̪̰-m̢iͅn̖̺̞̲̯̰d̵̼̟͙̩̼̘̳ ̞̥̱̳̭r̛̗̘e͙p͠r̼̞̻̭̗e̺̠̣͟s̘͇̳͍̝͉e͉̥̯̞̲͚̬͜ǹ̬͎͎̟̖͇̤t͍̬̤͓̼̭͘ͅi̪̱n͠g̴͉ ͏͉ͅc̬̟h͡a̫̻̯͘o̫̟̖͍̙̝͉s̗̦̲.̨̹͈̣ +̡͓̞ͅI̗̘̦͝n͇͇͙v̮̫ok̲̫̙͈i̖͙̭̹̠̞n̡̻̮̣̺g̲͈͙̭͙̬͎ ̰t͔̦h̞̲e̢̤ ͍̬̲͖f̴̘͕̣è͖ẹ̥̩l͖͔͚i͓͚̦͠n͖͍̗͓̳̮g͍ ̨o͚̪͡f̘̣̬ ̖̘͖̟͙̮c҉͔̫͖͓͇͖ͅh̵̤̣͚͔á̗̼͕ͅo̼̣̥s̱͈̺̖̦̻͢.̛̖̞̠̫̰ +̗̺͖̹̯͓Ṯ̤͍̥͇͈h̲́e͏͓̼̗̙̼̣͔ ͇̜̱̠͓͍ͅN͕͠e̗̱z̘̝̜̺͙p̤̺̹͍̯͚e̠̻̠͜r̨̤͍̺̖͔̖̖d̠̟̭̬̝͟i̦͖̩͓͔̤a̠̗̬͉̙n͚͜ ̻̞̰͚ͅh̵͉i̳̞v̢͇ḙ͎͟-҉̭̩̼͔m̤̭̫i͕͇̝̦n̗͙ḍ̟ ̯̲͕͞ǫ̟̯̰̲͙̻̝f ̪̰̰̗̖̭̘͘c̦͍̲̞͍̩̙ḥ͚a̮͎̟̙͜ơ̩̹͎s̤.̝̝ ҉Z̡̖̜͖̰̣͉̜a͖̰͙̬͡l̲̫̳͍̩g̡̟̼̱͚̞̬ͅo̗͜.̟ +̦H̬̤̗̤͝e͜ ̜̥̝̻͍̟́w̕h̖̯͓o̝͙̖͎̱̮ ҉̺̙̞̟͈W̷̼̭a̺̪͍į͈͕̭͙̯̜t̶̼̮s̘͙͖̕ ̠̫̠B̻͍͙͉̳ͅe̵h̵̬͇̫͙i̹͓̳̳̮͎̫̕n͟d̴̪̜̖ ̰͉̩͇͙̲͞ͅT͖̼͓̪͢h͏͓̮̻e̬̝̟ͅ ̤̹̝W͙̞̝͔͇͝ͅa͏͓͔̹̼̣l̴͔̰̤̟͔ḽ̫.͕ +Z̮̞̠͙͔ͅḀ̗̞͈̻̗Ḷ͙͎̯̹̞͓G̻O̭̗̮ + +# Unicode Upsidedown +# +# Strings which contain unicode with an "upsidedown" effect (via http://www.upsidedowntext.com) + +˙ɐnbᴉlɐ ɐuƃɐɯ ǝɹolop ʇǝ ǝɹoqɐl ʇn ʇunpᴉpᴉɔuᴉ ɹodɯǝʇ poɯsnᴉǝ op pǝs 'ʇᴉlǝ ƃuᴉɔsᴉdᴉpɐ ɹnʇǝʇɔǝsuoɔ 'ʇǝɯɐ ʇᴉs ɹolop ɯnsdᴉ ɯǝɹo˥ +00˙Ɩ$- + +# Unicode font +# +# Strings which contain bold/italic/etc. versions of normal characters + +The quick brown fox jumps over the lazy dog +𝐓𝐡𝐞 𝐪𝐮𝐢𝐜𝐤 𝐛𝐫𝐨𝐰𝐧 𝐟𝐨𝐱 𝐣𝐮𝐦𝐩𝐬 𝐨𝐯𝐞𝐫 𝐭𝐡𝐞 𝐥𝐚𝐳𝐲 𝐝𝐨𝐠 +𝕿𝖍𝖊 𝖖𝖚𝖎𝖈𝖐 𝖇𝖗𝖔𝖜𝖓 𝖋𝖔𝖝 𝖏𝖚𝖒𝖕𝖘 𝖔𝖛𝖊𝖗 𝖙𝖍𝖊 𝖑𝖆𝖟𝖞 𝖉𝖔𝖌 +𝑻𝒉𝒆 𝒒𝒖𝒊𝒄𝒌 𝒃𝒓𝒐𝒘𝒏 𝒇𝒐𝒙 𝒋𝒖𝒎𝒑𝒔 𝒐𝒗𝒆𝒓 𝒕𝒉𝒆 𝒍𝒂𝒛𝒚 𝒅𝒐𝒈 +𝓣𝓱𝓮 𝓺𝓾𝓲𝓬𝓴 𝓫𝓻𝓸𝔀𝓷 𝓯𝓸𝔁 𝓳𝓾𝓶𝓹𝓼 𝓸𝓿𝓮𝓻 𝓽𝓱𝓮 𝓵𝓪𝔃𝔂 𝓭𝓸𝓰 +𝕋𝕙𝕖 𝕢𝕦𝕚𝕔𝕜 𝕓𝕣𝕠𝕨𝕟 𝕗𝕠𝕩 𝕛𝕦𝕞𝕡𝕤 𝕠𝕧𝕖𝕣 𝕥𝕙𝕖 𝕝𝕒𝕫𝕪 𝕕𝕠𝕘 +𝚃𝚑𝚎 𝚚𝚞𝚒𝚌𝚔 𝚋𝚛𝚘𝚠𝚗 𝚏𝚘𝚡 𝚓𝚞𝚖𝚙𝚜 𝚘𝚟𝚎𝚛 𝚝𝚑𝚎 𝚕𝚊𝚣𝚢 𝚍𝚘𝚐 +⒯⒣⒠ ⒬⒰⒤⒞⒦ ⒝⒭⒪⒲⒩ ⒡⒪⒳ ⒥⒰⒨⒫⒮ ⒪⒱⒠⒭ ⒯⒣⒠ ⒧⒜⒵⒴ ⒟⒪⒢ + +# Script Injection +# +# Strings which attempt to invoke a benign script injection; shows vulnerability to XSS + + +<script>alert('1');</script> + + +"> +'> +> + +< / script >< script >alert(8)< / script > + onfocus=JaVaSCript:alert(9) autofocus +" onfocus=JaVaSCript:alert(10) autofocus +' onfocus=JaVaSCript:alert(11) autofocus +<script>alert(12)</script> +ript>alert(13)ript> +--> +";alert(15);t=" +';alert(16);t=' +JavaSCript:alert(17) +;alert(18); +src=JaVaSCript:prompt(19) +">javascript:alert(25); +javascript:alert(26); +javascript:alert(27); +javascript:alert(28); +javascript:alert(29); +javascript:alert(30); +javascript:alert(31); +'`"><\x3Cscript>javascript:alert(32) +'`"><\x00script>javascript:alert(33) +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +ABC
DEF +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +test +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +`"'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> +"`'> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +XXX + + + +<a href=http://foo.bar/#x=`y></a><img alt="`><img src=x:x onerror=javascript:alert(203)></a>"> +<!--[if]><script>javascript:alert(204)</script --> +<!--[if<img src=x onerror=javascript:alert(205)//]> --> +<script src="/\%(jscript)s"></script> +<script src="\\%(jscript)s"></script> +<IMG """><SCRIPT>alert("206")</SCRIPT>"> +<IMG SRC=javascript:alert(String.fromCharCode(50,48,55))> +<IMG SRC=# onmouseover="alert('208')"> +<IMG SRC= onmouseover="alert('209')"> +<IMG onmouseover="alert('210')"> +<IMG SRC=javascript:alert('211')> +<IMG SRC=javascript:alert('212')> +<IMG SRC=javascript:alert('213')> +<IMG SRC="jav ascript:alert('214');"> +<IMG SRC="jav ascript:alert('215');"> +<IMG SRC="jav ascript:alert('216');"> +<IMG SRC="jav ascript:alert('217');"> +perl -e 'print "<IMG SRC=java\0script:alert(\"218\")>";' > out +<IMG SRC="  javascript:alert('219');"> +<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT> +<BODY onload!#$%&()*~+-_.,:;?@[/|\]^`=alert("220")> +<SCRIPT/SRC="http://ha.ckers.org/xss.js"></SCRIPT> +<<SCRIPT>alert("221");//<</SCRIPT> +<SCRIPT SRC=http://ha.ckers.org/xss.js?< B > +<SCRIPT SRC=//ha.ckers.org/.j> +<IMG SRC="javascript:alert('222')" +<iframe src=http://ha.ckers.org/scriptlet.html < +\";alert('223');// +<u oncopy=alert()> Copy me</u> +<i onwheel=alert(224)> Scroll over me </i> +<plaintext> +http://a/%%30%30 +</textarea><script>alert(225)</script> + +# SQL Injection +# +# Strings which can cause a SQL injection if inputs are not sanitized + +1;DROP TABLE users +1'; DROP TABLE users-- 1 +' OR 1=1 -- 1 +' OR '1'='1 +'; EXEC sp_MSForEachTable 'DROP TABLE ?'; -- + +% +_ + +# Server Code Injection +# +# Strings which can cause user to run code on server as a privileged user (c.f. https://news.ycombinator.com/item?id=7665153) + +- +-- +--version +--help +$USER +/dev/null; touch /tmp/blns.fail ; echo +`touch /tmp/blns.fail` +$(touch /tmp/blns.fail) +@{[system "touch /tmp/blns.fail"]} + +# Command Injection (Ruby) +# +# Strings which can call system commands within Ruby/Rails applications + +eval("puts 'hello world'") +System("ls -al /") +`ls -al /` +Kernel.exec("ls -al /") +Kernel.exit(1) +%x('ls -al /') + +# XXE Injection (XML) +# +# String which can reveal system files when parsed by a badly configured XML parser + +<?xml version="1.0" encoding="ISO-8859-1"?><!DOCTYPE foo [ <!ELEMENT foo ANY ><!ENTITY xxe SYSTEM "file:///etc/passwd" >]><foo>&xxe;</foo> + +# Unwanted Interpolation +# +# Strings which can be accidentally expanded into different strings if evaluated in the wrong context, e.g. used as a printf format string or via Perl or shell eval. Might expose sensitive data from the program doing the interpolation, or might just represent the wrong string. + +$HOME +$ENV{'HOME'} +%d +%s%s%s%s%s +{0} +%*.*s +%@ +%n +File:/// + +# File Inclusion +# +# Strings which can cause user to pull in files that should not be a part of a web server + +../../../../../../../../../../../etc/passwd%00 +../../../../../../../../../../../etc/hosts + +# Known CVEs and Vulnerabilities +# +# Strings that test for known vulnerabilities + +() { 0; }; touch /tmp/blns.shellshock1.fail; +() { _; } >_[$($())] { touch /tmp/blns.shellshock2.fail; } +<<< %s(un='%s') = %u ++++ATH0 + +# MSDOS/Windows Special Filenames +# +# Strings which are reserved characters in MSDOS/Windows + +CON +PRN +AUX +CLOCK$ +NUL +A: +ZZ: +COM1 +LPT1 +LPT2 +LPT3 +COM2 +COM3 +COM4 + +# IRC specific strings +# +# Strings that may occur on IRC clients that make security products freak out + +DCC SEND STARTKEYLOGGER 0 0 0 + +# Scunthorpe Problem +# +# Innocuous strings which may be blocked by profanity filters (https://en.wikipedia.org/wiki/Scunthorpe_problem) + +Scunthorpe General Hospital +Penistone Community Church +Lightwater Country Park +Jimmy Clitheroe +Horniman Museum +shitake mushrooms +RomansInSussex.co.uk +http://www.cum.qc.ca/ +Craig Cockburn, Software Specialist +Linda Callahan +Dr. Herman I. Libshitz +magna cum laude +Super Bowl XXX +medieval erection of parapets +evaluate +mocha +expression +Arsenal canal +classic +Tyson Gay +Dick Van Dyke +basement + +# Human injection +# +# Strings which may cause human to reinterpret worldview + +If you're reading this, you've been in a coma for almost 20 years now. We're trying a new technique. We don't know where this message will end up in your dream, but we hope it works. Please wake up, we miss you. + +# Terminal escape codes +# +# Strings which punish the fools who use cat/type on this file + +Roses are red, violets are blue. Hope you enjoy terminal hue +But now...for my greatest trick... +The quick brown fox... [Beeeep] + +# iOS Vulnerabilities +# +# Strings which crashed iMessage in various versions of iOS + +Powerلُلُصّبُلُلصّبُررً ॣ ॣh ॣ ॣ冗 +🏳0🌈️ +జ్ఞ‌ా + +# Persian special characters +# +# This is a four characters string which includes Persian special characters (گچپژ) + +گچپژ + +# jinja2 injection +# +# first one is supposed to raise "MemoryError" exception +# second, obviously, prints contents of /etc/passwd + +{% print 'x' * 64 * 1024**3 %} +{{ "".__class__.__mro__[2].__subclasses__()[40]("/etc/passwd").read() }} \ No newline at end of file