From c8f25d4899f51d07cc47de2ddcc0c3c5ec22d0b8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9D=D1=82=D0=BE=D0=BD=D1=8F?= Date: Mon, 1 Nov 2021 23:41:54 +0300 Subject: [PATCH 1/8] fix: false positive code detections (closes #4) --- app/run_code/parse_code.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/app/run_code/parse_code.py b/app/run_code/parse_code.py index 76447b1..da7eade 100644 --- a/app/run_code/parse_code.py +++ b/app/run_code/parse_code.py @@ -6,6 +6,28 @@ class _Result: uses_orig = False +FALSE_BINOP_TYPES = (ast.Constant, ast.Name, ast.Attribute) + + +def _check_node_on_false_binop(node) -> bool: + if isinstance(node, FALSE_BINOP_TYPES): + return True + if not isinstance(node, (ast.BoolOp, ast.BinOp, ast.Compare)): + return False + if isinstance(node, ast.Compare): + return isinstance(node.left, FALSE_BINOP_TYPES) and all(isinstance(x, FALSE_BINOP_TYPES) for x in node.comparators) + return all(_check_node_on_false_binop(operand) for operand in ((node.left, node.right) if isinstance(node, ast.BinOp) else node.values)) + + +def _is_node_false(node: ast.AST) -> bool: + return ( + isinstance(node, (ast.Constant, ast.Name)) + or isinstance(node, ast.UnaryOp) and isinstance(node.operand, (ast.Constant, ast.Name, ast.Attribute)) # Messages like "-1", "+spam" and "not foo.bar" + or _check_node_on_false_binop(node) # Messages like one-two, one is two, one >= two, one.b in two.c + or isinstance(node, ast.Tuple) and all(_check_node_on_false_binop(elt) for elt in node.elts) # "(yes, understood)" + ) + + def parse_code(text: str): result = _Result() @@ -14,11 +36,8 @@ def parse_code(text: str): except (SyntaxError, ValueError): return result - if len(root.body) == 1 and isinstance(root.body[0], ast.Expr): - if isinstance(root.body[0].value, (ast.Constant, ast.Name)): - return result - if isinstance(root.body[0].value, ast.UnaryOp) and isinstance(root.body[0].value.operand, ast.Constant): - return result + if all(isinstance(root.body[i], ast.Expr) and _is_node_false(root.body[i].value) for i in range(len(root.body))): + return result result.is_code = True From ceff1de8a43054b6a03b7f9ae114f45a0863de86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9D=D1=82=D0=BE=D0=BD=D1=8F?= Date: Mon, 1 Nov 2021 23:44:56 +0300 Subject: [PATCH 2/8] fix: get rid of for ... in range(len(root.body)) --- app/run_code/parse_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/run_code/parse_code.py b/app/run_code/parse_code.py index da7eade..6d6ece3 100644 --- a/app/run_code/parse_code.py +++ b/app/run_code/parse_code.py @@ -36,7 +36,7 @@ def parse_code(text: str): except (SyntaxError, ValueError): return result - if all(isinstance(root.body[i], ast.Expr) and _is_node_false(root.body[i].value) for i in range(len(root.body))): + if all(isinstance(body_item, ast.Expr) and _is_node_false(body_item.value) for body_item in root.body): return result result.is_code = True From c86d842eb5759b0e4c7ea92f8dc5bd2e86edc6a3 Mon Sep 17 00:00:00 2001 From: Anton Egorov Date: Tue, 2 Nov 2021 12:31:48 +0300 Subject: [PATCH 3/8] feat: make code detection less strict - binary operations with constants (like "1 - 2", but not "1" or "+1") are considered code now - if a variable which is present in locals() appears in the message, it **is** evaluated --- app/handlers/__init__.py | 6 ++++-- app/run_code/__init__.py | 4 ++++ app/run_code/parse_code.py | 25 ++++++++++++++----------- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/app/handlers/__init__.py b/app/handlers/__init__.py index 90d6203..e0b1e80 100644 --- a/app/handlers/__init__.py +++ b/app/handlers/__init__.py @@ -4,7 +4,7 @@ from app import client, message_design from app.handlers.uitls import _handle_errors, outgoing_messages_filter from app.run_code.parse_code import parse_code -from app.run_code import eval_message +from app.run_code import eval_message, get_kwargs async def handle_message(message: Message) -> None: @@ -17,7 +17,9 @@ async def handle_message(message: Message) -> None: await message.edit(message.text[2:]) return - res = parse_code(raw_text) + locals_ = get_kwargs() + + res = parse_code(raw_text, locals_) if not res.is_code: return diff --git a/app/run_code/__init__.py b/app/run_code/__init__.py index 6fd7b0f..00d3922 100644 --- a/app/run_code/__init__.py +++ b/app/run_code/__init__.py @@ -8,6 +8,10 @@ from app.run_code.variables import variables +def get_kwargs(include_orig=True): + return list(variables.keys()) + ['orig'] if include_orig else [] + + async def eval_message(code: str, message: Message, uses_orig=False) -> None: await message_design.edit_message(message, code, 'Running...') diff --git a/app/run_code/parse_code.py b/app/run_code/parse_code.py index 6d6ece3..d201830 100644 --- a/app/run_code/parse_code.py +++ b/app/run_code/parse_code.py @@ -6,29 +6,32 @@ class _Result: uses_orig = False -FALSE_BINOP_TYPES = (ast.Constant, ast.Name, ast.Attribute) +def _is_node_fp_word(node: ast.AST, locs: dict) -> bool: + if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name): + return node.value.id not in locs + return isinstance(node, ast.Name) and node.id not in locs -def _check_node_on_false_binop(node) -> bool: - if isinstance(node, FALSE_BINOP_TYPES): +def _check_node_on_false_binop(node: ast.AST, locs: dict) -> bool: + if _is_node_fp_word(node, locs): return True if not isinstance(node, (ast.BoolOp, ast.BinOp, ast.Compare)): return False if isinstance(node, ast.Compare): - return isinstance(node.left, FALSE_BINOP_TYPES) and all(isinstance(x, FALSE_BINOP_TYPES) for x in node.comparators) - return all(_check_node_on_false_binop(operand) for operand in ((node.left, node.right) if isinstance(node, ast.BinOp) else node.values)) + return _is_node_fp_word(node.left, locs) and all(_is_node_fp_word(x, locs) for x in node.comparators) + return all(_check_node_on_false_binop(operand, locs) for operand in ((node.left, node.right) if isinstance(node, ast.BinOp) else node.values)) -def _is_node_false(node: ast.AST) -> bool: +def _is_node_false(node: ast.AST, locs: dict) -> bool: return ( - isinstance(node, (ast.Constant, ast.Name)) + isinstance(node, ast.Constant) or _is_node_fp_word(node, locs) or isinstance(node, ast.UnaryOp) and isinstance(node.operand, (ast.Constant, ast.Name, ast.Attribute)) # Messages like "-1", "+spam" and "not foo.bar" - or _check_node_on_false_binop(node) # Messages like one-two, one is two, one >= two, one.b in two.c - or isinstance(node, ast.Tuple) and all(_check_node_on_false_binop(elt) for elt in node.elts) # "(yes, understood)" + or _check_node_on_false_binop(node, locs) # Messages like one-two, one is two, one >= two, one.b in two.c + or isinstance(node, ast.Tuple) and all(_check_node_on_false_binop(elt, locs) for elt in node.elts) # "(yes, understood)" ) -def parse_code(text: str): +def parse_code(text: str, locs: dict): result = _Result() try: @@ -36,7 +39,7 @@ def parse_code(text: str): except (SyntaxError, ValueError): return result - if all(isinstance(body_item, ast.Expr) and _is_node_false(body_item.value) for body_item in root.body): + if all(isinstance(body_item, ast.Expr) and _is_node_false(body_item.value, locs) for body_item in root.body): return result result.is_code = True From b6683292239bcd9c66c92c16f98ec301c135ee41 Mon Sep 17 00:00:00 2001 From: Anton Egorov Date: Tue, 2 Nov 2021 12:37:23 +0300 Subject: [PATCH 4/8] fix: code detection Include ctx, msg, print and client to locals to improve code detection --- app/run_code/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/run_code/__init__.py b/app/run_code/__init__.py index 00d3922..e601968 100644 --- a/app/run_code/__init__.py +++ b/app/run_code/__init__.py @@ -9,7 +9,7 @@ def get_kwargs(include_orig=True): - return list(variables.keys()) + ['orig'] if include_orig else [] + return list(variables.keys()) + ['ctx', 'msg', 'print', 'client'] + ['orig'] if include_orig else [] async def eval_message(code: str, message: Message, uses_orig=False) -> None: From ecec598eca5be5130a264861938a1db423f6cb21 Mon Sep 17 00:00:00 2001 From: Anton Egorov Date: Tue, 2 Nov 2021 13:13:38 +0300 Subject: [PATCH 5/8] fix: tuples of constants are not evaluated --- app/run_code/parse_code.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/run_code/parse_code.py b/app/run_code/parse_code.py index d201830..5aa6689 100644 --- a/app/run_code/parse_code.py +++ b/app/run_code/parse_code.py @@ -27,7 +27,7 @@ def _is_node_false(node: ast.AST, locs: dict) -> bool: isinstance(node, ast.Constant) or _is_node_fp_word(node, locs) or isinstance(node, ast.UnaryOp) and isinstance(node.operand, (ast.Constant, ast.Name, ast.Attribute)) # Messages like "-1", "+spam" and "not foo.bar" or _check_node_on_false_binop(node, locs) # Messages like one-two, one is two, one >= two, one.b in two.c - or isinstance(node, ast.Tuple) and all(_check_node_on_false_binop(elt, locs) for elt in node.elts) # "(yes, understood)" + or isinstance(node, ast.Tuple) and all(_is_node_false(elt, locs) for elt in node.elts) # "(yes, understood)" ) From b6d283aed3e6a714ca70d44a93f46087a6bd450b Mon Sep 17 00:00:00 2001 From: Anton Egorov Date: Tue, 2 Nov 2021 15:47:07 +0300 Subject: [PATCH 6/8] feat: docstrings for app/run_code/parse_code.py --- app/run_code/parse_code.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/app/run_code/parse_code.py b/app/run_code/parse_code.py index 5aa6689..e47a4b8 100644 --- a/app/run_code/parse_code.py +++ b/app/run_code/parse_code.py @@ -7,12 +7,15 @@ class _Result: def _is_node_fp_word(node: ast.AST, locs: dict) -> bool: + """Check if AST node is a Name or Attribute not present in locals""" if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name): return node.value.id not in locs return isinstance(node, ast.Name) and node.id not in locs def _check_node_on_false_binop(node: ast.AST, locs: dict) -> bool: + """Check if AST node can be an operand or binary operation (ast.BinOp, ast.Compare, ast.BoolOp) + with operands which do not pass _is_node_fp_word check""" if _is_node_fp_word(node, locs): return True if not isinstance(node, (ast.BoolOp, ast.BinOp, ast.Compare)): @@ -23,6 +26,7 @@ def _check_node_on_false_binop(node: ast.AST, locs: dict) -> bool: def _is_node_false(node: ast.AST, locs: dict) -> bool: + """Check if AST node didn't seem to be meant to be code""" return ( isinstance(node, ast.Constant) or _is_node_fp_word(node, locs) or isinstance(node, ast.UnaryOp) and isinstance(node.operand, (ast.Constant, ast.Name, ast.Attribute)) # Messages like "-1", "+spam" and "not foo.bar" @@ -31,7 +35,8 @@ def _is_node_false(node: ast.AST, locs: dict) -> bool: ) -def parse_code(text: str, locs: dict): +def parse_code(text: str, locs: dict) -> _Result: + """Parse given text and decide should it be evaluated as Python code""" result = _Result() try: From 36f5d30845ad1cde402b8bbf07db5d80f3d17957 Mon Sep 17 00:00:00 2001 From: Anton Egorov Date: Tue, 2 Nov 2021 16:27:28 +0300 Subject: [PATCH 7/8] guide: rewrite 'What is ignored?' section in code_detection.md --- guide/docs/code_detection.md | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/guide/docs/code_detection.md b/guide/docs/code_detection.md index ae69112..4ef7241 100644 --- a/guide/docs/code_detection.md +++ b/guide/docs/code_detection.md @@ -11,11 +11,23 @@ It turns out that regular text messages aren't often identified as code. TGPy ig Although, optional disabling of auto-detection might be added in the future. -## Simple expressions +## What is ignored? -Simple names and constants are ignored. If you want to get some variable value, use `return variable`. +TL;DR: Some simple expressions, which could be email addresses, URLs or several comma- or hyphen-separated words +(as described in [issue 4](https://github.com/tm-a-t/TGPy/issues/4)) + +??? note "More details" + In this section, an **unknown** variable is one not present in `locals` — that is, which were not saved in previous messages and which are not built in TGPy (as `ctx`, `orig`, `msg` and `print` are) + Unknown variables' attributes are also considered unknown + + **Ignored** expressions are expressions in the list below: + + * Constants like `1` or `"abcd"` and unknown variables + * Binary operations on unknown variables (recursively, i.e., `a - b -c` is also ignored in case `a`, `b`, `c` are unknown) + * Unary operations on constants or unknown variables + * Tuples of ignored expressions + * Multiple ignored expressions (i.e. separated by `;` or newline)**** -In future updates some other simple expressions will be ignored, too. ## Cancel evaluation From fc0e66d0fb32e0a9b632f5bfcd0de6435091a620 Mon Sep 17 00:00:00 2001 From: Anton Egorov Date: Tue, 2 Nov 2021 16:55:34 +0300 Subject: [PATCH 8/8] refactor: app/run_code/parse_code.py: shorter lines, better function names --- app/run_code/parse_code.py | 32 +++++++++++++++++++------------- 1 file changed, 19 insertions(+), 13 deletions(-) diff --git a/app/run_code/parse_code.py b/app/run_code/parse_code.py index e47a4b8..d442b35 100644 --- a/app/run_code/parse_code.py +++ b/app/run_code/parse_code.py @@ -6,32 +6,38 @@ class _Result: uses_orig = False -def _is_node_fp_word(node: ast.AST, locs: dict) -> bool: +def _is_node_unknown_variable(node: ast.AST, locs: dict) -> bool: """Check if AST node is a Name or Attribute not present in locals""" if isinstance(node, ast.Attribute) and isinstance(node.value, ast.Name): return node.value.id not in locs return isinstance(node, ast.Name) and node.id not in locs -def _check_node_on_false_binop(node: ast.AST, locs: dict) -> bool: - """Check if AST node can be an operand or binary operation (ast.BinOp, ast.Compare, ast.BoolOp) - with operands which do not pass _is_node_fp_word check""" - if _is_node_fp_word(node, locs): +def _is_node_suspicious_binop(node: ast.AST, locs: dict) -> bool: + """Check if AST node can be an operand of binary operation (ast.BinOp, ast.Compare, ast.BoolOp) + with operands which do not pass _is_node_unknown_variable check, or is such operation""" + if _is_node_unknown_variable(node, locs): return True if not isinstance(node, (ast.BoolOp, ast.BinOp, ast.Compare)): return False if isinstance(node, ast.Compare): - return _is_node_fp_word(node.left, locs) and all(_is_node_fp_word(x, locs) for x in node.comparators) - return all(_check_node_on_false_binop(operand, locs) for operand in ((node.left, node.right) if isinstance(node, ast.BinOp) else node.values)) + return _is_node_unknown_variable(node.left, locs) and all(_is_node_unknown_variable(x, locs) + for x in node.comparators) + return all(_is_node_suspicious_binop(operand, locs) + for operand in ((node.left, node.right) if isinstance(node, ast.BinOp) else node.values)) -def _is_node_false(node: ast.AST, locs: dict) -> bool: +def _ignore_node(node: ast.AST, locs: dict) -> bool: """Check if AST node didn't seem to be meant to be code""" return ( - isinstance(node, ast.Constant) or _is_node_fp_word(node, locs) - or isinstance(node, ast.UnaryOp) and isinstance(node.operand, (ast.Constant, ast.Name, ast.Attribute)) # Messages like "-1", "+spam" and "not foo.bar" - or _check_node_on_false_binop(node, locs) # Messages like one-two, one is two, one >= two, one.b in two.c - or isinstance(node, ast.Tuple) and all(_is_node_false(elt, locs) for elt in node.elts) # "(yes, understood)" + # Messages like "python", "123" or "example.com" + isinstance(node, ast.Constant) or _is_node_unknown_variable(node, locs) + # Messages like "-1", "+spam" and "not foo.bar" + or isinstance(node, ast.UnaryOp) and isinstance(node.operand, (ast.Constant, ast.Name, ast.Attribute)) + # Messages like one-two, one is two, one >= two, one.b in two.c + or _is_node_suspicious_binop(node, locs) + # Messages like "yes, understood" + or isinstance(node, ast.Tuple) and all(_ignore_node(elt, locs) for elt in node.elts) ) @@ -44,7 +50,7 @@ def parse_code(text: str, locs: dict) -> _Result: except (SyntaxError, ValueError): return result - if all(isinstance(body_item, ast.Expr) and _is_node_false(body_item.value, locs) for body_item in root.body): + if all(isinstance(body_item, ast.Expr) and _ignore_node(body_item.value, locs) for body_item in root.body): return result result.is_code = True