Skip to content

Commit

Permalink
Merge pull request #9 from tm-a-t/filter-code
Browse files Browse the repository at this point in the history
Fix false positive code detections as described in #4
  • Loading branch information
tm-a-t authored Nov 2, 2021
2 parents 6a99cb1 + fc0e66d commit 04fc88e
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 11 deletions.
6 changes: 4 additions & 2 deletions app/handlers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from app import client, message_design
from app.handlers.uitls import _handle_errors, outgoing_messages_filter
from app.run_code.parse_code import parse_code
from app.run_code import eval_message
from app.run_code import eval_message, get_kwargs


async def handle_message(message: Message) -> None:
Expand All @@ -17,7 +17,9 @@ async def handle_message(message: Message) -> None:
await message.edit(message.text[2:])
return

res = parse_code(raw_text)
locals_ = get_kwargs()

res = parse_code(raw_text, locals_)
if not res.is_code:
return

Expand Down
4 changes: 4 additions & 0 deletions app/run_code/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@
from app.run_code.variables import variables


def get_kwargs(include_orig=True):
    """Build the list of names that are treated as already defined.

    The result is the keys of ``variables`` followed by the fixed names
    ``ctx``, ``msg``, ``print`` and ``client``. ``orig`` is appended only
    when ``include_orig`` is true.

    :param include_orig: whether to add ``'orig'`` to the returned names
    :return: list of name strings
    """
    # The conditional must guard only ['orig']. A conditional expression binds
    # looser than ``+``, so without the parentheses the whole concatenation
    # would collapse to [] whenever include_orig is false.
    return list(variables.keys()) + ['ctx', 'msg', 'print', 'client'] + (['orig'] if include_orig else [])


async def eval_message(code: str, message: Message, uses_orig=False) -> None:
await message_design.edit_message(message, code, 'Running...')

Expand Down
45 changes: 39 additions & 6 deletions app/run_code/parse_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,52 @@ class _Result:
uses_orig = False


def parse_code(text: str):
def _is_node_unknown_variable(node: ast.AST, locs: dict) -> bool:
    """Check if AST node is a Name, or an attribute chain rooted at a Name, not present in locals

    Attribute chains of any depth are judged by their leftmost name, so both
    ``example.com`` and ``www.example.com`` are unknown when ``example`` /
    ``www`` is not in ``locs``. A node whose root is not a plain Name (a call,
    a subscript, a constant, ...) is never reported as an unknown variable.

    :param node: AST node to inspect
    :param locs: collection of known names; only membership (``in``) is used
    :return: True if the node refers to a name that is not in ``locs``
    """
    # Strip every ``.attr`` layer to reach the root of the chain
    while isinstance(node, ast.Attribute):
        node = node.value
    return isinstance(node, ast.Name) and node.id not in locs


def _is_node_suspicious_binop(node: ast.AST, locs: dict) -> bool:
    """Check if AST node is an unknown variable, or an operation built only from such operands

    Returns True for a node accepted by _is_node_unknown_variable, for an
    ast.Compare whose left side and every comparator are accepted by it, and
    for an ast.BinOp / ast.BoolOp whose operands all pass this check
    recursively. Anything else yields False.
    """
    if _is_node_unknown_variable(node, locs):
        return True
    if isinstance(node, ast.Compare):
        # Comparisons are checked one level deep only: each side must itself be an unknown variable
        sides = [node.left, *node.comparators]
        return all(_is_node_unknown_variable(side, locs) for side in sides)
    if isinstance(node, ast.BinOp):
        return all(_is_node_suspicious_binop(side, locs) for side in (node.left, node.right))
    if isinstance(node, ast.BoolOp):
        return all(_is_node_suspicious_binop(value, locs) for value in node.values)
    return False


def _ignore_node(node: ast.AST, locs: dict) -> bool:
    """Check if AST node didn't seem to be meant to be code

    :param node: expression node taken from the parsed message
    :param locs: collection of known names; forwarded to the helper checks
    :return: True if the expression should be ignored rather than evaluated
    """
    return (
        # Messages like "python", "123" or "example.com"
        isinstance(node, ast.Constant)
        or _is_node_unknown_variable(node, locs)
        # Messages like "-1", "+spam" and "not foo.bar".
        # The operand must be a constant or an *unknown* variable: a unary
        # operation on a known name (e.g. "-x" after x was saved) is real code.
        or (
            isinstance(node, ast.UnaryOp)
            and (isinstance(node.operand, ast.Constant) or _is_node_unknown_variable(node.operand, locs))
        )
        # Messages like one-two, one is two, one >= two, one.b in two.c
        or _is_node_suspicious_binop(node, locs)
        # Messages like "yes, understood"
        or (isinstance(node, ast.Tuple) and all(_ignore_node(elt, locs) for elt in node.elts))
    )


def parse_code(text: str, locs: dict) -> _Result:
"""Parse given text and decide should it be evaluated as Python code"""
result = _Result()

try:
root = ast.parse(text, '', 'exec')
except (SyntaxError, ValueError):
return result

if len(root.body) == 1 and isinstance(root.body[0], ast.Expr):
if isinstance(root.body[0].value, (ast.Constant, ast.Name)):
return result
if isinstance(root.body[0].value, ast.UnaryOp) and isinstance(root.body[0].value.operand, ast.Constant):
return result
if all(isinstance(body_item, ast.Expr) and _ignore_node(body_item.value, locs) for body_item in root.body):
return result

result.is_code = True

Expand Down
18 changes: 15 additions & 3 deletions guide/docs/code_detection.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,23 @@ It turns out that regular text messages aren't often identified as code. TGPy ig

Although, optional disabling of auto-detection might be added in the future.

## Simple expressions
## What is ignored?

Simple names and constants are ignored. If you want to get some variable value, use `return variable`.
TL;DR: Some simple expressions, which could be email addresses, URLs or several comma- or hyphen-separated words
(as described in [issue 4](https://github.com/tm-a-t/TGPy/issues/4))

??? note "More details"
In this section, an **unknown** variable is one not present in `locals` — that is, one that was not saved in previous messages and is not built into TGPy (as `ctx`, `orig`, `msg`, `print` and `client` are).
Attributes of unknown variables are also considered unknown.

**Ignored** expressions are expressions in the list below:

* Constants like `1` or `"abcd"` and unknown variables
* Binary operations on unknown variables (recursively, i.e., `a - b - c` is also ignored in case `a`, `b`, `c` are unknown)
* Unary operations on constants or unknown variables
* Tuples of ignored expressions
* Multiple ignored expressions (i.e. separated by `;` or newline)

In future updates some other simple expressions will be ignored, too.

## Cancel evaluation

Expand Down

0 comments on commit 04fc88e

Please sign in to comment.