Skip to content

Commit

Permalink
Add more test and tests for new stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
rocky committed Dec 11, 2024
1 parent 4eb5499 commit ab22393
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 7 deletions.
11 changes: 11 additions & 0 deletions mathics_scanner/prescanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def try_parse_base(start_shift: int, end_shift: int, base: int) -> None:
self.feeder.message("Syntax", "sntoct2")
elif last == 3:
self.feeder.message("Syntax", "sntoct1")
raise ScanError()
elif last == 4:
self.feeder.message("Syntax", "snthex")
else:
Expand Down Expand Up @@ -151,6 +152,16 @@ def try_parse_named_character(start_shift: int):
# Stay in same line fragment, but advance the cursor position.
self.pos = i + 1

# FIXME:
# The following code is boneheadedly wrong because
# the surrounding lexical context determines whether
# an escape sequence should be valid or not.
# For example, inside a comment, there is no such thing
# as an invalid escape sequence. And this causes \050, which is
# a valid escape sequence for "(", to get treated like
# a grouping symbol inside of a string.
# ...
#
# In the following loop, we look for and replace escape
# sequences. The current character under consideration is at
# self.code[self.pos]. When an escape sequence is found at
Expand Down
9 changes: 8 additions & 1 deletion mathics_scanner/tokeniser.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,13 @@ def t_String(self, match: re.Match) -> Token:
# We have a \ at the end of a line.
self.incomplete()
skipped_chars.append(self.pos)

# Code below is in pre-scanner. We might decide
# later to move that code here.
# elif self.code[self.pos + 1] in "01234567":
# # See if we have an octal number.
# try_parse_base(1, 4, 8)

else:
# newlines (\n), tabs (\t) and double backslash
# "\\" have the backslash preserved. But for other
Expand All @@ -591,7 +598,7 @@ def t_String(self, match: re.Match) -> Token:
self.feeder.message(
"Syntax", "stresc", self.code[self.pos : self.pos + 2]
)
return Token("String", "", start)
raise ScanError()

self.pos += 2
else:
Expand Down
26 changes: 20 additions & 6 deletions test/test_string_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

import pytest

from mathics_scanner.errors import IncompleteSyntaxError
from mathics_scanner.errors import IncompleteSyntaxError, ScanError
from mathics_scanner.feed import SingleLineFeeder
from mathics_scanner.tokeniser import Token, Tokeniser

Expand All @@ -17,10 +17,19 @@ def check_string(source_text, expected_text: str):
assert token.text == expected_text


def incomplete_error(s: str):
with pytest.raises(IncompleteSyntaxError):
def incomplete_error(s: str, failure_msg: str):
with pytest.raises(IncompleteSyntaxError) as excinfo:
get_tokens(s)

assert excinfo, failure_msg


# Test helper: check that calling get_tokens() on ``s`` raises ScanError.
# ``failure_msg`` is the message attached to the trailing assert.
# NOTE(review): ``excinfo`` is the object bound by ``pytest.raises(...) as``;
# it presumably is always truthy, in which case the final assert can never
# fail and ``failure_msg`` is never reported -- confirm against pytest docs.
def scan_error(s: str, failure_msg: str):
with pytest.raises(ScanError) as excinfo:
get_tokens(s)

assert excinfo, failure_msg


def single_token(source_text) -> Token:
tokens = get_tokens(source_text)
Expand All @@ -45,9 +54,14 @@ def test_string():
for ctrl_char in ("\b", "\f", "\n", "\r", "\t"):
check_string(f'"a{ctrl_char}"', f'"a{ctrl_char}"')

incomplete_error(r'"a\X"')
# Broken:
# "a\050", "a\051", "a\052"
# Prescanning eagerly replaces the escape sequences with
# symbols "(", ")", or "*" respectively and this messes up parsing
# somehow.
check_string(r'"abc"', r'"abc"')
incomplete_error(r'"abc')
check_string(r'"abc(*def*)"', r'"abc(*def*)"')
check_string(r'"a\"b\\c"', r'"a\"b\\c"')
incomplete_error(r'"\"')
incomplete_error(r'"abc', "String does not have terminating quote")
incomplete_error(r'"\"', "Unterminated escape sequence")
incomplete_error(r'"a\X"', '"X" is not a valid escape character')

0 comments on commit ab22393

Please sign in to comment.