Fix Read{,List}[] with TokenWord and improve ...
* Read{,List}[] with TokenWords needs to return two tokens sometimes (see the sketch after the change summary below).
* Adjust error messages so that the right caller name, Read or ReadList, is used.
* Simplify logic in eval/files_io/read.py.
* Add a data_dir variable that can be used in testing.
rocky committed Sep 29, 2024
1 parent a551242 commit 77d5f60
Showing 8 changed files with 148 additions and 51 deletions.
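
A minimal sketch of the TokenWords behavior this commit targets, using the MathicsSession helper from the test suite; the printed result is an expectation under that assumption, not a verified output:

from mathics.session import MathicsSession

session = MathicsSession()
result = session.evaluate(
    'ReadList[StringToStream["a+b-c"], Word, TokenWords -> {"+", "-"}]'
)
# A word followed by a token word now yields two entries, spliced flat into the list:
print(result)  # expected: {a, +, b, -, c}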
76 changes: 62 additions & 14 deletions mathics/builtin/files_io/files.py
@@ -45,6 +45,7 @@
MathicsOpen,
channel_to_stream,
close_stream,
parse_read_options,
read_name_and_stream,
)
from mathics.eval.makeboxes import do_format, format_element
@@ -703,7 +704,9 @@ def eval_default(self, exprs, filename, evaluation):
def validate_read_type(name: str, typ, evaluation: Evaluation):
"""
Validate a Read option type, and give a message if
the type is invalid. For Expession[Hold]
the type is invalid. For Expression[Hold], we convert it to
SymbolHoldExpression; String names like "Byte" are
converted to Symbols in the return value.
"""
if hasattr(typ, "head") and typ.head == SymbolHold:
if not hasattr(typ, "elements"):
@@ -715,6 +718,26 @@ def validate_read_type(name: str, typ, evaluation: Evaluation):
return None

return SymbolHoldExpression

if isinstance(typ, String):
typ = Symbol(typ.value)
elif not isinstance(typ, Symbol):
evaluation.message(name, "readf", typ)
return None

if typ.short_name not in (
"Byte",
"Character",
"Expression",
"Number",
"Real",
"Record",
"String",
"Word",
):
evaluation.message(name, "readf", typ)
return None

return typ
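
A quick sketch of how the new validation is meant to behave from the top level, using the MathicsSession helper from the test suite (outputs are expectations under that assumption, not verified results): a string type specification such as "Word" is normalized to the corresponding symbol, and anything outside the accepted set triggers the readf message.

from mathics.session import MathicsSession

session = MathicsSession()
session.evaluate('stream = StringToStream["Hello World"]')
print(session.evaluate('Read[stream, "Word"]'))  # expected: Hello  (String normalized to a Symbol)
session.evaluate('Close[stream]')

# An unrecognized string should now produce ReadList's own readf message,
# "Invalid is not a valid format specification.", and leave the call unevaluated.
session.evaluate('ReadList[StringToStream["abc"], "Invalid"]')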


@@ -791,11 +814,11 @@ class Read(Builtin):
= 5
#> Close[stream];
Reading a comment however will return the empty list:
Reading a comment, which is not an expression, will return 'Hold[Null]':
>> stream = StringToStream["(* ::Package:: *)"];
>> Read[stream, Hold[Expression]]
= {}
= Hold[Null]
#> Close[stream];
@@ -868,14 +891,26 @@ def eval(self, stream, types, evaluation: Evaluation, options: dict):
if new_type is None:
return
checked_types.append(new_type)
check_types = tuple(checked_types)
checked_types = tuple(checked_types)
else:
new_type = validate_read_type("Read", types, evaluation)
if new_type is None:
return
checked_types = (new_type,)

return eval_Read(name, n, checked_types, stream, evaluation, options)
result = eval_Read("Read", n, checked_types, stream, evaluation, options)
if isinstance(result, list):
if isinstance(types, ListExpression):
assert len(result) == len(
types.elements
), "internal error: eval_Read() should have a return for each type"
else:
assert (
len(result) == 1
), f"internal error: eval_Read() should return at most 1 element; got {result}"
return result[0]

return from_python(result)
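
The unwrapping above means a single type specification gives back a bare value while a list of types still gives a list; a usage sketch with the MathicsSession helper (expected outputs, assuming the stream contents below):

from mathics.session import MathicsSession

session = MathicsSession()
session.evaluate('stream = StringToStream["a 1 b 2"]')
print(session.evaluate('Read[stream, {Word, Number}]'))  # expected: {a, 1}
print(session.evaluate('Read[stream, Word]'))            # expected: b  (a bare value, not {b})
session.evaluate('Close[stream]')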


class ReadList(Read):
@@ -982,7 +1017,10 @@ class ReadList(Read):
>> InputForm[%]
= {123, abc}
"""
messages = {"opstl": "Value of option `1` should be a string or a list of strings."}
messages = {
"opstl": "Value of option `1` should be a string or a list of strings.",
"readf": "`1` is not a valid format specification.",
}
options = {
"NullRecords": "False",
"NullWords": "False",
@@ -998,17 +1036,16 @@ class ReadList(Read):
def eval(self, file, types, evaluation: Evaluation, options: dict):
"ReadList[file_, types_, OptionsPattern[ReadList]]"

py_options = parse_read_options(options)
# Options
# TODO: Implement extra options
# py_options = parse_read_options(options)
# null_records = py_options['NullRecords']
# null_words = py_options['NullWords']
# record_separators = py_options['RecordSeparators']
# token_words = py_options['TokenWords']
# word_separators = py_options['WordSeparators']

result = []
name, n, stream = read_name_and_stream(file, evaluation)

# FIXME: DRY better with Read[].
# Validate types parameter and store the
@@ -1020,30 +1057,41 @@ def eval(self, file, types, evaluation: Evaluation, options: dict):
if new_type is None:
return
checked_types.append(new_type)
check_types = tuple(checked_types)
checked_types = tuple(checked_types)
else:
new_type = validate_read_type("ReadList", types, evaluation)
if new_type is None:
return
checked_types = (new_type,)

name, n, stream = read_name_and_stream(file, evaluation)

if name is None:
return
elif name == SymbolFailed:
return SymbolFailed

while True:
tmp = eval_Read(name, n, checked_types, stream, evaluation, options)
next_elt = eval_Read(
"ReadList", n, checked_types, stream, evaluation, options
)

if tmp is None:
if next_elt is None:
return

if tmp is SymbolFailed:
if next_elt is SymbolFailed:
return

if tmp is SymbolEndOfFile:
if next_elt is SymbolEndOfFile:
break
result.append(tmp)

if isinstance(next_elt, list) and py_options["TokenWords"]:
# FIXME: This might not be correct in all cases.
# we probably need a more positive way to indicate whether next_elt
# was returned from TokenWord parsing or not.
result += next_elt
else:
result.append(next_elt)
return from_python(result)

def eval_n(self, file, types, n: Integer, evaluation: Evaluation, options: dict):
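The branch on py_options["TokenWords"] above splices a two-token return into result rather than nesting it; in plain Python the distinction is just the following (illustrative only):

result = []
result.append(["ab", "+"])   # nesting:  [['ab', '+']]

result = []
result += ["ab", "+"]        # splicing: ['ab', '+']
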
1 change: 1 addition & 0 deletions mathics/core/systemsymbols.py
@@ -273,6 +273,7 @@
SymbolVariance = Symbol("System`Variance")
SymbolWhitespace = Symbol("System`Whitespace")
SymbolWhitespaceCharacter = Symbol("System`WhitespaceCharacter")
SymbolWord = Symbol("System`Word")
SymbolWordBoundary = Symbol("System`WordBoundary")
SymbolWordCharacter = Symbol("System`WordCharacter")
SymbolXor = Symbol("System`Xor")
42 changes: 32 additions & 10 deletions mathics/eval/files_io/files.py
@@ -26,6 +26,7 @@
SymbolHoldExpression,
SymbolPath,
SymbolReal,
SymbolWord,
)
from mathics.core.util import canonic_filename
from mathics.eval.files_io.read import (
@@ -128,13 +129,14 @@ def eval_Read(
name: str, n: int, types: tuple, stream, evaluation: Evaluation, options: dict
):
"""
Evaluation method for Read[] and ReadList[]
Evaluation method for Read[] and ReadList[]. `name` will be either "Read" or
"ReadList" and is used in error messages
"""
types = to_mathics_list(*types)

for typ in types.elements:
if typ not in READ_TYPES:
evaluation.message("Read", "readf", typ)
evaluation.message(name, "readf", typ)
return SymbolFailed

separators = read_get_separators(options, evaluation)
@@ -199,7 +201,7 @@ def eval_Read(

if expr is SymbolEndOfFile:
evaluation.message(
"Read", "readt", tmp, to_expression("InputSteam", name, n)
name, "readt", tmp, to_expression("InputSteam", name, n)
)
return SymbolFailed
elif isinstance(expr, BaseElement):
@@ -219,7 +221,7 @@ def eval_Read(
tmp = float(tmp)
except ValueError:
evaluation.message(
"Read", "readn", to_expression("InputSteam", name, n)
name, "readn", to_expression("InputSteam", name, n)
)
return SymbolFailed
result.append(tmp)
Expand All @@ -231,7 +233,7 @@ def eval_Read(
tmp = float(tmp)
except ValueError:
evaluation.message(
"Read", "readn", to_expression("InputSteam", name, n)
name, "readn", to_expression("InputSteam", name, n)
)
return SymbolFailed
result.append(tmp)
@@ -242,17 +244,37 @@
if len(tmp) == 0:
raise EOFError
result.append(tmp.rstrip("\n"))
elif typ is Symbol("Word"):
result.append(next(read_word))
elif typ is SymbolWord:
# next() for word tokens can return one or two words:
# the next word in the list and a following TokenWord
# match. Therefore, test for this and do list-like
# appending here.

# THINK ABOUT: We might need to reconsider/refactor
# other cases to allow for multiple words as well. And
# for uniformity, we may want to redo the generators to
# always return *lists* instead of either a
# word or a list (which is always at most two words?)
words = next(read_word)
if not isinstance(words, list):
words = [words]
result += words

except EOFError:
return SymbolEndOfFile
except UnicodeDecodeError:
evaluation.message("General", "ucdec")
evaluation.message(name, "ucdec")

if isinstance(result, Symbol):
return result
if len(result) == 1:
return from_python(*result)
if isinstance(result, list):
result_len = len(result)
if result_len == 0:
if SymbolHoldExpression in types:
return Expression(SymbolHold, SymbolNull)
elif result_len == 2 and SymbolWord in types:
return [from_python(part) for part in result]
elif result_len == 1:
result = result[0]

return from_python(result)
31 changes: 10 additions & 21 deletions mathics/eval/files_io/read.py
@@ -159,9 +159,9 @@ def parse_read_options(options) -> dict:
string_quotes=False
)
assert isinstance(record_separators, list)
assert all(
isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators
)
# assert all(
# isinstance(s, str) and s[0] == s[-1] == '"' for s in record_separators
# )
record_separators = [s[1:-1] for s in record_separators]
result["RecordSeparators"] = record_separators

Expand All @@ -171,8 +171,6 @@ def parse_read_options(options) -> dict:
string_quotes=False
)
assert isinstance(word_separators, list)
assert all(isinstance(s, str) and s[0] == s[-1] == '"' for s in word_separators)
word_separators = [s[1:-1] for s in word_separators]
result["WordSeparators"] = word_separators

# NullRecords
Expand All @@ -190,7 +188,6 @@ def parse_read_options(options) -> dict:
# TokenWords
if "System`TokenWords" in keys:
token_words = options["System`TokenWords"].to_python(string_quotes=False)
assert token_words == []
result["TokenWords"] = token_words

return result
@@ -385,9 +382,7 @@ def read_from_stream(
else:
yield word
continue
last_word = word
word = ""
yield last_word
yield word
break

if tmp in word_separators:
Expand All @@ -396,30 +391,24 @@ def read_from_stream(
if stream.io.seekable():
stream.io.seek(stream.io.tell() - 1)
word += some_token_word_prefix
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield word
break

if accepted is not None and tmp not in accepted:
word += some_token_word_prefix
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield word
break

some_token_word_prefix += tmp
for token_word in token_words:
if token_word == some_token_word_prefix:
if word:
# Start here
last_word = word
word = ""
some_token_word_prefix = ""
yield last_word
yield token_word
yield [word, token_word]
else:
yield token_word
some_token_word_prefix = ""
break
else:
word += some_token_word_prefix
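The yield [word, token_word] change is easier to follow in isolation. Below is a self-contained simplification of the idea; it is not the real read_from_stream, which reads from a stream and also handles word separators, accepted characters, and EOF:

def split_with_token_words(text, token_words):
    """Yield words from text; when a token word terminates a pending word,
    yield the pair [word, token_word] so the caller can splice both in."""
    word = ""
    i = 0
    while i < len(text):
        for token_word in token_words:
            if text.startswith(token_word, i):
                if word:
                    # Two tokens have to come back at once, as in the change above.
                    yield [word, token_word]
                    word = ""
                else:
                    yield token_word
                i += len(token_word)
                break
        else:
            word += text[i]
            i += 1
    if word:
        yield word

print(list(split_with_token_words("a+b-c", ["+", "-"])))
# [['a', '+'], ['b', '-'], 'c']
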
6 changes: 0 additions & 6 deletions test/builtin/files_io/test_files.py
@@ -278,12 +278,6 @@ def test_close():
# "{Read, InputStream, String, {Real}}",
# "",
# ),
(
r'stream = StringToStream["\"abc123\""];ReadList[stream, "Invalid"]//{#1[[0]],#1[[2]]}&',
("Invalid is not a valid format specification.",),
"{ReadList, Invalid}",
"",
),
("Close[stream];", None, "Null", ""),
(
'ReadList[StringToStream["a 1 b 2"], {Word, Number}, 1]',