Skip to content

Commit

Permalink
Improve handling of bad bytes in source files (#354)
Browse files Browse the repository at this point in the history
  • Loading branch information
Akuli authored Mar 22, 2023
1 parent 67db3fd commit 9627cdc
Show file tree
Hide file tree
Showing 7 changed files with 13 additions and 7 deletions.
4 changes: 2 additions & 2 deletions compare_compilers.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ function append_line()
local line="$2"
echo -e " ${YELLOW}Adding $line to $file${RESET}"

if grep -q $'\r' $error_list_file; then
if [ -f $file ] && grep -q $'\r' $file; then
# CRLF line endings (likely Windows, but depends on git settings)
printf "%s\r\n" "$line" >> "$file"
else
Expand Down Expand Up @@ -91,7 +91,7 @@ for action in tokenize parse run; do
(./jou $flag $file || true) &> tmp/compare_compilers/compiler_written_in_c.txt
(./self_hosted_compiler $flag $file || true) &> tmp/compare_compilers/self_hosted.txt

if grep -qxF $file <(cat $error_list_file | tr -d '\r'); then
if [ -f $error_list_file ] && grep -qxF $file <(cat $error_list_file | tr -d '\r'); then
# The file is skipped, so the two compilers should behave differently
if diff tmp/compare_compilers/compiler_written_in_c.txt tmp/compare_compilers/self_hosted.txt >/dev/null; then
if [ $fix = yes ]; then
Expand Down
1 change: 0 additions & 1 deletion self_hosted/parses_wrong.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ tests/should_succeed/local_import.jou
tests/should_succeed/method.jou
tests/should_succeed/printf.jou
tests/should_succeed/sizeof.jou
tests/syntax_error/bad_byte.jou
tests/wrong_type/arrow_operator_not_pointer_method.jou
tests/wrong_type/float_and_double.jou
tests/should_succeed/linked_list.jou
Expand Down
1 change: 0 additions & 1 deletion self_hosted/runs_wrong.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ tests/should_succeed/undefined_value_warning.jou
tests/should_succeed/unreachable_warning.jou
tests/should_succeed/unused_import.jou
tests/syntax_error/bad_addressof.jou
tests/syntax_error/bad_byte.jou
tests/syntax_error/self_outside_class.jou
tests/wrong_type/arrow_operator_not_pointer_method.jou
tests/wrong_type/assign_to_deref_non_pointer.jou
Expand Down
7 changes: 6 additions & 1 deletion self_hosted/tokenizer.jou
Original file line number Diff line number Diff line change
Expand Up @@ -495,11 +495,16 @@ class Tokenizer:
self->unread_byte(b)
token.kind = TokenKind::Operator
token.short_string = self->read_operator()
elif b == '\t':
fail(self->location, "Jou files cannot contain tab characters (use 4 spaces for indentation)")
elif b == '\0':
token.kind = TokenKind::EndOfFile
else:
message: byte[100]
sprintf(message, "unexpected byte %#02x", b)
if b < 0x80 and isprint(b) != 0:
sprintf(message, "unexpected byte '%c' (%#02x)", b, b)
else:
sprintf(message, "unexpected byte %#02x", b)
fail(self->location, message)

self->handle_parentheses(&token)
Expand Down
2 changes: 0 additions & 2 deletions self_hosted/tokenizes_wrong.txt

This file was deleted.

2 changes: 2 additions & 0 deletions src/tokenize.c
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,8 @@ static Token read_token(struct State *st)
break;
case '\'': t.type = TOKEN_CHAR; t.data.char_value = read_char_literal(st); break;
case '"': t.type = TOKEN_STRING; t.data.string_value = read_string(st, '"', NULL); break;
case '\t':
fail_with_error(st->location, "Jou files cannot contain tab characters (use 4 spaces for indentation)");
default:
if(is_identifier_or_number_byte(c)) {
read_identifier_or_number(st, c, &t.data.name);
Expand Down
3 changes: 3 additions & 0 deletions tests/syntax_error/tab.jou
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# This file contains a tab character. Make sure your editor doesn't ruin it.
# NOTE(review): the tab is presumably the indentation of the `return` line
# below; this view shows that line without any indentation -- confirm against
# the raw file before editing.
def main() -> int:
return 0 # Error: Jou files cannot contain tab characters (use 4 spaces for indentation)

0 comments on commit 9627cdc

Please sign in to comment.