Skip to content

Commit

Permalink
Save token positions and print them on errors
Browse files Browse the repository at this point in the history
  • Loading branch information
lluiscamino committed Jul 2, 2024
1 parent fc18363 commit 95cd114
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 124 deletions.
62 changes: 36 additions & 26 deletions src/em/Lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,52 +10,54 @@ std::vector<Token> Lexer::scanTokens() {
while (mPosition < mProgram.size()) {
switch (auto c = mProgram[mPosition]) {
case '{':
tokens.emplace_back(TokenType::LEFT_BRACE, c);
tokens.emplace_back(TokenType::LEFT_BRACE, c, mLocation);
break;
case '}':
tokens.emplace_back(TokenType::RIGHT_BRACE, c);
tokens.emplace_back(TokenType::RIGHT_BRACE, c, mLocation);
break;
case '(':
tokens.emplace_back(TokenType::LEFT_PAREN, c);
tokens.emplace_back(TokenType::LEFT_PAREN, c, mLocation);
break;
case ')':
tokens.emplace_back(TokenType::RIGHT_PAREN, c);
tokens.emplace_back(TokenType::RIGHT_PAREN, c, mLocation);
break;
case ',':
tokens.emplace_back(TokenType::COMMA, c);
tokens.emplace_back(TokenType::COMMA, c, mLocation);
break;
case '|':
tokens.emplace_back(TokenType::VERTICAL_BAR, c);
tokens.emplace_back(TokenType::VERTICAL_BAR, c, mLocation);
break;
case ':':
if (mProgram[++mPosition] == '=') {
tokens.emplace_back(TokenType::ASSIGN, L":=");
if (mProgram[updatePosition()] == '=') {
tokens.emplace_back(TokenType::ASSIGN, L":=", mLocation);
break;
}
throw std::logic_error("Expected = after :");
throw std::logic_error("Expected '=' after ':' at " + mLocation.str());
case '=':
tokens.emplace_back(TokenType::EQUAL, c);
tokens.emplace_back(TokenType::EQUAL, c, mLocation);
break;
case u'':
tokens.emplace_back(TokenType::NOT_EQUAL, c);
tokens.emplace_back(TokenType::NOT_EQUAL, c, mLocation);
break;
case u'':
tokens.emplace_back(TokenType::UNION, c);
tokens.emplace_back(TokenType::UNION, c, mLocation);
break;
case u'':
tokens.emplace_back(TokenType::INTERSECTION, c);
tokens.emplace_back(TokenType::INTERSECTION, c, mLocation);
break;
case u'':
tokens.emplace_back(TokenType::SUBSET, c);
tokens.emplace_back(TokenType::SUBSET, c, mLocation);
break;
case u'':
tokens.emplace_back(TokenType::NOT_SUBSET, c);
tokens.emplace_back(TokenType::NOT_SUBSET, c, mLocation);
break;
case u'':
tokens.emplace_back(TokenType::ELEMENT_OF, c);
tokens.emplace_back(TokenType::ELEMENT_OF, c, mLocation);
break;
case '\n':
tokens.emplace_back(TokenType::LINE_BREAK, L"\\n");
tokens.emplace_back(TokenType::LINE_BREAK, L"\\n", mLocation);
mLocation.line++;
mLocation.column = 0;
break;
default:
if (std::isdigit(c)) {
Expand All @@ -65,13 +67,13 @@ std::vector<Token> Lexer::scanTokens() {
} else if (!std::isspace(c)) {
throw std::invalid_argument("Character " +
utils::string::wStringToString({c}) +
" is not valid");
" is not valid at " + mLocation.str());
}
break;
}
mPosition++;
updatePosition();
}
tokens.emplace_back(TokenType::END_OF_FILE, L"🔚");
tokens.emplace_back(TokenType::END_OF_FILE, L"🔚", mLocation);
return tokens;
}

Expand All @@ -81,12 +83,13 @@ Token Lexer::scanNumber() {
if (!std::isdigit(mProgram[mPosition])) {
break;
}
mPosition++;
updatePosition();
}
auto len = mPosition - start;
mPosition--;
updatePosition(-1);
auto substr = mProgram.substr(start, len);
return {TokenType::NUMBER, std::wstring(substr.cbegin(), substr.cend())};
return {TokenType::NUMBER, std::wstring(substr.cbegin(), substr.cend()),
mLocation};
}

Token Lexer::scanIdentifier() {
Expand All @@ -95,12 +98,19 @@ Token Lexer::scanIdentifier() {
if (!isValidForIdentifier(mProgram[mPosition])) {
break;
}
mPosition++;
updatePosition();
}
auto len = mPosition - start;
mPosition--;
updatePosition(-1);
auto substr = mProgram.substr(start, len);
return {TokenType::IDENTIFIER, std::wstring(substr.cbegin(), substr.cend())};
return {TokenType::IDENTIFIER, std::wstring(substr.cbegin(), substr.cend()),
mLocation};
}

// Moves the lexer by `offset` characters and returns the new absolute index
// into the program text.
//
// The absolute index and the column of the current location are shifted by
// the same amount; the line number is left untouched. Both counters are
// unsigned, so a negative offset (e.g. -1 to step back one character) is
// applied through modular arithmetic.
unsigned int Lexer::updatePosition(int offset) {
  mLocation.column += offset;
  return mPosition += offset;
}

bool Lexer::isValidForIdentifier(wchar_t character) {
Expand Down
26 changes: 15 additions & 11 deletions src/em/Lexer.h
Original file line number Diff line number Diff line change
@@ -1,25 +1,29 @@
#pragma once

#include <vector>

#include "Token.h"

namespace em {

class Lexer {
public:
explicit Lexer(std::wstring mProgram);
class Lexer {
public:
explicit Lexer(std::wstring mProgram);

std::vector<Token> scanTokens();

std::vector<Token> scanTokens();
private:
Token scanNumber();

private:
Token scanNumber();
Token scanIdentifier();

Token scanIdentifier();
unsigned int updatePosition(int offset = 1);

static bool isValidForIdentifier(wchar_t character);
static bool isValidForIdentifier(wchar_t character);

std::wstring mProgram;
unsigned int mPosition{0};
};
std::wstring mProgram;
unsigned int mPosition{0};
Token::Location mLocation{1, 1};
};

} // namespace em
3 changes: 2 additions & 1 deletion src/em/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ Token Parser::consume(TokenType tokenType) {
if (match(tokenType)) {
return previous();
}
throw std::logic_error("Expected " + TokenTypeToString(tokenType));
throw std::logic_error("Expected " + TokenTypeToString(tokenType) + " at " +
mTokens[mPosition].location().str());
}

bool Parser::match(TokenType tokenType) { return match({tokenType}); }
Expand Down
13 changes: 8 additions & 5 deletions src/em/Token.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,22 @@
#include "utils/StringUtils.h"

namespace em {
Token::Token(TokenType type, std::wstring text)
: mType(type), mText(std::move(text)) {}
Token::Token(TokenType type, std::wstring text, Location location)
: mType(type), mText(std::move(text)), mLocation(location) {}

Token::Token(TokenType type, wchar_t text)
: mType(type), mText(std::wstring{text}) {}
Token::Token(TokenType type, wchar_t text, Location location)
: mType(type), mText(std::wstring{text}), mLocation(location) {}

// Returns the type this token was constructed with.
TokenType Token::type() const {
  return mType;
}

// Returns a copy of the token's text.
std::wstring Token::text() const {
  return mText;
}

// Returns a copy of the location stored in this token.
Token::Location Token::location() const {
  return mLocation;
}

std::ostream& operator<<(std::ostream& os, const Token& token) {
os << "(" << TokenTypeToString(token.mType) << ", '"
<< utils::string::wStringToString(token.mText) << "')";
<< utils::string::wStringToString(token.mText) << "', ' "
<< token.mLocation.str().c_str() << "')";
return os;
}
} // namespace em
164 changes: 88 additions & 76 deletions src/em/Token.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,89 +6,101 @@

namespace em {

// Kinds of token the lexer can produce.
enum class TokenType {
// Single-character punctuation: '{', '}', '(', ')', ',' and '|'.
LEFT_BRACE,
RIGHT_BRACE,
LEFT_PAREN,
RIGHT_PAREN,
COMMA,
VERTICAL_BAR,

// The two-character sequence ":=".
ASSIGN,

// Single-character operators; EQUAL is '='.
EQUAL,
NOT_EQUAL,
UNION,
INTERSECTION,
SUBSET,
NOT_SUBSET,
ELEMENT_OF,

// Multi-character lexemes: a run of digits, and a run of characters accepted
// by Lexer::isValidForIdentifier.
NUMBER,
IDENTIFIER,

// '\n', and the marker the lexer appends after the last character.
LINE_BREAK,
END_OF_FILE
};
enum class TokenType {
LEFT_BRACE,
RIGHT_BRACE,
LEFT_PAREN,
RIGHT_PAREN,
COMMA,
VERTICAL_BAR,

/// Returns the printable name of a token type, as used when streaming a Token
/// and in the parser's "Expected ..." error messages.
///
/// Every enumerator is handled explicitly and the switch deliberately has no
/// `default:` label, so compilers can warn (-Wswitch) when a TokenType is added
/// without a matching case here.
///
/// @param tokenType Token type to name.
/// @return The name for `tokenType`; "UNKNOWN" if the value is not a declared
///         enumerator (only reachable when assertions are compiled out).
inline std::string TokenTypeToString(TokenType tokenType) {
  switch (tokenType) {
    // NOTE(review): these two names say BRACKET while the enumerators say
    // BRACE. Left unchanged because the text is part of emitted diagnostics.
    case TokenType::LEFT_BRACE:
      return "LEFT_BRACKET";
    case TokenType::RIGHT_BRACE:
      return "RIGHT_BRACKET";
    case TokenType::LEFT_PAREN:
      return "LEFT_PAREN";
    case TokenType::RIGHT_PAREN:
      return "RIGHT_PAREN";
    case TokenType::COMMA:
      return "COMMA";
    case TokenType::VERTICAL_BAR:
      return "VERTICAL_BAR";
    case TokenType::ASSIGN:
      return "ASSIGN";
    case TokenType::NUMBER:
      return "NUMBER";
    case TokenType::EQUAL:
      return "EQUAL";
    case TokenType::NOT_EQUAL:
      return "NOT_EQUAL";
    case TokenType::UNION:
      return "UNION";
    case TokenType::INTERSECTION:
      return "INTERSECTION";
    case TokenType::SUBSET:
      return "SUBSET";
    case TokenType::NOT_SUBSET:
      return "NOT_SUBSET";
    case TokenType::ELEMENT_OF:
      return "ELEMENT_OF";
    case TokenType::IDENTIFIER:
      return "IDENTIFIER";
    case TokenType::LINE_BREAK:
      return "LINE_BREAK";
    case TokenType::END_OF_FILE:
      return "END_OF_FILE";
  }
  // Only reached for a value outside the enumeration. With NDEBUG the assert
  // compiles to nothing, so an explicit return is required: flowing off the
  // end of a non-void function is undefined behaviour.
  assert(false && "unhandled TokenType");
  return "UNKNOWN";
}
ASSIGN,

class Token {
public:
Token(TokenType type, std::wstring text);
EQUAL,
NOT_EQUAL,
UNION,
INTERSECTION,
SUBSET,
NOT_SUBSET,
ELEMENT_OF,

Token(TokenType type, wchar_t text);
NUMBER,
IDENTIFIER,

[[nodiscard]] TokenType type() const;
LINE_BREAK,
END_OF_FILE
};

[[nodiscard]] std::wstring text() const;
/// Returns the printable name of a token type, as used when streaming a Token
/// and in the parser's "Expected ..." error messages.
///
/// Every enumerator is handled explicitly and the switch deliberately has no
/// `default:` label, so compilers can warn (-Wswitch) when a TokenType is added
/// without a matching case here.
///
/// @param tokenType Token type to name.
/// @return The name for `tokenType`; "UNKNOWN" if the value is not a declared
///         enumerator (only reachable when assertions are compiled out).
inline std::string TokenTypeToString(TokenType tokenType) {
  switch (tokenType) {
    // NOTE(review): these two names say BRACKET while the enumerators say
    // BRACE. Left unchanged because the text is part of emitted diagnostics.
    case TokenType::LEFT_BRACE:
      return "LEFT_BRACKET";
    case TokenType::RIGHT_BRACE:
      return "RIGHT_BRACKET";
    case TokenType::LEFT_PAREN:
      return "LEFT_PAREN";
    case TokenType::RIGHT_PAREN:
      return "RIGHT_PAREN";
    case TokenType::COMMA:
      return "COMMA";
    case TokenType::VERTICAL_BAR:
      return "VERTICAL_BAR";
    case TokenType::ASSIGN:
      return "ASSIGN";
    case TokenType::NUMBER:
      return "NUMBER";
    case TokenType::EQUAL:
      return "EQUAL";
    case TokenType::NOT_EQUAL:
      return "NOT_EQUAL";
    case TokenType::UNION:
      return "UNION";
    case TokenType::INTERSECTION:
      return "INTERSECTION";
    case TokenType::SUBSET:
      return "SUBSET";
    case TokenType::NOT_SUBSET:
      return "NOT_SUBSET";
    case TokenType::ELEMENT_OF:
      return "ELEMENT_OF";
    case TokenType::IDENTIFIER:
      return "IDENTIFIER";
    case TokenType::LINE_BREAK:
      return "LINE_BREAK";
    case TokenType::END_OF_FILE:
      return "END_OF_FILE";
  }
  // Only reached for a value outside the enumeration. With NDEBUG the assert
  // compiles to nothing, so an explicit return is required: flowing off the
  // end of a non-void function is undefined behaviour.
  assert(false && "unhandled TokenType");
  return "UNKNOWN";
}

friend std::ostream& operator<<(std::ostream& os, const Token& token);
class Token {
public:
struct Location {
unsigned int line, column;

private:
TokenType mType;
std::wstring mText;
[[nodiscard]] std::string str() const {
return "line: " + std::to_string(line) +
", col:" + std::to_string(column);
}
};

Token(TokenType type, std::wstring text, Location location);

Token(TokenType type, wchar_t text, Location location);

[[nodiscard]] TokenType type() const;

[[nodiscard]] std::wstring text() const;

[[nodiscard]] Location location() const;

friend std::ostream& operator<<(std::ostream& os, const Token& token);

private:
TokenType mType;
std::wstring mText;
Location mLocation;
};

} // namespace em
Loading

0 comments on commit 95cd114

Please sign in to comment.