diff --git a/Exception/ExceptionHelper.php b/Exception/ExceptionHelper.php new file mode 100644 index 00000000..57110109 --- /dev/null +++ b/Exception/ExceptionHelper.php @@ -0,0 +1,161 @@ + $result['line'], + 'code' => $code, + 'column' => $column, + 'highlight' => self::getStringHighligher($column), + ]; + } + + /** + * Returns the line on which the error occurred. If that line has no + * visible characters, the preceding lines are prepended to it, up to + * and including the nearest line that is not blank. + * + * @param array|string[] $textLines List of code lines; the last one is treated as the error line + * @return string Affected lines joined with "\n" + */ + private static function getAffectedCodeAsString(array $textLines) + { + $result = ''; + $i = 0; + + while (\count($textLines) && ++$i) { + $textLine = \array_pop($textLines); + $result = $textLine . ($i > 1 ? "\n" . $result : ''); + + if ('' !== \trim($textLine)) { // strict: a line holding only "0" is visible, not blank + break; + } + } + + return $result; + } + + /** + * Builds the marker line: spaces up to the error column, then an arrow. + * + * @param int $charsOffset Error offset in characters + * @return string + */ + private static function getStringHighligher($charsOffset) + { + $prefix = ''; + + if ($charsOffset > 0) { + $prefix = \str_repeat(' ', $charsOffset); + } + + return $prefix . '↑'; + } + + /** + * Converts a byte offset within a line into an offset in UTF-8 characters. + * + * @param string $line The code line in which the offset is measured + * @param int $bytesOffset Length of the offset in bytes + * @return int + */ + private static function getMbColumnPosition($line, $bytesOffset) + { + $slice = \substr($line, 0, $bytesOffset); + + return \mb_strlen($slice, 'UTF-8'); + } + + /** + * Returns information about the error location: line, column and affected text lines. 
+ * + * @param string $text The source code in which we search for a line and a column + * @param int $bytesOffset Offset in bytes relative to the beginning of the source code + * @return array Keys: "line" (1-based line number), "column" (byte offset inside that line), "trace" (lines up to and including that line) + */ + private static function getErrorInfo($text, $bytesOffset) + { + $result = [ + 'line' => 1, + 'column' => 0, + 'trace' => [], + ]; + + $current = 0; + + foreach (\explode("\n", $text) as $line => $code) { + $previous = $current; + $current += \strlen($code) + 1; + $result['trace'][] = $code; + + if ($current > $bytesOffset) { + return [ + 'line' => $line + 1, + 'column' => $bytesOffset - $previous, + 'trace' => $result['trace'] + ]; + } + } + + return $result; + } +} diff --git a/Exception/UnrecognizedToken.php b/Exception/UnrecognizedToken.php index 59963c07..214fd945 100644 --- a/Exception/UnrecognizedToken.php +++ b/Exception/UnrecognizedToken.php @@ -46,6 +46,8 @@ */ class UnrecognizedToken extends Exception { + use ExceptionHelper; + /** * Column. * @@ -53,8 +55,6 @@ class UnrecognizedToken extends Exception */ protected $column = 0; - - /** * Override line and add column support. * @@ -74,6 +74,25 @@ public function __construct($message, $code, $arg, $line, $column) return; } + /** + * @param string $message Already formatted message head; the position, code excerpt and marker are appended to it + * @param string $text Source code + * @param int $offsetInBytes Error offset in bytes + * @param int $code Code (the ID). + * @return static + */ + public static function fromOffset($message, $text, $offsetInBytes, $code = 0) + { + $info = self::getErrorPositionByOffset($text, $offsetInBytes); + + // The message carries %s placeholders filled from the [line, column] arguments, so literal "%" in caller text and code must be doubled + $message = \str_replace('%', '%%', $message) . ' at line %s and column %s' . \PHP_EOL . + \str_replace('%', '%%', $info['code']) . \PHP_EOL . + $info['highlight']; + + return new static($message, $code, [$info['line'], $info['column']], $info['line'], $info['column']); + } + /** * Get column. 
* diff --git a/Llk/Lexer.php b/Llk/Lexer.php index 68513678..cb69102f 100644 --- a/Llk/Lexer.php +++ b/Llk/Lexer.php @@ -83,8 +83,6 @@ class Lexer */ protected $_pcreOptions = null; - - /** * Constructor. * @@ -147,19 +145,9 @@ public function lexMe($text, array $tokens) $nextToken = $this->nextToken($offset); if (null === $nextToken) { - throw new Compiler\Exception\UnrecognizedToken( - 'Unrecognized token "%s" at line 1 and column %d:' . - "\n" . '%s' . "\n" . - str_repeat(' ', mb_strlen(substr($text, 0, $offset))) . '↑', - 0, - [ - mb_substr(substr($text, $offset), 0, 1), - $offset + 1, - $text - ], - 1, - $offset - ); + $error = \sprintf('Unrecognized token "%s"', \mb_substr(\substr($text, $offset), 0, 1)); /* message head only: fromOffset() appends the line, column, code excerpt and marker */ + + throw Compiler\Exception\UnrecognizedToken::fromOffset($error, $text, $offset); } if (true === $nextToken['keep']) { diff --git a/Llk/Parser.php b/Llk/Parser.php index ec35d1d2..40f5a8bc 100644 --- a/Llk/Parser.php +++ b/Llk/Parser.php @@ -186,46 +186,18 @@ public function parse($text, $rule = null, $tree = true) } if (false === $this->backtrack()) { - $token = $this->_errorToken; + $token = $this->_errorToken; if (null === $this->_errorToken) { $token = $this->_tokenSequence->current(); } - $offset = $token['offset']; - $line = 1; - $column = 1; - - if (!empty($text)) { - if (0 === $offset) { - $leftnl = 0; - } else { - $leftnl = strrpos($text, "\n", -(strlen($text) - $offset) - 1) ?: 0; - } - - $rightnl = strpos($text, "\n", $offset); - $line = substr_count($text, "\n", 0, $leftnl + 1) + 1; - $column = $offset - $leftnl + (0 === $leftnl); - - if (false !== $rightnl) { - $text = trim(substr($text, $leftnl, $rightnl - $leftnl), "\n"); - } - } + $error = \vsprintf('Unexpected token "%s" (%s)', [ + $token['value'], + $token['token'], + ]); - throw new Compiler\Exception\UnexpectedToken( - 'Unexpected token "%s" (%s) at line %d and column %d:' . - "\n" . '%s' . "\n" . str_repeat(' ', $column - 1) . 
'↑', - 0, - [ - $token['value'], - $token['token'], - $line, - $column, - $text - ], - $line, - $column - ); + throw Compiler\Exception\UnexpectedToken::fromOffset($error, $text, $token['offset']); /* NOTE(review): fromOffset() is declared on UnrecognizedToken in this patch; presumably UnexpectedToken inherits it - confirm */ } } while (true);