Skip to content

Commit

Permalink
Merge pull request #4 from TommYDeeee/ast
Browse files Browse the repository at this point in the history
ast implementation
  • Loading branch information
TommYDeeee authored Feb 9, 2024
2 parents 528b1f5 + ba83809 commit 85f79fd
Show file tree
Hide file tree
Showing 49 changed files with 3,054 additions and 727 deletions.
45 changes: 43 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@ logos = "0.13.0"
rowan-test = { git = "https://github.com/avast/avast-rowan.git" }
text-size = "1.1.1"
drop_bomb = "0.1.5"
ungrammar = "1.16.1"
itertools = "0.12.0"
xshell = "0.1"

[dev-dependencies]
goldenfile = "1.6.0"
globwalk = "0.9.1"
proc-macro2 = "1.0.78"
quote = "1.0.35"
3 changes: 1 addition & 2 deletions example.yar
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,5 @@ rule test
$a = "foo"
$b = "bar"
condition:
$a or
$b and true
$b and not true or false
}
3 changes: 3 additions & 0 deletions rustfmt.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
reorder_modules = false
use_small_heuristics = "Max"
edition = "2021"
71 changes: 37 additions & 34 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,13 @@
use crate::{parser::syntaxkind::SyntaxKind, syntax::syntax_error::SyntaxError};
//! This module contains the lexer for the YARA language.
//! The lexer is implemented using the `logos` crate.
//! It converts the input text into a stream of tokens.
//!
//! Logos tokens are converted to `SyntaxKind`, which the parser uses to build the syntax tree.

use crate::{
parser::syntax_kind::{SyntaxKind, T},
syntax::syntax_error::SyntaxError,
};
use logos::Logos;
use std::fmt;
use std::num::ParseIntError;
Expand Down Expand Up @@ -136,44 +145,38 @@ pub fn tokenize(text: &str) -> (Vec<Token>, Vec<SyntaxError>) {
SyntaxKind::ERROR
}
};
tokens.push(Token {
kind: syntaxkind,
len: token_len,
});
tokens.push(Token { kind: syntaxkind, len: token_len });
offset += range.len();
}

// Add EOF token at the end
tokens.push(Token {
kind: SyntaxKind::EOF,
len: 0.into(),
});
tokens.push(Token { kind: SyntaxKind::EOF, len: 0.into() });

(tokens, errors)
}

// Convert LogosToken to SyntaxKind
fn logos_tokenkind_to_syntaxkind(token: LogosToken) -> SyntaxKind {
match token {
LogosToken::Rule => SyntaxKind::RULE,
LogosToken::Strings => SyntaxKind::STRINGS,
LogosToken::Condition => SyntaxKind::CONDITION,
LogosToken::And => SyntaxKind::AND,
LogosToken::Or => SyntaxKind::OR,
LogosToken::Not => SyntaxKind::NOT,
LogosToken::Rule => SyntaxKind::RULE_KW,
LogosToken::Strings => SyntaxKind::STRINGS_KW,
LogosToken::Condition => SyntaxKind::CONDITION_KW,
LogosToken::And => SyntaxKind::AND_KW,
LogosToken::Or => SyntaxKind::OR_KW,
LogosToken::Not => SyntaxKind::NOT_KW,
LogosToken::Identifier(_) => SyntaxKind::IDENTIFIER,
LogosToken::Variable(_) => SyntaxKind::VARIABLE,
LogosToken::String(_) => SyntaxKind::STRING,
LogosToken::Assign => SyntaxKind::ASSIGN,
LogosToken::Colon => SyntaxKind::COLON,
LogosToken::LBrace => SyntaxKind::LBRACE,
LogosToken::RBrace => SyntaxKind::RBRACE,
LogosToken::LParen => SyntaxKind::LPAREN,
LogosToken::RParen => SyntaxKind::RPAREN,
LogosToken::Comma => SyntaxKind::COMMA,
LogosToken::String(_) => SyntaxKind::STRING_LIT,
LogosToken::Assign => T![=],
LogosToken::Colon => T![:],
LogosToken::LBrace => T!['{'],
LogosToken::RBrace => T!['}'],
LogosToken::LParen => T!['('],
LogosToken::RParen => T![')'],
LogosToken::Comma => T![,],
LogosToken::Number(_) => SyntaxKind::NUMBER,
LogosToken::True => SyntaxKind::TRUE,
LogosToken::False => SyntaxKind::FALSE,
LogosToken::True => SyntaxKind::TRUE_KW,
LogosToken::False => SyntaxKind::FALSE_KW,
LogosToken::Whitespace => SyntaxKind::WHITESPACE,
LogosToken::Comment | LogosToken::MultilineComment => SyntaxKind::COMMENT,
}
Expand Down Expand Up @@ -213,18 +216,18 @@ mod tests {
assert!(errors.is_empty());
assert_eq!(tokens.len(), 15);
assert_eq!(tokens[0].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[1].kind, SyntaxKind::RULE);
assert_eq!(tokens[1].kind, SyntaxKind::RULE_KW);
assert_eq!(tokens[2].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[3].kind, SyntaxKind::IDENTIFIER);
assert_eq!(tokens[4].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[5].kind, SyntaxKind::LBRACE);
assert_eq!(tokens[5].kind, SyntaxKind::L_BRACE);
assert_eq!(tokens[6].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[7].kind, SyntaxKind::CONDITION);
assert_eq!(tokens[7].kind, SyntaxKind::CONDITION_KW);
assert_eq!(tokens[8].kind, SyntaxKind::COLON);
assert_eq!(tokens[9].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[10].kind, SyntaxKind::VARIABLE);
assert_eq!(tokens[11].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[12].kind, SyntaxKind::RBRACE);
assert_eq!(tokens[12].kind, SyntaxKind::R_BRACE);
assert_eq!(tokens[13].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[14].kind, SyntaxKind::EOF);
}
Expand All @@ -242,28 +245,28 @@ mod tests {
assert_eq!(errors.len(), 1);
assert_eq!(tokens.len(), 25);
assert_eq!(tokens[0].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[1].kind, SyntaxKind::RULE);
assert_eq!(tokens[1].kind, SyntaxKind::RULE_KW);
assert_eq!(tokens[2].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[3].kind, SyntaxKind::IDENTIFIER);
assert_eq!(tokens[4].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[5].kind, SyntaxKind::LBRACE);
assert_eq!(tokens[5].kind, SyntaxKind::L_BRACE);
assert_eq!(tokens[6].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[7].kind, SyntaxKind::CONDITION);
assert_eq!(tokens[7].kind, SyntaxKind::CONDITION_KW);
assert_eq!(tokens[8].kind, SyntaxKind::COLON);
assert_eq!(tokens[9].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[10].kind, SyntaxKind::VARIABLE);
assert_eq!(tokens[11].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[12].kind, SyntaxKind::ASSIGN);
assert_eq!(tokens[13].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[14].kind, SyntaxKind::STRING);
assert_eq!(tokens[14].kind, SyntaxKind::STRING_LIT);
assert_eq!(tokens[15].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[16].kind, SyntaxKind::VARIABLE);
assert_eq!(tokens[17].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[18].kind, SyntaxKind::ASSIGN);
assert_eq!(tokens[19].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[20].kind, SyntaxKind::ERROR);
assert_eq!(tokens[21].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[22].kind, SyntaxKind::RBRACE);
assert_eq!(tokens[22].kind, SyntaxKind::R_BRACE);
assert_eq!(tokens[23].kind, SyntaxKind::WHITESPACE);
assert_eq!(tokens[24].kind, SyntaxKind::EOF);
}
Expand Down
Loading

0 comments on commit 85f79fd

Please sign in to comment.