Skip to content

Commit

Permalink
parser+lev+types: suggestions if unknown keyword found:
Browse files Browse the repository at this point in the history
If an identifier is found in a place where a keyword should be (for
example at the start of a statement), the diagnostic now displays the
three most similar keywords as suggestions, ranked by their Levenshtein
distance to the identifier across all known keywords:

    =========================== example/stmt.sql ===========================
    error[UnknownKeyword]: Unknown Keyword
     -> /home/teo/programming/sqleibniz/example/stmt.sql:65:1
     63 | -- https://www.sqlite.org/lang_view.html
     64 | DROP VIEW view_name;
     65 | SELET VIEW IF EXISTS schema_name.view_name;
        | ^^^^^ error occurs here.
        |
        ~ note: 'SELET' is not a know keyword, did you mean:
            - SELECT
            - SET
            - LEFT
      * UnknownKeyword: Source file contains an unknown keyword
    =============================== Summary ================================
    [-] example/stmt.sql:
        1 Error(s) detected
        0 Error(s) ignored

    => 0/1 Files verified successfully, 1 verification failed.
  • Loading branch information
xNaCly committed Nov 23, 2024
1 parent 90fcd43 commit 9397048
Show file tree
Hide file tree
Showing 4 changed files with 199 additions and 5 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ dynamic correctness. See below for a list of currently implemented features.
- [x] highlighting the error in the faulty code snippet
- [x] explanation why the specific error was emitted based on its Rule
- [ ] possible fix suggestions
- [x] suggestions for unknown and possibly misspelled keywords
- [ ] language server protocol
- [ ] diagnostics for full sqleibniz analysis
- [ ] snippets
Expand Down
33 changes: 31 additions & 2 deletions src/lev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,35 @@
/// into the other
///
/// see: https://en.wikipedia.org/wiki/Levenshtein_distance
pub fn distance(a: &str, b: &str) -> usize {
todo!("levenshtein_distance");
pub fn distance(a: &[u8], b: &[u8]) -> usize {
    // Computes the Levenshtein distance between the byte strings `a` and `b`:
    // the minimum number of single-byte insertions, deletions and
    // substitutions needed to turn `a` into `b`.
    //
    // Uses the Wagner-Fischer dynamic-programming scheme with only two rows of
    // the table kept alive, so the cost is O(a.len() * b.len()) time and
    // O(b.len()) memory instead of the exponential blow-up of the plain
    // three-way recursion.

    // Turning a string into (or out of) the empty string costs one edit per byte.
    if a.is_empty() {
        return b.len();
    }
    if b.is_empty() {
        return a.len();
    }

    // `prev[j]` holds the distance between the already processed prefix of `a`
    // and the first `j` bytes of `b`; it starts as the row for the empty prefix.
    let mut prev: Vec<usize> = (0..=b.len()).collect();
    let mut cur: Vec<usize> = vec![0; b.len() + 1];

    for (i, &byte_a) in a.iter().enumerate() {
        // Distance from the first `i + 1` bytes of `a` to the empty string.
        cur[0] = i + 1;
        for (j, &byte_b) in b.iter().enumerate() {
            let substitution = prev[j] + usize::from(byte_a != byte_b);
            let deletion = prev[j + 1] + 1;
            let insertion = cur[j] + 1;
            cur[j + 1] = substitution.min(deletion).min(insertion);
        }
        // The freshly filled row becomes the reference row for the next byte of `a`.
        std::mem::swap(&mut prev, &mut cur);
    }

    prev[b.len()]
}

#[cfg(test)]
mod lev {
    use super::distance;

    /// Checks the textbook "kitten" -> "sitting" pair, which needs exactly
    /// three edits (two substitutions and one insertion).
    ///
    /// see: https://en.wikipedia.org/wiki/Levenshtein_distance#Example
    #[test]
    fn kitten_sitting() {
        let source = "kitten".as_bytes();
        let target = "sitting".as_bytes();
        let edits = distance(source, target);
        assert_eq!(edits, 3);
    }
}
5 changes: 2 additions & 3 deletions src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -310,10 +310,9 @@ impl<'a> Parser<'a> {
self.errors.push(self.err(
"Unknown Keyword",
&format!(
"'{}' is not a know keyword, did you mean '{}'?",
"'{}' is not a know keyword, did you mean: \n\t- {}",
name,
// TODO: replace with top 3 result of levenstein
"levenstein results here"
Keyword::suggestions(name).join("\n\t- ").as_str()
),
self.cur()?,
Rule::UnknownKeyword,
Expand Down
165 changes: 165 additions & 0 deletions src/types/keyword.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
use std::collections::BTreeMap;

use crate::lev;

#[derive(Debug, PartialEq, Clone)]
#[allow(non_camel_case_types)]
#[allow(clippy::upper_case_acronyms)]
Expand Down Expand Up @@ -152,6 +156,167 @@ pub enum Keyword {
}

impl Keyword {
/// suggestions returns three suggestions based on their smallest Levenshtein_distance computed via lev::distance
pub fn suggestions(s: &str) -> Vec<&str> {
let input = s.to_uppercase();
let bytes = input.as_bytes();
vec![
"ABORT",
"ACTION",
"ADD",
"AFTER",
"ALL",
"ALTER",
"ALWAYS",
"ANALYZE",
"AND",
"AS",
"ASC",
"ATTACH",
"AUTOINCREMENT",
"BEFORE",
"BEGIN",
"BETWEEN",
"BY",
"CASCADE",
"CASE",
"CAST",
"CHECK",
"COLLATE",
"COLUMN",
"COMMIT",
"CONFLICT",
"CONSTRAINT",
"CREATE",
"CROSS",
"CURRENT",
"CURRENT_DATE",
"CURRENT_TIME",
"CURRENT_TIMESTAMP",
"DATABASE",
"DEFAULT",
"DEFERRABLE",
"DEFERRED",
"DELETE",
"DESC",
"DETACH",
"DISTINCT",
"DO",
"DROP",
"EACH",
"ELSE",
"END",
"ESCAPE",
"EXCEPT",
"EXCLUDE",
"EXCLUSIVE",
"EXISTS",
"EXPLAIN",
"FAIL",
"FILTER",
"FIRST",
"FOLLOWING",
"FOR",
"FOREIGN",
"FROM",
"FULL",
"GENERATED",
"GLOB",
"GROUP",
"GROUPS",
"HAVING",
"IF",
"IGNORE",
"IMMEDIATE",
"IN",
"INDEX",
"INDEXED",
"INITIALLY",
"INNER",
"INSERT",
"INSTEAD",
"INTERSECT",
"INTO",
"IS",
"ISNULL",
"JOIN",
"KEY",
"LAST",
"LEFT",
"LIKE",
"LIMIT",
"MATCH",
"MATERIALIZED",
"NATURAL",
"NO",
"NOT",
"NOTHING",
"NOTNULL",
"NULL",
"NULLS",
"OF",
"OFFSET",
"ON",
"OR",
"ORDER",
"OTHERS",
"OUTER",
"OVER",
"PARTITION",
"PLAN",
"PRAGMA",
"PRECEDING",
"PRIMARY",
"QUERY",
"RAISE",
"RANGE",
"RECURSIVE",
"REFERENCES",
"REGEXP",
"REINDEX",
"RELEASE",
"RENAME",
"REPLACE",
"RESTRICT",
"RETURNING",
"RIGHT",
"ROLLBACK",
"ROW",
"ROWS",
"SAVEPOINT",
"SELECT",
"SET",
"TABLE",
"TEMP",
"TEMPORARY",
"THEN",
"TIES",
"TO",
"TRANSACTION",
"TRIGGER",
"UNBOUNDED",
"UNION",
"UNIQUE",
"UPDATE",
"USING",
"VACUUM",
"VALUES",
"VIEW",
"VIRTUAL",
"WHEN",
"WHERE",
"WINDOW",
"WITH",
"WITHOUT",
]
.into_iter()
.map(|keyword| (lev::distance(bytes, keyword.as_bytes()), keyword))
.collect::<BTreeMap<usize, &str>>()
.into_values()
.take(3)
.collect()
}

pub fn from_str(s: &str) -> Option<Keyword> {
Some(match s.to_uppercase().as_str() {
"ABORT" => Keyword::ABORT,
Expand Down

0 comments on commit 9397048

Please sign in to comment.