diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..cb083446 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/target +.idea/ +*.iml \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 00000000..0823352e --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,100 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +dependencies = [ + "memchr", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi", + "winapi-build", +] + +[[package]] +name = "libc" +version = "0.2.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" +dependencies = [ + "libc", +] + +[[package]] +name = "parsing-rust" +version = "0.1.0" +dependencies = [ + "regex", +] + +[[package]] +name = "regex" +version = "0.1.80" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", + "thread_local", + "utf8-ranges", +] + +[[package]] +name = "regex-syntax" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" +dependencies = [ + "kernel32-sys", + "libc", +] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" +dependencies = [ + "thread-id", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 00000000..b64c2ef5 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "parsing-rust" +version = "0.1.0" +edition = "2018" +license = "MIT OR Apache-2.0" + +[dependencies] +regex = "0.1" \ No newline at end of file diff --git a/rust-toolchain b/rust-toolchain index e69de29b..8c5f8f7b 100644 --- a/rust-toolchain +++ b/rust-toolchain @@ -0,0 +1 @@ +nightly-2021-01-05 diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 00000000..1d04cc45 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,9 @@ +max_width = 120 +newline_style = "Unix" +indent_style = "Block" +normalize_comments = true +brace_style = "PreferSameLine" +reorder_imports = true +reorder_impl_items = true +reorder_modules = true +tab_spaces = 2 \ No newline at end of file diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 00000000..b585c096 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,208 @@ +#![feature(generic_associated_types)] +#![feature(associated_type_defaults)] +#![allow(incomplete_features)] +// https://github.com/fpinscala/fpinscala/blob/first-edition/answers/src/main/scala/fpinscala/parsing/Parsers.scala +use regex::Regex; + +pub struct Location { + input: String, + offset: u32, +} + +pub struct ParseError { + stack: Vec<(Location, String)>, +} + +pub trait Parser { + type Elm; + + fn or(&self, p: P) -> P + where + P: Parser; + + fn pure(a: A) -> P + where + P: Parser; + + fn map(self, f: F) -> P + where + P: Parser, + F: FnOnce(Self::Elm) -> B, + Self: Sized, { + self.flat_map(|e| Self::pure(f(e))) + } + + fn flat_map(self, f: F) -> P + where + P: Parser, + F: FnOnce(Self::Elm) -> P; +} + +pub trait Parsers { + type P; + + fn run(&self, p: Self::P, input: &str) -> Result; + + fn string(&self, s: String) -> Self::P; + + fn char(&self, c: char) -> Self::P { + let s = c.to_string(); + let p = self.string(s); + self.map(p, |e| e.chars().nth(0).unwrap()) + } + + fn default_succeed(&self, a: A) -> Self::P { + self.map(self.string("".to_string()),move |_| a) + } + + fn succeed(&self, a: A) -> Self::P; + + fn slice(&self, p: Self::P) -> Self::P; + + fn many1(&self, pf: PF) -> Self::P> + where + PF: Fn() -> Self::P, { + self.map2( + pf(), + || self.many(pf), + |a, b: Vec| { + let mut m = vec![]; + m.push(a); + m.extend(b); + m + }, + ) + } + + fn list_of_n(&self, n: i32, pf: PF) -> Self::P> + where + PF: Fn() -> Self::P, { + if n <= 0 { + self.succeed(Vec::new()) + } else { + self.map2( + pf(), + || self.list_of_n(n - 1, pf), + |a, b: Vec| { + let mut m = vec![]; + m.push(a); + m.extend(b); + m + }, + ) + } + } + + fn many(&self, pf: PF) -> Self::P> + where + PF: Fn() -> Self::P, { + let map2 = self.map2( + pf(), + || self.many(pf), + |a, b: Vec| { + let mut m = vec![]; + m.push(a); + m.extend(b); + m + }, + ); + self.or(map2, self.succeed(Vec::new())) + } + + fn or(&self, p1: Self::P, p2: Self::P) -> Self::P; + + fn flat_map(&self, p: Self::P, f: PF) -> Self::P + where + PF: FnOnce(A) -> Self::P; + + fn regex(&self, r: Regex) -> Self::P; + + fn product(&self, p1: Self::P, p2f: PF) -> Self::P<(A, B)> + where + PF: FnOnce() -> Self::P, { + self.flat_map(p1, |a| self.map(p2f(), move |b| (a, b))) + } + + fn map2(&self, pa: Self::P, pbf: PF, f: F) -> Self::P + where + PF: FnOnce() -> Self::P, + F: Fn(A, B) -> C, { + self.flat_map(pa, |a| self.map(pbf(), |b| f(a, b))) + } + + fn map(&self, p: Self::P, f: F) -> Self::P + where + F: FnOnce(A) -> B, { + self.flat_map(p, move |e| self.succeed(f(e))) + } + + fn label(&self, msg: String, p: Self::P) -> Self::P; + + fn scope(&self, msg: String, p: Self::P) -> Self::P; + + fn attempt(&self, p: Self::P) -> Self::P; + + fn skip_l(&self, p1: Self::P, p2f: PF) -> Self::P + where + PF: Fn() -> Self::P, { + self.map2(self.slice(p1), p2f, |_, b| b) + } + + fn skip_r(&self, p1: Self::P, p2f: PF) -> Self::P + where + PF: FnOnce() -> Self::P, { + self.map2(p1, || self.slice(p2f()), |a, _| a) + } + + fn opt(&self, p: Self::P) -> Self::P> { + self.or(self.map(p, Some), self.succeed(None)) + } + + fn whitespace(&self) -> Self::P { + self.regex(Regex::new("\\s*").unwrap()) + } + + fn digits(&self) -> Self::P { + self.regex(Regex::new("\\d+").unwrap()) + } + + fn double_string(&self) -> Self::P { + self.token(self.regex(Regex::new("[-+]?([0-9]*\\.)?[0-9]+([eE][-+]?[0-9]+)?").unwrap())) + } + + fn double(&self) -> Self::P { + let ds = self.double_string(); + let p = self.map(ds, |s: String| s.parse::().unwrap()); + self.label("double literal".to_string(), p) + } + + fn token(&self, p: Self::P) -> Self::P { + self.skip_r(self.attempt(p), || self.whitespace()) + } + + // fn sep(&self, p1: Self::P, p2f: PF) -> Self::P> + // where + // PF: FnOnce() -> Self::P; + + // fn sep1(&self, p1f: PF1, p2f: PF2) -> Self::P> + // where + // PF1: Fn() -> Self::P, + // PF2: Fn() -> Self::P, + // { + // let a = self.many(|| self.skip_l(p2f(), p1f)); + // self.map2( + // p1f(), + // || a, + // |a, b: Vec| { + // let mut m = vec![]; + // m.push(a); + // m.extend(b); + // m + // }, + // ) + // } +} + +fn main() { + println!("Hello, world!"); +}