From fe5e7fc79b981d3208a34f598590cf91eb1d0fe9 Mon Sep 17 00:00:00 2001 From: Jim Kingdon Date: Sat, 30 Sep 2023 14:02:37 -0700 Subject: [PATCH] Fix set.mm check (#143) * Fix set.mm check First of all, use the develop branch rather than the master branch because the latter is rather old and we haven't been using it. Second of all, add --verify-markup and --parse-typesetting so that we run more of the checks (including most notably the ones which the set.mm automated checks enable). * `\r\n`-compatible newline detection --------- Co-authored-by: Mario Carneiro --- .github/workflows/verify.yml | 4 ++-- src/comment_parser.rs | 8 ++++---- src/parser.rs | 8 ++++---- src/verify_markup.rs | 9 +++++++++ 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/.github/workflows/verify.yml b/.github/workflows/verify.yml index 86c6e1e..4d0b023 100644 --- a/.github/workflows/verify.yml +++ b/.github/workflows/verify.yml @@ -48,5 +48,5 @@ jobs: run: cargo fmt -- --check - name: Verify official set.mm run: | - curl -s -G https://raw.githubusercontent.com/metamath/set.mm/master/set.mm > set.mm - cargo run -- --jobs 8 set.mm --verify --grammar --parse-stmt --verify-parse-stmt + curl -s -G https://raw.githubusercontent.com/metamath/set.mm/develop/set.mm > set.mm + cargo run -- --jobs 8 set.mm --verify --grammar --parse-stmt --verify-parse-stmt --verify-markup --parse-typesetting diff --git a/src/comment_parser.rs b/src/comment_parser.rs index 036f658..ae9e52f 100644 --- a/src/comment_parser.rs +++ b/src/comment_parser.rs @@ -304,7 +304,7 @@ impl<'a> CommentParser<'a> { while let Some(&c) = self.buf.get(self.pos) { match c { b'[' | b'`' if self.buf.get(self.pos + 1) == Some(&c) => self.pos += 2, - b' ' | b'\n' | b'`' => break, + b' ' | b'\r' | b'\n' | b'`' => break, b'[' if self.parse_bib().is_some() => break, b'<' if self.buf[self.pos..].starts_with(b"") || self.buf[self.pos..].starts_with(b"") => @@ -534,9 +534,9 @@ impl<'a> ParentheticalIter<'a> { pub fn new(buf: &'a [u8], span: Span) -> Self { lazy_static! { static ref PARENTHETICALS: Regex = Regex::new(concat!( - r"\((Contributed|Revised|Proof[ \n]+shortened)", - r"[ \n]+by[ \n]+([^,)]+),[ \n]+([0-9]{1,2}-[A-Z][a-z]{2}-[0-9]{4})\.\)|", - r"\((Proof[ \n]+modification|New[ \n]+usage)[ \n]+is[ \n]+discouraged\.\)", + r"\((Contributed|Revised|Proof[ \r\n]+shortened)", + r"[ \r\n]+by[ \r\n]+([^,)]+),[ \r\n]+([0-9]{1,2}-[A-Z][a-z]{2}-[0-9]{4})\.\)|", + r"\((Proof[ \r\n]+modification|New[ \r\n]+usage)[ \r\n]+is[ \r\n]+discouraged\.\)", )) .unwrap(); } diff --git a/src/parser.rs b/src/parser.rs index 310ca5c..c9863d3 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1110,13 +1110,13 @@ impl HeadingComment { pub fn parse(buf: &[u8], lvl: HeadingLevel, sp: Span) -> Option { lazy_static::lazy_static! { static ref MAJOR_PART: Regex = - Regex::new(r"^[ \n]+#{4,}\n *([^\n]*)\n#{4,}\n").unwrap(); + Regex::new(r"^[ \r\n]+#{4,}\r?\n *([^\n]*)\r?\n#{4,}\r?\n").unwrap(); static ref SECTION: Regex = - Regex::new(r"^[ \n]+(?:#\*){2,}#?\n *([^\n]*)\n(?:#\*){2,}#?\n").unwrap(); + Regex::new(r"^[ \r\n]+(?:#\*){2,}#?\r?\n *([^\n]*)\r?\n(?:#\*){2,}#?\r?\n").unwrap(); static ref SUBSECTION: Regex = - Regex::new(r"^[ \n]+(?:=-){2,}=?\n *([^\n]*)\n(?:=-){2,}=?\n").unwrap(); + Regex::new(r"^[ \r\n]+(?:=-){2,}=?\r?\n *([^\n]*)\r?\n(?:=-){2,}=?\r?\n").unwrap(); static ref SUBSUBSECTION: Regex = - Regex::new(r"^[ \n]+(?:-\.){2,}-?\n *([^\n]*)\n(?:-\.){2,}-?\n").unwrap(); + Regex::new(r"^[ \r\n]+(?:-\.){2,}-?\r?\n *([^\n]*)\r?\n(?:-\.){2,}-?\r?\n").unwrap(); } let regex = match lvl { HeadingLevel::MajorPart => &*MAJOR_PART, diff --git a/src/verify_markup.rs b/src/verify_markup.rs index f83fcc6..94d2168 100644 --- a/src/verify_markup.rs +++ b/src/verify_markup.rs @@ -286,6 +286,15 @@ impl Database { eol_check(&mut diags, &seg, line_start, i); line_start = i + 1; } + b'\r' => { + eol_check(&mut diags, &seg, line_start, i); + line_start = if seg.buffer.get(i + 1) == Some(&b'\n') { + iter.next(); + i + 2 + } else { + i + 1 + } + } b'\t' => { let count = seg.buffer[i..].iter().take_while(|&&c| c == b'\t').count(); for _ in 1..count {