diff --git a/bin/arguments.rs b/bin/arguments.rs index 88a91ee3..16e9ba9d 100644 --- a/bin/arguments.rs +++ b/bin/arguments.rs @@ -1,7 +1,8 @@ -use clap::{Args, Parser, Subcommand}; +use clap::{Args, Parser, Subcommand, ValueEnum}; use std::path::PathBuf; +use regex::Regex; -#[derive(Clone, Debug, Eq, Parser, PartialEq)] +#[derive(Clone, Debug, Parser)] #[clap(author, version, about)] #[clap(args_conflicts_with_subcommands = true, infer_subcommands = true)] pub struct Arguments { @@ -21,21 +22,22 @@ impl Arguments { } /// Set the logging verbosity or level. -#[derive(Args, Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Args, Copy, Clone, Debug)] pub struct Verbosity { #[clap( short, long, action = clap::ArgAction::Count, + global = true, help_heading("VERBOSITY"), conflicts_with_all(&["debug", "trace"]) )] /// Make the program more talkative. pub verbose: u8, - #[clap(short, long, help_heading("VERBOSITY"), conflicts_with_all(&["verbose", "trace"]))] + #[clap(short, long, global = true, help_heading("VERBOSITY"), conflicts_with_all(&["verbose", "trace"]))] /// Print debug messages. pub debug: bool, - #[clap(short, long, help_heading("VERBOSITY"), conflicts_with_all(&["verbose", "debug"]))] + #[clap(short, long, global = true, help_heading("VERBOSITY"), conflicts_with_all(&["verbose", "debug"]))] /// Print trace messages. pub trace: bool, } @@ -51,6 +53,40 @@ pub struct LanguageArguments { pub required: bool, } +/// Defines which heading will be included in the output. +#[derive(Args, Clone, Debug)] +pub struct HeadingArguments { + #[clap(short, long, help_heading("HEADING"), value_enum)] + /// The level of the heading to quote. + pub level: Option, + #[clap(short, long, help_heading("HEADING"))] + /// A regular expression to match the heading content with. 
+ pub pattern: Option, +} + +#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, ValueEnum)] +pub enum HeadingLevel { + H1, + H2, + H3, + H4, + H5, + H6, +} + +impl From for pulldown_cmark::HeadingLevel { + fn from(level: HeadingLevel) -> Self { + match level { + HeadingLevel::H1 => pulldown_cmark::HeadingLevel::H1, + HeadingLevel::H2 => pulldown_cmark::HeadingLevel::H2, + HeadingLevel::H3 => pulldown_cmark::HeadingLevel::H3, + HeadingLevel::H4 => pulldown_cmark::HeadingLevel::H4, + HeadingLevel::H5 => pulldown_cmark::HeadingLevel::H5, + HeadingLevel::H6 => pulldown_cmark::HeadingLevel::H6, + } + } +} + /// The input and output stream arguments for extracting a single file. #[derive(Args, Clone, Debug, Eq, PartialEq)] pub struct ExtractCommand { @@ -68,6 +104,23 @@ pub struct ExtractCommand { pub matcher: LanguageArguments, } +/// The input and output stream arguments for extracting a section of markdown from a single file. +#[derive(Args, Clone, Debug)] +pub struct QuoteCommand { + /// The input stream to read Markdown from. Defaults to STDIN. + #[clap(short, long, help_heading("IO"))] + pub input: Option, + /// The output stream to write the matching Markdown section to. Defaults to STDOUT. + /// The directory path to the file must already exist. + #[clap(short, long, help_heading("IO"))] + pub output: Option, + /// Overwrite the existing contents in the output stream. + #[clap(short, long, help_heading("IO"), requires("output"))] + pub force: bool, + #[clap(flatten)] + pub matcher: HeadingArguments, +} + #[derive(Clone, Debug, Eq, Parser, PartialEq)] /// Walks a directory tree, extracting each matching file found during the walk and outputting the contents to the output directory with the `.md` extension removed. pub struct WalkCommand { @@ -90,7 +143,8 @@ pub struct WalkCommand { } /// The sub-command to execute.
-#[derive(Clone, Debug, Eq, PartialEq, Subcommand)] +#[derive(Clone, Debug, Subcommand)] pub enum Commands { + Quote(QuoteCommand), Walk(WalkCommand), } diff --git a/bin/main.rs b/bin/main.rs index 0de4d04e..56d57000 100644 --- a/bin/main.rs +++ b/bin/main.rs @@ -1,7 +1,7 @@ -use crate::arguments::{Commands, ExtractCommand, LanguageArguments, Verbosity, WalkCommand}; +use crate::arguments::{Commands, ExtractCommand, HeadingArguments, LanguageArguments, QuoteCommand, Verbosity, WalkCommand}; use anyhow::Result; use arguments::Arguments; -use literate::{CodeMatcher, LanguageMatcher, LiterateError}; +use literate::{CodeMatcher, HeadingMatcher, LanguageMatcher, LiterateError, PatternMatcher}; use std::fs::File; use std::io::ErrorKind::BrokenPipe; use std::io::{stdin, stdout, Read, Write}; @@ -39,6 +39,7 @@ fn set_verbosity(verbosity: Verbosity) -> Result<()> { fn run_subcommand(arguments: Arguments) -> Result<()> { match arguments.command { None => run_extraction(arguments.extract), + Some(Commands::Quote(command)) => run_quote(command), Some(Commands::Walk(command)) => run_walk(command), } } @@ -69,6 +70,32 @@ fn run_extraction(arguments: ExtractCommand) -> Result<()> { } } +fn run_quote(arguments: QuoteCommand) -> Result<()> { + let input: Box = match arguments.input { + None => Box::new(stdin()), + Some(path) => Box::new(File::open(path)?), + }; + + let output: Box = match arguments.output { + None => Box::new(stdout()), + Some(path) => Box::new( + File::options() + .write(true) + .create(true) + .truncate(true) + .create_new(!arguments.force) + .open(path)?, + ), + }; + + let matcher: Box = arguments.matcher.into(); + match literate::quote(input, output, matcher) { + Ok(bytes) => Ok(info!("Extracted {bytes} bytes into the output directory.")), + Err(LiterateError::IO(error)) if error.kind() == BrokenPipe => Ok(()), + Err(error) => Ok(eprintln!("{error}")), + } +} + fn run_walk(command: WalkCommand) -> Result<()> { let matcher: Box = command.matcher.into(); @@ -85,6 
+112,12 @@ fn run_walk(command: WalkCommand) -> Result<()> { Ok(()) } +impl From for Box { + fn from(arguments: HeadingArguments) -> Self { + Box::new(PatternMatcher::new(arguments.level.map(pulldown_cmark::HeadingLevel::from), arguments.pattern)) + } +} + impl From for Box { fn from(arguments: LanguageArguments) -> Self { match arguments.language { diff --git a/examples/tortuga.ta.md b/examples/tortuga.ta.md index 655bea8c..a727e30a 100644 --- a/examples/tortuga.ta.md +++ b/examples/tortuga.ta.md @@ -36,6 +36,8 @@ f((x, y)) = y - x ``` ## Factorial +Calculate the factorial of an integer recursively: + ``` factorial(n = 0) = 1 factorial(n.0 > 0) = n * factorial(n - 1) @@ -47,4 +49,7 @@ fibonacci(n <= 1) = n fibonacci(n) = [ fibonacci(n - 2) + fibonacci(n - 1) ] -``` \ No newline at end of file +``` + +## +Empty heading \ No newline at end of file diff --git a/src/matcher.rs b/src/code.rs similarity index 97% rename from src/matcher.rs rename to src/code.rs index dd2ea876..78ff0b1a 100644 --- a/src/matcher.rs +++ b/src/code.rs @@ -65,7 +65,7 @@ impl Display for LanguageMatcher { impl LanguageMatcher { /// Creates a new [`LanguageMatcher`]. pub fn new(language: String, required: bool) -> Self { - LanguageMatcher { language, required } + Self { language, required } } } diff --git a/src/heading.rs b/src/heading.rs new file mode 100644 index 00000000..570b41c7 --- /dev/null +++ b/src/heading.rs @@ -0,0 +1,99 @@ +use std::fmt::{Display, Formatter}; +use pulldown_cmark::HeadingLevel; +use regex::Regex; + +/// Determines whether a heading should be included in the output. +pub trait HeadingMatcher { + /// Tests whether this heading should be included in the output. 
+ fn matches(&self, level: HeadingLevel, contents: Option<&str>) -> bool; +} + +impl HeadingMatcher for Box { + fn matches(&self, level: HeadingLevel, contents: Option<&str>) -> bool { + (**self).matches(level, contents) + } +} + +impl HeadingMatcher for &Matcher { + fn matches(&self, level: HeadingLevel, contents: Option<&str>) -> bool { + (*self).matches(level, contents) + } +} + +impl HeadingMatcher for bool { + fn matches(&self, _: HeadingLevel, _: Option<&str>) -> bool { + *self + } +} + +impl HeadingMatcher for HeadingLevel { + fn matches(&self, level: HeadingLevel, _: Option<&str>) -> bool { + *self == level + } +} + +impl HeadingMatcher for Option { + fn matches(&self, level: HeadingLevel, _: Option<&str>) -> bool { + self.map(|expected| expected == level).unwrap_or(true) + } +} + +impl HeadingMatcher for Option { + fn matches(&self, _: HeadingLevel, contents: Option<&str>) -> bool { + match (self.as_ref(), contents) { + (Some(regex), Some(contents)) => regex.is_match(contents), + (Some(_), None) => false, + (None, _) => true, + } + } +} + +impl HeadingMatcher for str { + fn matches(&self, _: HeadingLevel, contents: Option<&str>) -> bool { + contents.map(|c| self == c).unwrap_or(false) + } +} + +impl HeadingMatcher for Option<&str> { + fn matches(&self, _: HeadingLevel, contents: Option<&str>) -> bool { + match self { + None => true, + _ => *self == contents + } + } +} + +/// Matches the heading against an optional regular expression for the contents and an optional level. +/// An omitted level or pattern places no restriction on the headings that match.
+#[derive(Clone, Debug)] +pub struct PatternMatcher { + level: Option, + pattern: Option, +} + +impl Display for PatternMatcher { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self.level.as_ref() { + None => f.write_str("h*"), + Some(level) => level.fmt(f) + }?; + + match self.pattern.as_ref() { + None => f.write_str(" *"), + Some(pattern) => write!(f, " {}", pattern) + } + } +} + +impl PatternMatcher { + /// Creates a new [`PatternMatcher`]. + pub fn new(level: Option, pattern: Option) -> Self { + Self { level, pattern } + } +} + +impl HeadingMatcher for PatternMatcher { + fn matches(&self, level: HeadingLevel, contents: Option<&str>) -> bool { + HeadingMatcher::matches(&self.level, level, contents) && HeadingMatcher::matches(&self.pattern, level, contents) + } +} diff --git a/src/lib.rs b/src/lib.rs index 339f27dc..85c7317b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,18 +1,20 @@ #![cfg_attr(docsrs, feature(doc_cfg))] -use pulldown_cmark::{CodeBlockKind, Event, Options, Parser, Tag}; +use pulldown_cmark::{CodeBlockKind, Event, HeadingLevel, Options, Parser, Tag}; use std::io::{BufReader, Read, Write}; use tracing::trace; +mod code; mod error; -mod matcher; +mod heading; #[cfg(feature = "walk")] #[cfg_attr(docsrs, doc(cfg(feature = "walk")))] pub mod walk; +pub use code::{CodeMatcher, LanguageMatcher}; +pub use heading::{HeadingMatcher, PatternMatcher}; pub use error::LiterateError; -pub use matcher::{CodeMatcher, LanguageMatcher}; const MINIMUM_CAPACITY: usize = 1024; @@ -43,6 +45,8 @@ where let mut bytes = 0; for event in parser { + trace!("Received event: {:?}", event); + match event { Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(language))) => { printing = matcher.matches(Some(&*language).filter(|s| !s.trim().is_empty())); @@ -54,7 +58,77 @@ where Event::End(Tag::CodeBlock(CodeBlockKind::Fenced(_))) => { printing = false; } - event => trace!("Received event: {:?}", event), + _ => {}, + } + } + + output.flush()?; + + Ok(bytes) +} + 
+#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +enum QuoteState<'a> { + Candidate(HeadingLevel, &'a str), + Printing(HeadingLevel, &'a str), + Searching +} + +/// Quotes a section of Markdown from the input. +/// If the matcher returns [`true`] for a heading, the Markdown that follows is written to the output. +/// Otherwise, the contents are ignored. +/// If the text of the heading is blank, the matcher will get [`None`] as the contents. +/// +/// Returns the number of extracted bytes. +pub fn quote( + input: Input, + mut output: Output, + matcher: Matcher, +) -> Result + where + Input: Read, + Output: Write, + Matcher: HeadingMatcher, +{ + let mut buffer = BufReader::new(input); + let mut contents = String::with_capacity(MINIMUM_CAPACITY); + + buffer.read_to_string(&mut contents)?; + + let parser = Parser::new_ext(contents.as_str(), Options::all()); + + let mut state = QuoteState::Searching; + let mut bytes = 0; + + for (event, range) in parser.into_offset_iter() { + trace!("Received event: {:?} for excerpt: {:?}", event, &contents[range.clone()]); + + match (state, event) { + (QuoteState::Searching, Event::Start(Tag::Heading(level, ..))) => { + state = QuoteState::Candidate(level, &contents[range]); + } + (QuoteState::Printing(level, _), Event::Start(Tag::Heading(l, ..))) if level <= l => { + state = QuoteState::Candidate(level, &contents[range]); + } + (QuoteState::Candidate(level, excerpt), Event::Text(body)) => { + if matcher.matches(level, Some(&*body).filter(|s| !s.trim().is_empty())) { + state = QuoteState::Printing(level, excerpt); + } else { + state = QuoteState::Searching; + } + } + (QuoteState::Candidate(..), _) => { + state = QuoteState::Searching; + } + (QuoteState::Printing(..), _) => { + if bytes <= range.start { + let excerpt = &contents[range].as_bytes(); + output.write_all(excerpt)?; + output.write_all(b"\n")?; + bytes += excerpt.len(); + } + } + (_, _) => {} } }