From 01c32a5220ef036bdc1d9bae8928336a815db619 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 11:40:33 +0100 Subject: [PATCH 1/7] fmt: clean up some small bits --- src/uu/fmt/src/linebreak.rs | 37 +++--- src/uu/fmt/src/parasplit.rs | 232 ++++++++++++++++++------------------ 2 files changed, 132 insertions(+), 137 deletions(-) diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index fbd990fff1e..7cd65d86149 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -46,7 +46,7 @@ pub fn break_lines( ostream: &mut BufWriter, ) -> std::io::Result<()> { // indent - let p_indent = ¶.indent_str[..]; + let p_indent = ¶.indent_str; let p_indent_len = para.indent_len; // words @@ -55,14 +55,12 @@ pub fn break_lines( // the first word will *always* appear on the first line // make sure of this here - let (w, w_len) = match p_words_words.next() { - Some(winfo) => (winfo.word, winfo.word_nchars), - None => { - return ostream.write_all(b"\n"); - } + let Some(winfo) = p_words_words.next() else { + return ostream.write_all(b"\n"); }; + // print the init, if it exists, and get its length - let p_init_len = w_len + let p_init_len = winfo.word_nchars + if opts.crown || opts.tagged { // handle "init" portion ostream.write_all(para.init_str.as_bytes())?; @@ -75,8 +73,9 @@ pub fn break_lines( // except that mail headers get no indent at all 0 }; + // write first word after writing init - ostream.write_all(w.as_bytes())?; + ostream.write_all(winfo.word.as_bytes())?; // does this paragraph require uniform spacing? let uniform = para.mail_header || opts.uniform; @@ -103,15 +102,16 @@ fn break_simple<'a, T: Iterator>>( mut iter: T, args: &mut BreakArgs<'a>, ) -> std::io::Result<()> { - iter.try_fold((args.init_len, false), |l, winfo| { - accum_words_simple(args, l, winfo) + iter.try_fold((args.init_len, false), |(l, prev_punct), winfo| { + accum_words_simple(args, l, prev_punct, winfo) })?; args.ostream.write_all(b"\n") } fn accum_words_simple<'a>( args: &mut BreakArgs<'a>, - (l, prev_punct): (usize, bool), + l: usize, + prev_punct: bool, winfo: &'a WordInfo<'a>, ) -> std::io::Result<(usize, bool)> { // compute the length of this word, considering how tabs will expand at this position on the line @@ -233,14 +233,14 @@ fn find_kp_breakpoints<'a, T: Iterator>>( linebreak: None, break_before: false, demerits: 0, - prev_rat: 0.0f32, + prev_rat: 0.0, length: args.init_len, fresh: false, }]; // this vec holds the current active linebreaks; next_ holds the breaks that will be active for // the next word - let active_breaks = &mut vec![0]; - let next_active_breaks = &mut vec![]; + let mut active_breaks = vec![0]; + let mut next_active_breaks = vec![]; let stretch = (args.opts.width - args.opts.goal) as isize; let minlength = args.opts.goal - stretch as usize; @@ -248,10 +248,7 @@ fn find_kp_breakpoints<'a, T: Iterator>>( let mut is_sentence_start = false; let mut least_demerits = 0; loop { - let w = match iter.next() { - None => break, - Some(w) => w, - }; + let Some(w) = iter.next() else { break }; // if this is the last word, we don't add additional demerits for this break let (is_last_word, is_sentence_end) = match iter.peek() { @@ -358,13 +355,13 @@ fn find_kp_breakpoints<'a, T: Iterator>>( least_demerits = cmp::max(ld_next, 0); } // swap in new list of active breaks - mem::swap(active_breaks, next_active_breaks); + mem::swap(&mut active_breaks, &mut next_active_breaks); // If this was the last word in a sentence, the next one must be the first in the next. is_sentence_start = is_sentence_end; } // return the best path - build_best_path(&linebreaks, active_breaks) + build_best_path(&linebreaks, &active_breaks) } fn build_best_path<'a>(paths: &[LineBreak<'a>], active: &[usize]) -> Vec<(&'a WordInfo<'a>, bool)> { diff --git a/src/uu/fmt/src/parasplit.rs b/src/uu/fmt/src/parasplit.rs index 68c8f78fa89..311ddbc9b83 100644 --- a/src/uu/fmt/src/parasplit.rs +++ b/src/uu/fmt/src/parasplit.rs @@ -52,18 +52,22 @@ impl Line { } } -// each line's prefix has to be considered to know whether to merge it with -// the next line or not +/// Each line's prefix has to be considered to know whether to merge it with +/// the next line or not #[derive(Debug)] pub struct FileLine { line: String, - indent_end: usize, // the end of the indent, always the start of the text - pfxind_end: usize, // the end of the PREFIX's indent, that is, the spaces before the prefix - indent_len: usize, // display length of indent taking into account tabs - prefix_len: usize, // PREFIX indent length taking into account tabs + /// The end of the indent, always the start of the text + indent_end: usize, + /// The end of the PREFIX's indent, that is, the spaces before the prefix + pfxind_end: usize, + /// Display length of indent taking into account tabs + indent_len: usize, + /// PREFIX indent length taking into account tabs + prefix_len: usize, } -// iterator that produces a stream of Lines from a file +/// Iterator that produces a stream of Lines from a file pub struct FileLines<'a> { opts: &'a FmtOptions, lines: Lines<&'a mut FileOrStdReader>, @@ -74,7 +78,7 @@ impl<'a> FileLines<'a> { FileLines { opts, lines } } - // returns true if this line should be formatted + /// returns true if this line should be formatted fn match_prefix(&self, line: &str) -> (bool, usize) { if !self.opts.use_prefix { return (true, 0); @@ -83,7 +87,7 @@ impl<'a> FileLines<'a> { FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix) } - // returns true if this line should be formatted + /// returns true if this line should be formatted fn match_anti_prefix(&self, line: &str) -> bool { if !self.opts.use_anti_prefix { return true; @@ -148,13 +152,7 @@ impl<'a> Iterator for FileLines<'a> { type Item = Line; fn next(&mut self) -> Option { - let n = match self.lines.next() { - Some(t) => match t { - Ok(tt) => tt, - Err(_) => return None, - }, - None => return None, - }; + let n = self.lines.next()?.ok()?; // if this line is entirely whitespace, // emit a blank line @@ -205,24 +203,33 @@ impl<'a> Iterator for FileLines<'a> { } } -// a paragraph : a collection of FileLines that are to be formatted -// plus info about the paragraph's indentation -// (but we only retain the String from the FileLine; the other info -// is only there to help us in deciding how to merge lines into Paragraphs +/// A paragraph : a collection of FileLines that are to be formatted +/// plus info about the paragraph's indentation +/// +/// We only retain the String from the FileLine; the other info +/// is only there to help us in deciding how to merge lines into Paragraphs #[derive(Debug)] pub struct Paragraph { - lines: Vec, // the lines of the file - pub init_str: String, // string representing the init, that is, the first line's indent - pub init_len: usize, // printable length of the init string considering TABWIDTH - init_end: usize, // byte location of end of init in first line String - pub indent_str: String, // string representing indent - pub indent_len: usize, // length of above - indent_end: usize, // byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) - pub mail_header: bool, // we need to know if this is a mail header because we do word splitting differently in that case + /// the lines of the file + lines: Vec, + /// string representing the init, that is, the first line's indent + pub init_str: String, + /// printable length of the init string considering TABWIDTH + pub init_len: usize, + /// byte location of end of init in first line String + init_end: usize, + /// string representing indent + pub indent_str: String, + /// length of above + pub indent_len: usize, + /// byte location of end of indent (in crown and tagged mode, only applies to 2nd line and onward) + indent_end: usize, + /// we need to know if this is a mail header because we do word splitting differently in that case + pub mail_header: bool, } -// an iterator producing a stream of paragraphs from a stream of lines -// given a set of options. +/// An iterator producing a stream of paragraphs from a stream of lines +/// given a set of options. pub struct ParagraphStream<'a> { lines: Peekable>, next_mail: bool, @@ -240,7 +247,7 @@ impl<'a> ParagraphStream<'a> { } } - // detect RFC822 mail header + /// Detect RFC822 mail header fn is_mail_header(line: &FileLine) -> bool { // a mail header begins with either "From " (envelope sender line) // or with a sequence of printable ASCII chars (33 to 126, inclusive, @@ -276,12 +283,9 @@ impl<'a> Iterator for ParagraphStream<'a> { #[allow(clippy::cognitive_complexity)] fn next(&mut self) -> Option> { // return a NoFormatLine in an Err; it should immediately be output - let noformat = match self.lines.peek() { - None => return None, - Some(l) => match *l { - Line::FormatLine(_) => false, - Line::NoFormatLine(_, _) => true, - }, + let noformat = match self.lines.peek()? { + Line::FormatLine(_) => false, + Line::NoFormatLine(_, _) => true, }; // found a NoFormatLine, immediately dump it out @@ -305,95 +309,89 @@ impl<'a> Iterator for ParagraphStream<'a> { let mut in_mail = false; let mut second_done = false; // for when we use crown or tagged mode loop { - { - // peek ahead - // need to explicitly force fl out of scope before we can call self.lines.next() - let fl = match self.lines.peek() { - None => break, - Some(l) => match *l { - Line::FormatLine(ref x) => x, - Line::NoFormatLine(..) => break, - }, - }; + // peek ahead + // need to explicitly force fl out of scope before we can call self.lines.next() + let Some(Line::FormatLine(fl)) = self.lines.peek() else { + break; + }; - if p_lines.is_empty() { - // first time through the loop, get things set up - // detect mail header - if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { - in_mail = true; - // there can't be any indent or pfxind because otherwise is_mail_header - // would fail since there cannot be any whitespace before the colon in a - // valid header field - indent_str.push_str(" "); - indent_len = 2; + if p_lines.is_empty() { + // first time through the loop, get things set up + // detect mail header + if self.opts.mail && self.next_mail && ParagraphStream::is_mail_header(fl) { + in_mail = true; + // there can't be any indent or pfxind because otherwise is_mail_header + // would fail since there cannot be any whitespace before the colon in a + // valid header field + indent_str.push_str(" "); + indent_len = 2; + } else { + if self.opts.crown || self.opts.tagged { + init_str.push_str(&fl.line[..fl.indent_end]); + init_len = fl.indent_len; + init_end = fl.indent_end; } else { - if self.opts.crown || self.opts.tagged { - init_str.push_str(&fl.line[..fl.indent_end]); - init_len = fl.indent_len; - init_end = fl.indent_end; - } else { - second_done = true; - } - - // these will be overwritten in the 2nd line of crown or tagged mode, but - // we are not guaranteed to get to the 2nd line, e.g., if the next line - // is a NoFormatLine or None. Thus, we set sane defaults the 1st time around - indent_str.push_str(&fl.line[..fl.indent_end]); - indent_len = fl.indent_len; - indent_end = fl.indent_end; - - // save these to check for matching lines - prefix_len = fl.prefix_len; - pfxind_end = fl.pfxind_end; - - // in tagged mode, add 4 spaces of additional indenting by default - // (gnu fmt's behavior is different: it seems to find the closest column to - // indent_end that is divisible by 3. But honestly that behavior seems - // pretty arbitrary. - // Perhaps a better default would be 1 TABWIDTH? But ugh that's so big. - if self.opts.tagged { - indent_str.push_str(" "); - indent_len += 4; - } - } - } else if in_mail { - // lines following mail headers must begin with spaces - if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { - break; // this line does not begin with spaces + second_done = true; } - } else if !second_done { - // now we have enough info to handle crown margin and tagged mode - // in both crown and tagged modes we require that prefix_len is the same - if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end { - break; - } - - // in tagged mode, indent has to be *different* on following lines - if self.opts.tagged - && indent_len - 4 == fl.indent_len - && indent_end == fl.indent_end - { - break; - } - - // this is part of the same paragraph, get the indent info from this line - indent_str.clear(); + // these will be overwritten in the 2nd line of crown or tagged mode, but + // we are not guaranteed to get to the 2nd line, e.g., if the next line + // is a NoFormatLine or None. Thus, we set sane defaults the 1st time around indent_str.push_str(&fl.line[..fl.indent_end]); indent_len = fl.indent_len; indent_end = fl.indent_end; - second_done = true; - } else { - // detect mismatch - if indent_end != fl.indent_end - || pfxind_end != fl.pfxind_end - || indent_len != fl.indent_len - || prefix_len != fl.prefix_len - { - break; + // save these to check for matching lines + prefix_len = fl.prefix_len; + pfxind_end = fl.pfxind_end; + + // in tagged mode, add 4 spaces of additional indenting by default + // (gnu fmt's behavior is different: it seems to find the closest column to + // indent_end that is divisible by 3. But honestly that behavior seems + // pretty arbitrary. + // Perhaps a better default would be 1 TABWIDTH? But ugh that's so big. + if self.opts.tagged { + indent_str.push_str(" "); + indent_len += 4; } } + } else if in_mail { + // lines following mail headers must begin with spaces + if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { + break; // this line does not begin with spaces + } + } else if !second_done { + // now we have enough info to handle crown margin and tagged mode + + // in both crown and tagged modes we require that prefix_len is the same + if prefix_len != fl.prefix_len || pfxind_end != fl.pfxind_end { + break; + } + + // in tagged mode, indent has to be *different* on following lines + if self.opts.tagged + && indent_len - 4 == fl.indent_len + && indent_end == fl.indent_end + { + break; + } + + // this is part of the same paragraph, get the indent info from this line + indent_str.clear(); + indent_str.push_str(&fl.line[..fl.indent_end]); + indent_len = fl.indent_len; + indent_end = fl.indent_end; + + second_done = true; + } else { + // detect mismatch + if indent_end != fl.indent_end + || pfxind_end != fl.pfxind_end + || indent_len != fl.indent_len + || prefix_len != fl.prefix_len + { + break; + } } p_lines.push(self.lines.next().unwrap().get_formatline().line); @@ -429,7 +427,7 @@ pub struct ParaWords<'a> { } impl<'a> ParaWords<'a> { - pub fn new<'b>(opts: &'b FmtOptions, para: &'b Paragraph) -> ParaWords<'b> { + pub fn new(opts: &'a FmtOptions, para: &'a Paragraph) -> Self { let mut pw = ParaWords { opts, para, From d78923e4ccda95db136c358913a09642f4ee9729 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 11:54:43 +0100 Subject: [PATCH 2/7] fmt: extract determining options to separate function --- src/uu/fmt/src/fmt.rs | 194 +++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 99 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index c30d923b76b..3a494c868e5 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -5,7 +5,7 @@ // spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix -use clap::{crate_version, Arg, ArgAction, Command}; +use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::cmp; use std::fs::File; use std::io::{stdin, stdout, Write}; @@ -40,6 +40,9 @@ static OPT_TAB_WIDTH: &str = "tab-width"; static ARG_FILES: &str = "files"; +// by default, goal is 93% of width +const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93; + pub type FileOrStdReader = BufReader>; pub struct FmtOptions { crown: bool, @@ -59,25 +62,97 @@ pub struct FmtOptions { tabwidth: usize, } -impl Default for FmtOptions { - fn default() -> Self { - Self { - crown: false, - tagged: false, - mail: false, - uniform: false, - quick: false, - split_only: false, - use_prefix: false, - prefix: String::new(), - xprefix: false, - use_anti_prefix: false, - anti_prefix: String::new(), - xanti_prefix: false, - width: 75, - goal: 70, - tabwidth: 8, +impl FmtOptions { + fn from_matches(matches: &ArgMatches) -> UResult { + let mut tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH); + let mut crown = matches.get_flag(OPT_CROWN_MARGIN); + + let mail = matches.get_flag(OPT_PRESERVE_HEADERS); + let uniform = matches.get_flag(OPT_UNIFORM_SPACING); + let quick = matches.get_flag(OPT_QUICK); + let split_only = matches.get_flag(OPT_SPLIT_ONLY); + + if crown { + tagged = false; + } + if split_only { + crown = false; + tagged = false; + } + + let xprefix = matches.contains_id(OPT_EXACT_PREFIX); + let xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); + + let mut prefix = String::new(); + let mut use_prefix = false; + if let Some(s) = matches.get_one::(OPT_PREFIX).map(String::from) { + prefix = s; + use_prefix = true; + }; + + let mut anti_prefix = String::new(); + let mut use_anti_prefix = false; + if let Some(s) = matches.get_one::(OPT_SKIP_PREFIX).map(String::from) { + anti_prefix = s; + use_anti_prefix = true; + }; + + let mut width = 75; + let mut goal = 70; + if let Some(w) = matches.get_one::(OPT_WIDTH) { + width = *w; + if width > MAX_WIDTH { + return Err(USimpleError::new( + 1, + format!("invalid width: '{}': Numerical result out of range", width), + )); + } + goal = cmp::min(width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100, width - 3); + }; + + if let Some(g) = matches.get_one::(OPT_GOAL) { + goal = *g; + if !matches.contains_id(OPT_WIDTH) { + width = cmp::max(goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO, goal + 3); + } else if goal > width { + return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); + } + }; + + let mut tabwidth = 8; + if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { + tabwidth = match s.parse::() { + Ok(t) => t, + Err(e) => { + return Err(USimpleError::new( + 1, + format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e), + )); + } + }; + }; + + if tabwidth < 1 { + tabwidth = 1; } + + Ok(Self { + crown, + tagged, + mail, + uniform, + quick, + split_only, + use_prefix, + prefix, + xprefix, + use_anti_prefix, + anti_prefix, + xanti_prefix, + width, + goal, + tabwidth, + }) } } @@ -90,12 +165,7 @@ impl Default for FmtOptions { /// # Returns /// /// A tuple containing a vector of file names and a `FmtOptions` struct. -#[allow(clippy::cognitive_complexity)] -#[allow(clippy::field_reassign_with_default)] fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions)> { - // by default, goal is 93% of width - const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93; - let matches = uu_app().try_get_matches_from(args)?; let mut files: Vec = matches @@ -103,81 +173,7 @@ fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); - let mut fmt_opts = FmtOptions::default(); - - fmt_opts.tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH); - if matches.get_flag(OPT_CROWN_MARGIN) { - fmt_opts.crown = true; - fmt_opts.tagged = false; - } - fmt_opts.mail = matches.get_flag(OPT_PRESERVE_HEADERS); - fmt_opts.uniform = matches.get_flag(OPT_UNIFORM_SPACING); - fmt_opts.quick = matches.get_flag(OPT_QUICK); - if matches.get_flag(OPT_SPLIT_ONLY) { - fmt_opts.split_only = true; - fmt_opts.crown = false; - fmt_opts.tagged = false; - } - fmt_opts.xprefix = matches.contains_id(OPT_EXACT_PREFIX); - fmt_opts.xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); - - if let Some(s) = matches.get_one::(OPT_PREFIX).map(String::from) { - fmt_opts.prefix = s; - fmt_opts.use_prefix = true; - }; - - if let Some(s) = matches.get_one::(OPT_SKIP_PREFIX).map(String::from) { - fmt_opts.anti_prefix = s; - fmt_opts.use_anti_prefix = true; - }; - - if let Some(width) = matches.get_one::(OPT_WIDTH) { - fmt_opts.width = *width; - if fmt_opts.width > MAX_WIDTH { - return Err(USimpleError::new( - 1, - format!( - "invalid width: '{}': Numerical result out of range", - fmt_opts.width, - ), - )); - } - fmt_opts.goal = cmp::min( - fmt_opts.width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100, - fmt_opts.width - 3, - ); - }; - - if let Some(goal) = matches.get_one::(OPT_GOAL) { - fmt_opts.goal = *goal; - if !matches.contains_id(OPT_WIDTH) { - fmt_opts.width = cmp::max( - fmt_opts.goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO, - fmt_opts.goal + 3, - ); - } else if fmt_opts.goal > fmt_opts.width { - return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); - } - }; - - if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { - fmt_opts.tabwidth = match s.parse::() { - Ok(t) => t, - Err(e) => { - return Err(USimpleError::new( - 1, - format!("Invalid TABWIDTH specification: {}: {}", s.quote(), e), - )); - } - }; - }; - - if fmt_opts.tabwidth < 1 { - fmt_opts.tabwidth = 1; - } - - // immutable now - let fmt_opts = fmt_opts; + let fmt_opts = FmtOptions::from_matches(&matches)?; if files.is_empty() { files.push("-".to_owned()); From f5206ce783d1606432c20f67d8ab027fcab06e7c Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:05:35 +0100 Subject: [PATCH 3/7] fmt: merge prefix and use_prefix options (same for anti_prefix) --- src/uu/fmt/src/fmt.rs | 23 ++++------------------- src/uu/fmt/src/parasplit.rs | 22 +++++++++------------- 2 files changed, 13 insertions(+), 32 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 3a494c868e5..3a02c642902 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -49,11 +49,9 @@ pub struct FmtOptions { tagged: bool, mail: bool, split_only: bool, - use_prefix: bool, - prefix: String, + prefix: Option, xprefix: bool, - use_anti_prefix: bool, - anti_prefix: String, + anti_prefix: Option, xanti_prefix: bool, uniform: bool, quick: bool, @@ -83,19 +81,8 @@ impl FmtOptions { let xprefix = matches.contains_id(OPT_EXACT_PREFIX); let xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); - let mut prefix = String::new(); - let mut use_prefix = false; - if let Some(s) = matches.get_one::(OPT_PREFIX).map(String::from) { - prefix = s; - use_prefix = true; - }; - - let mut anti_prefix = String::new(); - let mut use_anti_prefix = false; - if let Some(s) = matches.get_one::(OPT_SKIP_PREFIX).map(String::from) { - anti_prefix = s; - use_anti_prefix = true; - }; + let prefix = matches.get_one::(OPT_PREFIX).map(String::from); + let anti_prefix = matches.get_one::(OPT_SKIP_PREFIX).map(String::from); let mut width = 75; let mut goal = 70; @@ -143,10 +130,8 @@ impl FmtOptions { uniform, quick, split_only, - use_prefix, prefix, xprefix, - use_anti_prefix, anti_prefix, xanti_prefix, width, diff --git a/src/uu/fmt/src/parasplit.rs b/src/uu/fmt/src/parasplit.rs index 311ddbc9b83..f22400dff20 100644 --- a/src/uu/fmt/src/parasplit.rs +++ b/src/uu/fmt/src/parasplit.rs @@ -80,24 +80,20 @@ impl<'a> FileLines<'a> { /// returns true if this line should be formatted fn match_prefix(&self, line: &str) -> (bool, usize) { - if !self.opts.use_prefix { + let Some(prefix) = &self.opts.prefix else { return (true, 0); - } + }; - FileLines::match_prefix_generic(&self.opts.prefix[..], line, self.opts.xprefix) + FileLines::match_prefix_generic(prefix, line, self.opts.xprefix) } /// returns true if this line should be formatted fn match_anti_prefix(&self, line: &str) -> bool { - if !self.opts.use_anti_prefix { + let Some(anti_prefix) = &self.opts.anti_prefix else { return true; - } + }; - match FileLines::match_prefix_generic( - &self.opts.anti_prefix[..], - line, - self.opts.xanti_prefix, - ) { + match FileLines::match_prefix_generic(anti_prefix, line, self.opts.xanti_prefix) { (true, _) => false, (_, _) => true, } @@ -176,7 +172,7 @@ impl<'a> Iterator for FileLines<'a> { // not truly blank we will not allow mail headers on the // following line) if pmatch - && n[poffset + self.opts.prefix.len()..] + && n[poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len())..] .chars() .all(char::is_whitespace) { @@ -190,7 +186,7 @@ impl<'a> Iterator for FileLines<'a> { } // figure out the indent, prefix, and prefixindent ending points - let prefix_end = poffset + self.opts.prefix.len(); + let prefix_end = poffset + self.opts.prefix.as_ref().map_or(0, |s| s.len()); let (indent_end, prefix_len, indent_len) = self.compute_indent(&n[..], prefix_end); Some(Line::FormatLine(FileLine { @@ -357,7 +353,7 @@ impl<'a> Iterator for ParagraphStream<'a> { } } else if in_mail { // lines following mail headers must begin with spaces - if fl.indent_end == 0 || (self.opts.use_prefix && fl.pfxind_end == 0) { + if fl.indent_end == 0 || (self.opts.prefix.is_some() && fl.pfxind_end == 0) { break; // this line does not begin with spaces } } else if !second_done { From 96ca5e609eacf4fd09316da2c4bbd165ff052273 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:22:46 +0100 Subject: [PATCH 4/7] fmt: refactor width and goal calculation --- src/uu/fmt/src/fmt.rs | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 3a02c642902..0ed32641fbb 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -6,7 +6,6 @@ // spell-checker:ignore (ToDO) PSKIP linebreak ostream parasplit tabwidth xanti xprefix use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; -use std::cmp; use std::fs::File; use std::io::{stdin, stdout, Write}; use std::io::{BufReader, BufWriter, Read, Stdout}; @@ -84,28 +83,33 @@ impl FmtOptions { let prefix = matches.get_one::(OPT_PREFIX).map(String::from); let anti_prefix = matches.get_one::(OPT_SKIP_PREFIX).map(String::from); - let mut width = 75; - let mut goal = 70; - if let Some(w) = matches.get_one::(OPT_WIDTH) { - width = *w; - if width > MAX_WIDTH { - return Err(USimpleError::new( - 1, - format!("invalid width: '{}': Numerical result out of range", width), - )); + let width_opt = matches.get_one::(OPT_WIDTH); + let goal_opt = matches.get_one::(OPT_GOAL); + let (width, goal) = match (width_opt, goal_opt) { + (Some(&w), Some(&g)) => { + if g > w { + return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); + } + (w, g) } - goal = cmp::min(width * DEFAULT_GOAL_TO_WIDTH_RATIO / 100, width - 3); - }; - - if let Some(g) = matches.get_one::(OPT_GOAL) { - goal = *g; - if !matches.contains_id(OPT_WIDTH) { - width = cmp::max(goal * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO, goal + 3); - } else if goal > width { - return Err(USimpleError::new(1, "GOAL cannot be greater than WIDTH.")); + (Some(&w), None) => { + let g = (w * DEFAULT_GOAL_TO_WIDTH_RATIO / 100).min(w - 3); + (w, g) } + (None, Some(&g)) => { + let w = (g * 100 / DEFAULT_GOAL_TO_WIDTH_RATIO).max(g + 3); + (w, g) + } + (None, None) => (75, 70), }; + if width > MAX_WIDTH { + return Err(USimpleError::new( + 1, + format!("invalid width: '{}': Numerical result out of range", width), + )); + } + let mut tabwidth = 8; if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { tabwidth = match s.parse::() { From 8a494530572ca1a2221416a284144a9e44177f8e Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:24:18 +0100 Subject: [PATCH 5/7] fmt: clean up imports --- src/uu/fmt/src/fmt.rs | 4 ++-- src/uu/fmt/src/linebreak.rs | 4 +--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 0ed32641fbb..3461a79ba7f 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -13,8 +13,8 @@ use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; use uucore::{format_usage, help_about, help_usage, show_warning}; -use self::linebreak::break_lines; -use self::parasplit::ParagraphStream; +use linebreak::break_lines; +use parasplit::ParagraphStream; mod linebreak; mod parasplit; diff --git a/src/uu/fmt/src/linebreak.rs b/src/uu/fmt/src/linebreak.rs index 7cd65d86149..306c15f3614 100644 --- a/src/uu/fmt/src/linebreak.rs +++ b/src/uu/fmt/src/linebreak.rs @@ -5,10 +5,8 @@ // spell-checker:ignore (ToDO) INFTY MULT accum breakwords linebreak linebreaking linebreaks linelen maxlength minlength nchars ostream overlen parasplit plass posn powf punct signum slen sstart tabwidth tlen underlen winfo wlen wordlen -use std::cmp; -use std::i64; use std::io::{BufWriter, Stdout, Write}; -use std::mem; +use std::{cmp, i64, mem}; use uucore::crash; From 0b4d4b610cc510a7aff4095447ddf8195cf27072 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:30:01 +0100 Subject: [PATCH 6/7] fmt: put options into module and change static to const --- src/uu/fmt/src/fmt.rs | 100 +++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 49 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index 3461a79ba7f..e44b7e0e5be 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -7,8 +7,7 @@ use clap::{crate_version, Arg, ArgAction, ArgMatches, Command}; use std::fs::File; -use std::io::{stdin, stdout, Write}; -use std::io::{BufReader, BufWriter, Read, Stdout}; +use std::io::{stdin, stdout, BufReader, BufWriter, Read, Stdout, Write}; use uucore::display::Quotable; use uucore::error::{FromIo, UResult, USimpleError}; use uucore::{format_usage, help_about, help_usage, show_warning}; @@ -19,25 +18,26 @@ use parasplit::ParagraphStream; mod linebreak; mod parasplit; -static ABOUT: &str = help_about!("fmt.md"); +const ABOUT: &str = help_about!("fmt.md"); const USAGE: &str = help_usage!("fmt.md"); -static MAX_WIDTH: usize = 2500; - -static OPT_CROWN_MARGIN: &str = "crown-margin"; -static OPT_TAGGED_PARAGRAPH: &str = "tagged-paragraph"; -static OPT_PRESERVE_HEADERS: &str = "preserve-headers"; -static OPT_SPLIT_ONLY: &str = "split-only"; -static OPT_UNIFORM_SPACING: &str = "uniform-spacing"; -static OPT_PREFIX: &str = "prefix"; -static OPT_SKIP_PREFIX: &str = "skip-prefix"; -static OPT_EXACT_PREFIX: &str = "exact-prefix"; -static OPT_EXACT_SKIP_PREFIX: &str = "exact-skip-prefix"; -static OPT_WIDTH: &str = "width"; -static OPT_GOAL: &str = "goal"; -static OPT_QUICK: &str = "quick"; -static OPT_TAB_WIDTH: &str = "tab-width"; - -static ARG_FILES: &str = "files"; +const MAX_WIDTH: usize = 2500; + +mod options { + pub const CROWN_MARGIN: &str = "crown-margin"; + pub const TAGGED_PARAGRAPH: &str = "tagged-paragraph"; + pub const PRESERVE_HEADERS: &str = "preserve-headers"; + pub const SPLIT_ONLY: &str = "split-only"; + pub const UNIFORM_SPACING: &str = "uniform-spacing"; + pub const PREFIX: &str = "prefix"; + pub const SKIP_PREFIX: &str = "skip-prefix"; + pub const EXACT_PREFIX: &str = "exact-prefix"; + pub const EXACT_SKIP_PREFIX: &str = "exact-skip-prefix"; + pub const WIDTH: &str = "width"; + pub const GOAL: &str = "goal"; + pub const QUICK: &str = "quick"; + pub const TAB_WIDTH: &str = "tab-width"; + pub const FILES: &str = "files"; +} // by default, goal is 93% of width const DEFAULT_GOAL_TO_WIDTH_RATIO: usize = 93; @@ -61,13 +61,13 @@ pub struct FmtOptions { impl FmtOptions { fn from_matches(matches: &ArgMatches) -> UResult { - let mut tagged = matches.get_flag(OPT_TAGGED_PARAGRAPH); - let mut crown = matches.get_flag(OPT_CROWN_MARGIN); + let mut tagged = matches.get_flag(options::TAGGED_PARAGRAPH); + let mut crown = matches.get_flag(options::CROWN_MARGIN); - let mail = matches.get_flag(OPT_PRESERVE_HEADERS); - let uniform = matches.get_flag(OPT_UNIFORM_SPACING); - let quick = matches.get_flag(OPT_QUICK); - let split_only = matches.get_flag(OPT_SPLIT_ONLY); + let mail = matches.get_flag(options::PRESERVE_HEADERS); + let uniform = matches.get_flag(options::UNIFORM_SPACING); + let quick = matches.get_flag(options::QUICK); + let split_only = matches.get_flag(options::SPLIT_ONLY); if crown { tagged = false; @@ -77,14 +77,16 @@ impl FmtOptions { tagged = false; } - let xprefix = matches.contains_id(OPT_EXACT_PREFIX); - let xanti_prefix = matches.contains_id(OPT_SKIP_PREFIX); + let xprefix = matches.contains_id(options::EXACT_PREFIX); + let xanti_prefix = matches.contains_id(options::SKIP_PREFIX); - let prefix = matches.get_one::(OPT_PREFIX).map(String::from); - let anti_prefix = matches.get_one::(OPT_SKIP_PREFIX).map(String::from); + let prefix = matches.get_one::(options::PREFIX).map(String::from); + let anti_prefix = matches + .get_one::(options::SKIP_PREFIX) + .map(String::from); - let width_opt = matches.get_one::(OPT_WIDTH); - let goal_opt = matches.get_one::(OPT_GOAL); + let width_opt = matches.get_one::(options::WIDTH); + let goal_opt = matches.get_one::(options::GOAL); let (width, goal) = match (width_opt, goal_opt) { (Some(&w), Some(&g)) => { if g > w { @@ -111,7 +113,7 @@ impl FmtOptions { } let mut tabwidth = 8; - if let Some(s) = matches.get_one::(OPT_TAB_WIDTH) { + if let Some(s) = matches.get_one::(options::TAB_WIDTH) { tabwidth = match s.parse::() { Ok(t) => t, Err(e) => { @@ -158,7 +160,7 @@ fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions) let matches = uu_app().try_get_matches_from(args)?; let mut files: Vec = matches - .get_many::(ARG_FILES) + .get_many::(options::FILES) .map(|v| v.map(ToString::to_string).collect()) .unwrap_or_default(); @@ -242,9 +244,9 @@ pub fn uu_app() -> Command { .override_usage(format_usage(USAGE)) .infer_long_args(true) .arg( - Arg::new(OPT_CROWN_MARGIN) + Arg::new(options::CROWN_MARGIN) .short('c') - .long(OPT_CROWN_MARGIN) + .long(options::CROWN_MARGIN) .help( "First and second line of paragraph \ may have different indentations, in which \ @@ -254,7 +256,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_TAGGED_PARAGRAPH) + Arg::new(options::TAGGED_PARAGRAPH) .short('t') .long("tagged-paragraph") .help( @@ -264,7 +266,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_PRESERVE_HEADERS) + Arg::new(options::PRESERVE_HEADERS) .short('m') .long("preserve-headers") .help( @@ -274,14 +276,14 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_SPLIT_ONLY) + Arg::new(options::SPLIT_ONLY) .short('s') .long("split-only") .help("Split lines only, do not reflow.") .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_UNIFORM_SPACING) + Arg::new(options::UNIFORM_SPACING) .short('u') .long("uniform-spacing") .help( @@ -294,7 +296,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_PREFIX) + Arg::new(options::PREFIX) .short('p') .long("prefix") .help( @@ -306,7 +308,7 @@ pub fn uu_app() -> Command { .value_name("PREFIX"), ) .arg( - Arg::new(OPT_SKIP_PREFIX) + Arg::new(options::SKIP_PREFIX) .short('P') .long("skip-prefix") .help( @@ -317,7 +319,7 @@ pub fn uu_app() -> Command { .value_name("PSKIP"), ) .arg( - Arg::new(OPT_EXACT_PREFIX) + Arg::new(options::EXACT_PREFIX) .short('x') .long("exact-prefix") .help( @@ -327,7 +329,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_EXACT_SKIP_PREFIX) + Arg::new(options::EXACT_SKIP_PREFIX) .short('X') .long("exact-skip-prefix") .help( @@ -337,7 +339,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_WIDTH) + Arg::new(options::WIDTH) .short('w') .long("width") .help("Fill output lines up to a maximum of WIDTH columns, default 75.") @@ -345,7 +347,7 @@ pub fn uu_app() -> Command { .value_parser(clap::value_parser!(usize)), ) .arg( - Arg::new(OPT_GOAL) + Arg::new(options::GOAL) .short('g') .long("goal") .help("Goal width, default of 93% of WIDTH. Must be less than WIDTH.") @@ -353,7 +355,7 @@ pub fn uu_app() -> Command { .value_parser(clap::value_parser!(usize)), ) .arg( - Arg::new(OPT_QUICK) + Arg::new(options::QUICK) .short('q') .long("quick") .help( @@ -363,7 +365,7 @@ pub fn uu_app() -> Command { .action(ArgAction::SetTrue), ) .arg( - Arg::new(OPT_TAB_WIDTH) + Arg::new(options::TAB_WIDTH) .short('T') .long("tab-width") .help( @@ -374,7 +376,7 @@ pub fn uu_app() -> Command { .value_name("TABWIDTH"), ) .arg( - Arg::new(ARG_FILES) + Arg::new(options::FILES) .action(ArgAction::Append) .value_hint(clap::ValueHint::FilePath), ) From 2a8f4ec294369c228bc8063676879a9ac9e436e5 Mon Sep 17 00:00:00 2001 From: Terts Diepraam Date: Tue, 28 Nov 2023 12:34:04 +0100 Subject: [PATCH 7/7] fmt: inline parse_arguments function --- src/uu/fmt/src/fmt.rs | 35 ++++++++--------------------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/src/uu/fmt/src/fmt.rs b/src/uu/fmt/src/fmt.rs index e44b7e0e5be..4380487814b 100644 --- a/src/uu/fmt/src/fmt.rs +++ b/src/uu/fmt/src/fmt.rs @@ -147,32 +147,6 @@ impl FmtOptions { } } -/// Parse the command line arguments and return the list of files and formatting options. -/// -/// # Arguments -/// -/// * `args` - Command line arguments. -/// -/// # Returns -/// -/// A tuple containing a vector of file names and a `FmtOptions` struct. -fn parse_arguments(args: impl uucore::Args) -> UResult<(Vec, FmtOptions)> { - let matches = uu_app().try_get_matches_from(args)?; - - let mut files: Vec = matches - .get_many::(options::FILES) - .map(|v| v.map(ToString::to_string).collect()) - .unwrap_or_default(); - - let fmt_opts = FmtOptions::from_matches(&matches)?; - - if files.is_empty() { - files.push("-".to_owned()); - } - - Ok((files, fmt_opts)) -} - /// Process the content of a file and format it according to the provided options. /// /// # Arguments @@ -226,7 +200,14 @@ fn process_file( #[uucore::main] pub fn uumain(args: impl uucore::Args) -> UResult<()> { - let (files, fmt_opts) = parse_arguments(args)?; + let matches = uu_app().try_get_matches_from(args)?; + + let files: Vec = matches + .get_many::(options::FILES) + .map(|v| v.map(ToString::to_string).collect()) + .unwrap_or(vec!["-".into()]); + + let fmt_opts = FmtOptions::from_matches(&matches)?; let mut ostream = BufWriter::new(stdout());