diff --git a/src/uu/env/src/env.rs b/src/uu/env/src/env.rs index fb590e06bfd..2626c301ede 100644 --- a/src/uu/env/src/env.rs +++ b/src/uu/env/src/env.rs @@ -6,7 +6,8 @@ // spell-checker:ignore (ToDO) chdir execvp progname subcommand subcommands unsets setenv putenv spawnp SIGSEGV SIGBUS sigaction pub mod parse_error; -pub mod raw_string_parser; +pub mod string_parser; +pub mod string_expander; pub mod split_iterator; pub mod variable_parser; diff --git a/src/uu/env/src/parse_error.rs b/src/uu/env/src/parse_error.rs index be269285543..32f80f4d575 100644 --- a/src/uu/env/src/parse_error.rs +++ b/src/uu/env/src/parse_error.rs @@ -5,7 +5,7 @@ use std::fmt; -use crate::raw_string_parser; +use crate::string_parser; /// An error returned when string arg splitting fails. #[derive(Clone, Debug, Eq, PartialEq)] @@ -31,7 +31,7 @@ pub enum ParseError { }, InternalError { pos: usize, - sub_err: raw_string_parser::Error, + sub_err: string_parser::Error, }, ReachedEnd, ContinueWithDelimiter, @@ -45,8 +45,8 @@ impl fmt::Display for ParseError { impl std::error::Error for ParseError {} -impl From for ParseError { - fn from(value: raw_string_parser::Error) -> Self { +impl From for ParseError { + fn from(value: string_parser::Error) -> Self { Self::InternalError { pos: value.look_at_pos, sub_err: value, diff --git a/src/uu/env/src/split_iterator.rs b/src/uu/env/src/split_iterator.rs index 595dfbe0b5e..9f456c3596b 100644 --- a/src/uu/env/src/split_iterator.rs +++ b/src/uu/env/src/split_iterator.rs @@ -18,15 +18,12 @@ #![forbid(unsafe_code)] -use std::env::vars_os; use std::ffi::OsStr; use std::ffi::OsString; -use std::mem; -use std::ops::Range; use crate::parse_error::ParseError; -use crate::raw_string_parser::RawStringExpander; -use crate::raw_string_parser::RawStringParser; +use crate::string_parser::RawStringExpander; +use crate::string_parser::RawStringParser; use crate::variable_parser::VariableParser; #[derive(Clone, Copy)] diff --git a/src/uu/env/src/string_expander.rs b/src/uu/env/src/string_expander.rs new file mode 100644 index 00000000000..a9c05695519 --- /dev/null +++ b/src/uu/env/src/string_expander.rs @@ -0,0 +1,81 @@ +// This file is part of the uutils coreutils package. +// +// For the full copyright and license information, please view the LICENSE +// file that was distributed with this source code. + +use std::{ffi::{OsStr, OsString}, mem}; + +use crate::string_parser::{Chunk, Error, RawStringParser}; + + + + +/// This class makes parsing and word collection more convenient. +/// +/// It manages an "output" buffer that is automatically filled. +/// It provides "skip_one" and "take_one" that focus on +/// working with ASCII separators. Thus they will skip or take +/// all consecutive non-ascii char sequences at once. +pub struct RawStringExpander<'a> { + parser: RawStringParser<'a>, + output: OsString, +} + + + +impl<'a> RawStringExpander<'a> { + pub fn new + ?Sized>(input: &'a S) -> Self { + Self { + parser: RawStringParser::new(input), + output: OsString::default(), + } + } + + pub fn new_at(input: &'a OsStr, pos: usize) -> Result { + Ok(Self { + parser: RawStringParser::new_at(input, pos)?, + output: OsString::default(), + }) + } + + pub fn get_parser(&self) -> &RawStringParser<'a> { + &self.parser + } + + pub fn get_parser_mut(&mut self) -> &mut RawStringParser<'a> { + &mut self.parser + } + + pub fn skip_one(&mut self) -> Result<(), Error> { + self.get_parser_mut() + .consumer_one_ascii_or_all_non_ascii()?; + Ok(()) + } + + pub fn get_look_at_pos(&self) -> usize { + self.get_parser().get_look_at_pos() + } + + pub fn take_one(&mut self) -> Result<(), Error> { + let chunks = self.parser.consumer_one_ascii_or_all_non_ascii()?; + for chunk in chunks { + match chunk { + Chunk::InvalidEncoding(invalid) => self.output.push(invalid), + Chunk::ValidChar(char) => self.output.push(char.to_string()), + } + } + Ok(()) + } + + pub fn put_one_char(&mut self, c: char) { + self.output.push(c.to_string()); + } + + pub fn put_string>(&mut self, str: S) { + self.output.push(str); + } + + pub fn take_collected_output(&mut self) -> OsString { + mem::take(&mut self.output) + } +} diff --git a/src/uu/env/src/raw_string_parser.rs b/src/uu/env/src/string_parser.rs similarity index 100% rename from src/uu/env/src/raw_string_parser.rs rename to src/uu/env/src/string_parser.rs diff --git a/src/uu/env/src/variable_parser.rs b/src/uu/env/src/variable_parser.rs index 310540e3592..4848d921b74 100644 --- a/src/uu/env/src/variable_parser.rs +++ b/src/uu/env/src/variable_parser.rs @@ -2,14 +2,10 @@ // // For the full copyright and license information, please view the LICENSE // file that was distributed with this source code. -// -// This file is based on work from Tomasz Miąsko who published it as "shell_words" crate, -// licensed under the Apache License, Version 2.0 -// or the MIT license , at your option. use std::{ffi::OsStr, ops::Range}; -use crate::{parse_error::ParseError, raw_string_parser::RawStringParser}; +use crate::{parse_error::ParseError, string_parser::RawStringParser}; pub struct VariableParser<'a, 'b> where 'a : 'b diff --git a/tests/by-util/test_env.rs b/tests/by-util/test_env.rs index 6c5dc6410dc..52b3635cb10 100644 --- a/tests/by-util/test_env.rs +++ b/tests/by-util/test_env.rs @@ -912,22 +912,22 @@ mod tests_split_iterator { mod test_raw_string_parser { use std::{ffi::{OsStr, OsString}, os::unix::ffi::OsStringExt}; - use env::raw_string_parser; + use env::string_parser; use os_str_bytes::OsStrBytesExt; #[test] fn test_ascii_only_take_one_look_at_correct_data_and_end_behavior() { let input = "hello"; - let mut uut = env::raw_string_parser::RawStringExpander::new(&input); + let mut uut = env::string_parser::RawStringExpander::new(&input); for c in input.chars() { assert_eq!(c, uut.get_parser().look_at().unwrap()); uut.take_one().unwrap(); } assert_eq!( uut.get_parser().look_at(), - Err(raw_string_parser::Error { + Err(string_parser::Error { look_at_pos: 5, - err_type: raw_string_parser::ErrorType::EndOfInput + err_type: string_parser::ErrorType::EndOfInput }) ); uut.take_one().unwrap_err(); @@ -941,7 +941,7 @@ mod test_raw_string_parser { fn test_multi_byte_codes_take_one_look_at_correct_data_and_end_behavior() { let input = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉"); let owl: char = '🦉'; - let mut uut = env::raw_string_parser::RawStringExpander::new(&input); + let mut uut = env::string_parser::RawStringExpander::new(&input); for _i in 0..3 { assert_eq!(uut.get_parser().look_at().unwrap(), owl); uut.take_one().unwrap(); @@ -952,9 +952,9 @@ mod test_raw_string_parser { uut.take_one().unwrap(); assert_eq!( uut.get_parser().look_at(), - Err(raw_string_parser::Error { + Err(string_parser::Error { look_at_pos: 43, - err_type: raw_string_parser::ErrorType::EndOfInput + err_type: string_parser::ErrorType::EndOfInput }) ); uut.take_one().unwrap_err(); @@ -968,7 +968,7 @@ mod test_raw_string_parser { fn test_multi_byte_codes_put_one_ascii_start_middle_end_try_invalid_ascii() { let input = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉"); let owl: char = '🦉'; - let mut uut = env::raw_string_parser::RawStringExpander::new(&input); + let mut uut = env::string_parser::RawStringExpander::new(&input); uut.put_one_char('a'); for _i in 0..3 { assert_eq!(uut.get_parser().look_at().unwrap(), owl); @@ -984,9 +984,9 @@ mod test_raw_string_parser { uut.put_one_char('a'); assert_eq!( uut.get_parser().look_at(), - Err(raw_string_parser::Error { + Err(string_parser::Error { look_at_pos: 43, - err_type: raw_string_parser::ErrorType::EndOfInput + err_type: string_parser::ErrorType::EndOfInput }) ); uut.take_one().unwrap_err(); @@ -1003,7 +1003,7 @@ mod test_raw_string_parser { #[test] fn test_multi_byte_codes_skip_one_take_one_skip_until_ascii_char_or_end() { let input = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉"); - let mut uut = env::raw_string_parser::RawStringExpander::new(&input); + let mut uut = env::string_parser::RawStringExpander::new(&input); uut.skip_one().unwrap(); // skip 🦉🦉🦉 assert_eq!(uut.get_look_at_pos(), 12); @@ -1028,7 +1028,7 @@ mod test_raw_string_parser { #[test] fn test_multi_byte_codes_skip_multiple_ascii_bounded_good_and_bad() { let input = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉"); - let mut uut = env::raw_string_parser::RawStringExpander::new(&input); + let mut uut = env::string_parser::RawStringExpander::new(&input); uut.get_parser_mut().skip_multiple_ascii_bounded(0); assert_eq!(uut.get_look_at_pos(), 0); @@ -1052,7 +1052,7 @@ mod test_raw_string_parser { #[test] fn test_multi_byte_codes_put_string_utf8_start_middle_end() { let input = OsString::from("🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉"); - let mut uut = env::raw_string_parser::RawStringExpander::new(&input); + let mut uut = env::string_parser::RawStringExpander::new(&input); uut.put_string("🦔oo"); uut.take_one().unwrap(); // takes 🦉🦉🦉 @@ -1068,7 +1068,7 @@ mod test_raw_string_parser { #[test] fn test_multi_byte_codes_look_at_remaining_start_middle_end() { let input = "🦉🦉🦉x🦉🦉x🦉x🦉🦉🦉🦉"; - let mut uut = env::raw_string_parser::RawStringExpander::new(&input); + let mut uut = env::string_parser::RawStringExpander::new(&input); assert_eq!(uut.get_parser().look_at_remaining(), input); uut.take_one().unwrap(); // takes 🦉🦉🦉 @@ -1088,7 +1088,7 @@ mod test_raw_string_parser { let owl_b = "🦉".bytes().next().unwrap(); let input_u8 = [b'<', owl_b, b'>']; let input_str = OsString::from_vec(input_u8.to_vec()); - let mut uut = env::raw_string_parser::RawStringExpander::new(&input_str); + let mut uut = env::string_parser::RawStringExpander::new(&input_str); assert_eq!(uut.get_parser().look_at_remaining(), input_str); assert_eq!(uut.get_parser().look_at().unwrap(), '<');