Skip to content

Commit

Permalink
move string expander into own file
Browse files Browse the repository at this point in the history
  • Loading branch information
cre4ture committed Feb 18, 2024
1 parent f40788b commit e348613
Show file tree
Hide file tree
Showing 7 changed files with 105 additions and 30 deletions.
3 changes: 2 additions & 1 deletion src/uu/env/src/env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
// spell-checker:ignore (ToDO) chdir execvp progname subcommand subcommands unsets setenv putenv spawnp SIGSEGV SIGBUS sigaction

pub mod parse_error;
pub mod raw_string_parser;
pub mod string_parser;
pub mod string_expander;
pub mod split_iterator;
pub mod variable_parser;

Expand Down
8 changes: 4 additions & 4 deletions src/uu/env/src/parse_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

use std::fmt;

use crate::raw_string_parser;
use crate::string_parser;

/// An error returned when string arg splitting fails.
#[derive(Clone, Debug, Eq, PartialEq)]
Expand All @@ -31,7 +31,7 @@ pub enum ParseError {
},
InternalError {
pos: usize,
sub_err: raw_string_parser::Error,
sub_err: string_parser::Error,
},
ReachedEnd,
ContinueWithDelimiter,
Expand All @@ -45,8 +45,8 @@ impl fmt::Display for ParseError {

impl std::error::Error for ParseError {}

impl From<raw_string_parser::Error> for ParseError {
fn from(value: raw_string_parser::Error) -> Self {
impl From<string_parser::Error> for ParseError {
fn from(value: string_parser::Error) -> Self {
Self::InternalError {
pos: value.look_at_pos,
sub_err: value,
Expand Down
7 changes: 2 additions & 5 deletions src/uu/env/src/split_iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,12 @@

#![forbid(unsafe_code)]

use std::env::vars_os;
use std::ffi::OsStr;
use std::ffi::OsString;
use std::mem;
use std::ops::Range;

use crate::parse_error::ParseError;
use crate::raw_string_parser::RawStringExpander;
use crate::raw_string_parser::RawStringParser;
use crate::string_parser::RawStringExpander;
use crate::string_parser::RawStringParser;
use crate::variable_parser::VariableParser;

#[derive(Clone, Copy)]
Expand Down
81 changes: 81 additions & 0 deletions src/uu/env/src/string_expander.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// This file is part of the uutils coreutils package.
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

use std::{ffi::{OsStr, OsString}, mem};

use crate::string_parser::{Chunk, Error, RawStringParser};




/// Couples a [`RawStringParser`] with an output buffer to make parsing and
/// word collection more convenient.
///
/// Input is consumed through the wrapped parser: `take_one` copies what it
/// consumes into the output buffer, while `skip_one` discards it. Both
/// advance by whatever the parser's `consumer_one_ascii_or_all_non_ascii`
/// yields; judging by that name, this is a single ASCII character or a whole
/// run of consecutive non-ASCII data at once, which suits callers that only
/// care about ASCII separators.
///
/// Text that is not part of the input can be appended with `put_one_char`
/// and `put_string`; the accumulated result is retrieved (and the buffer
/// reset) with `take_collected_output`.
pub struct RawStringExpander<'a> {
/// Parser that tracks the read position within the borrowed input.
parser: RawStringParser<'a>,
/// Buffer that accumulates taken input and explicitly inserted text.
output: OsString,
}



impl<'a> RawStringExpander<'a> {
    /// Creates an expander over `input` with an empty output buffer.
    ///
    /// The parser is built with `RawStringParser::new(input)`.
    pub fn new<S: AsRef<OsStr> + ?Sized>(input: &'a S) -> Self {
        Self {
            parser: RawStringParser::new(input),
            output: OsString::new(),
        }
    }

    /// Creates an expander over `input` whose parser is positioned at `pos`,
    /// with an empty output buffer.
    ///
    /// # Errors
    ///
    /// Propagates the error from `RawStringParser::new_at` when the parser
    /// cannot be created at `pos`.
    pub fn new_at(input: &'a OsStr, pos: usize) -> Result<Self, Error> {
        let parser = RawStringParser::new_at(input, pos)?;
        Ok(Self {
            parser,
            output: OsString::new(),
        })
    }

    /// Returns a shared reference to the wrapped parser.
    pub fn get_parser(&self) -> &RawStringParser<'a> {
        &self.parser
    }

    /// Returns an exclusive reference to the wrapped parser, so callers can
    /// advance it directly without touching the output buffer.
    pub fn get_parser_mut(&mut self) -> &mut RawStringParser<'a> {
        &mut self.parser
    }

    /// Consumes the next unit of input and discards it.
    ///
    /// The unit is whatever the parser's
    /// `consumer_one_ascii_or_all_non_ascii` consumes in one call.
    ///
    /// # Errors
    ///
    /// Propagates the parser's error; the output buffer is never modified.
    pub fn skip_one(&mut self) -> Result<(), Error> {
        self.parser.consumer_one_ascii_or_all_non_ascii()?;
        Ok(())
    }

    /// Returns the current look-at position as reported by the parser.
    pub fn get_look_at_pos(&self) -> usize {
        self.parser.get_look_at_pos()
    }

    /// Consumes the next unit of input and appends it to the output buffer.
    ///
    /// Chunks with invalid encoding are appended as they are; valid
    /// characters are appended in their UTF-8 form.
    ///
    /// # Errors
    ///
    /// Propagates the parser's error; in that case nothing is appended.
    pub fn take_one(&mut self) -> Result<(), Error> {
        let consumed = self.parser.consumer_one_ascii_or_all_non_ascii()?;
        // One stack buffer is reused for every character, so appending does
        // not allocate a temporary `String` per character.
        let mut utf8 = [0u8; 4];
        for chunk in consumed {
            match chunk {
                Chunk::InvalidEncoding(invalid) => self.output.push(invalid),
                Chunk::ValidChar(c) => self.output.push(&*c.encode_utf8(&mut utf8)),
            }
        }
        Ok(())
    }

    /// Appends the single character `c` to the output buffer without
    /// consuming any input.
    pub fn put_one_char(&mut self, c: char) {
        // Encode on the stack instead of allocating a `String` for one char.
        let mut utf8 = [0u8; 4];
        self.output.push(&*c.encode_utf8(&mut utf8));
    }

    /// Appends `str` to the output buffer without consuming any input.
    pub fn put_string<S: AsRef<OsStr>>(&mut self, str: S) {
        self.output.push(str);
    }

    /// Returns everything collected so far and leaves the output buffer
    /// empty, ready for the next word.
    pub fn take_collected_output(&mut self) -> OsString {
        mem::take(&mut self.output)
    }
}
File renamed without changes.
6 changes: 1 addition & 5 deletions src/uu/env/src/variable_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,10 @@
//
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
//
// This file is based on work from Tomasz Miąsko who published it as "shell_words" crate,
// licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
// or the MIT license <LICENSE-MIT>, at your option.

use std::{ffi::OsStr, ops::Range};

use crate::{parse_error::ParseError, raw_string_parser::RawStringParser};
use crate::{parse_error::ParseError, string_parser::RawStringParser};

pub struct VariableParser<'a, 'b>
where 'a : 'b
Expand Down
30 changes: 15 additions & 15 deletions tests/by-util/test_env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -912,22 +912,22 @@ mod tests_split_iterator {
mod test_raw_string_parser {
use std::{ffi::{OsStr, OsString}, os::unix::ffi::OsStringExt};

use env::raw_string_parser;
use env::string_parser;
use os_str_bytes::OsStrBytesExt;

#[test]
fn test_ascii_only_take_one_look_at_correct_data_and_end_behavior() {
let input = "hello";
let mut uut = env::raw_string_parser::RawStringExpander::new(&input);
let mut uut = env::string_parser::RawStringExpander::new(&input);
for c in input.chars() {
assert_eq!(c, uut.get_parser().look_at().unwrap());
uut.take_one().unwrap();
}
assert_eq!(
uut.get_parser().look_at(),
Err(raw_string_parser::Error {
Err(string_parser::Error {
look_at_pos: 5,
err_type: raw_string_parser::ErrorType::EndOfInput
err_type: string_parser::ErrorType::EndOfInput
})
);
uut.take_one().unwrap_err();
Expand All @@ -941,7 +941,7 @@ mod test_raw_string_parser {
fn test_multi_byte_codes_take_one_look_at_correct_data_and_end_behavior() {
let input = OsString::from("πŸ¦‰πŸ¦‰πŸ¦‰xπŸ¦‰πŸ¦‰xπŸ¦‰xπŸ¦‰πŸ¦‰πŸ¦‰πŸ¦‰");
let owl: char = 'πŸ¦‰';
let mut uut = env::raw_string_parser::RawStringExpander::new(&input);
let mut uut = env::string_parser::RawStringExpander::new(&input);
for _i in 0..3 {
assert_eq!(uut.get_parser().look_at().unwrap(), owl);
uut.take_one().unwrap();
Expand All @@ -952,9 +952,9 @@ mod test_raw_string_parser {
uut.take_one().unwrap();
assert_eq!(
uut.get_parser().look_at(),
Err(raw_string_parser::Error {
Err(string_parser::Error {
look_at_pos: 43,
err_type: raw_string_parser::ErrorType::EndOfInput
err_type: string_parser::ErrorType::EndOfInput
})
);
uut.take_one().unwrap_err();
Expand All @@ -968,7 +968,7 @@ mod test_raw_string_parser {
fn test_multi_byte_codes_put_one_ascii_start_middle_end_try_invalid_ascii() {
let input = OsString::from("πŸ¦‰πŸ¦‰πŸ¦‰xπŸ¦‰πŸ¦‰xπŸ¦‰xπŸ¦‰πŸ¦‰πŸ¦‰πŸ¦‰");
let owl: char = 'πŸ¦‰';
let mut uut = env::raw_string_parser::RawStringExpander::new(&input);
let mut uut = env::string_parser::RawStringExpander::new(&input);
uut.put_one_char('a');
for _i in 0..3 {
assert_eq!(uut.get_parser().look_at().unwrap(), owl);
Expand All @@ -984,9 +984,9 @@ mod test_raw_string_parser {
uut.put_one_char('a');
assert_eq!(
uut.get_parser().look_at(),
Err(raw_string_parser::Error {
Err(string_parser::Error {
look_at_pos: 43,
err_type: raw_string_parser::ErrorType::EndOfInput
err_type: string_parser::ErrorType::EndOfInput
})
);
uut.take_one().unwrap_err();
Expand All @@ -1003,7 +1003,7 @@ mod test_raw_string_parser {
#[test]
fn test_multi_byte_codes_skip_one_take_one_skip_until_ascii_char_or_end() {
let input = OsString::from("πŸ¦‰πŸ¦‰πŸ¦‰xπŸ¦‰πŸ¦‰xπŸ¦‰xπŸ¦‰πŸ¦‰πŸ¦‰πŸ¦‰");
let mut uut = env::raw_string_parser::RawStringExpander::new(&input);
let mut uut = env::string_parser::RawStringExpander::new(&input);

uut.skip_one().unwrap(); // skip πŸ¦‰πŸ¦‰πŸ¦‰
assert_eq!(uut.get_look_at_pos(), 12);
Expand All @@ -1028,7 +1028,7 @@ mod test_raw_string_parser {
#[test]
fn test_multi_byte_codes_skip_multiple_ascii_bounded_good_and_bad() {
let input = OsString::from("πŸ¦‰πŸ¦‰πŸ¦‰xπŸ¦‰πŸ¦‰xπŸ¦‰xπŸ¦‰πŸ¦‰πŸ¦‰πŸ¦‰");
let mut uut = env::raw_string_parser::RawStringExpander::new(&input);
let mut uut = env::string_parser::RawStringExpander::new(&input);

uut.get_parser_mut().skip_multiple_ascii_bounded(0);
assert_eq!(uut.get_look_at_pos(), 0);
Expand All @@ -1052,7 +1052,7 @@ mod test_raw_string_parser {
#[test]
fn test_multi_byte_codes_put_string_utf8_start_middle_end() {
let input = OsString::from("πŸ¦‰πŸ¦‰πŸ¦‰xπŸ¦‰πŸ¦‰xπŸ¦‰xπŸ¦‰πŸ¦‰πŸ¦‰πŸ¦‰");
let mut uut = env::raw_string_parser::RawStringExpander::new(&input);
let mut uut = env::string_parser::RawStringExpander::new(&input);

uut.put_string("πŸ¦”oo");
uut.take_one().unwrap(); // takes πŸ¦‰πŸ¦‰πŸ¦‰
Expand All @@ -1068,7 +1068,7 @@ mod test_raw_string_parser {
#[test]
fn test_multi_byte_codes_look_at_remaining_start_middle_end() {
let input = "πŸ¦‰πŸ¦‰πŸ¦‰xπŸ¦‰πŸ¦‰xπŸ¦‰xπŸ¦‰πŸ¦‰πŸ¦‰πŸ¦‰";
let mut uut = env::raw_string_parser::RawStringExpander::new(&input);
let mut uut = env::string_parser::RawStringExpander::new(&input);

assert_eq!(uut.get_parser().look_at_remaining(), input);
uut.take_one().unwrap(); // takes πŸ¦‰πŸ¦‰πŸ¦‰
Expand All @@ -1088,7 +1088,7 @@ mod test_raw_string_parser {
let owl_b = "πŸ¦‰".bytes().next().unwrap();
let input_u8 = [b'<', owl_b, b'>'];
let input_str = OsString::from_vec(input_u8.to_vec());
let mut uut = env::raw_string_parser::RawStringExpander::new(&input_str);
let mut uut = env::string_parser::RawStringExpander::new(&input_str);

assert_eq!(uut.get_parser().look_at_remaining(), input_str);
assert_eq!(uut.get_parser().look_at().unwrap(), '<');
Expand Down

0 comments on commit e348613

Please sign in to comment.