From f3ad7dac2e2003f1c949a0229f724b67ab351b0d Mon Sep 17 00:00:00 2001
From: Tang-Tang Zhou
Date: Sat, 18 May 2024 23:19:20 +0200
Subject: [PATCH] feat: implement unpack iro subcommand (#1)

---
 Cargo.lock        |  17 +++++
 Cargo.toml        |   1 +
 src/error.rs      |  42 ++++++++++++
 src/iro_entry.rs  |  24 +++++--
 src/iro_header.rs |  53 ++++++++++++---
 src/iro_parser.rs |  47 +++++++++++++
 src/main.rs       | 165 ++++++++++++++++++++++++++++++++++++++++-----
 tests/test.rs     | 113 +++++++++++++++++++++++++++--
 8 files changed, 427 insertions(+), 35 deletions(-)
 create mode 100644 src/error.rs
 create mode 100644 src/iro_parser.rs

diff --git a/Cargo.lock b/Cargo.lock
index 8c45940..771737d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -287,6 +287,7 @@ dependencies = [
  "assert_fs",
  "clap",
  "hex-literal",
+ "nom",
  "predicates",
  "thiserror",
  "walkdir",
@@ -322,6 +323,22 @@ version = "2.7.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d"
 
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "normalize-line-endings"
 version = "0.3.0"
diff --git a/Cargo.toml b/Cargo.toml
index 23e22ca..b9682a6 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,6 +15,7 @@ edition = "2021"
 clap = { version = "4.5.4", features = ["derive"] }
 thiserror = "1.0.59"
 walkdir = "2.5.0"
+nom = "7.1.3"
 
 [dev-dependencies]
 assert_cmd = "2.0.12"
diff --git a/src/error.rs b/src/error.rs
new file mode 100644
index 0000000..61c61a5
--- /dev/null
+++ b/src/error.rs
@@ -0,0 +1,42 @@
+use std::path::PathBuf;
+
+use thiserror::Error;
+
+/// Errors reported by the `pack` and `unpack` subcommands.
+#[derive(Error, Debug)]
+pub enum Error {
+    #[error(transparent)]
+    Io(#[from] ::std::io::Error),
+    #[error(transparent)]
+    StripPrefix(#[from] ::std::path::StripPrefixError),
+    #[error("{0} is not a directory")]
+    NotDir(PathBuf),
+    #[error("output path already exists: {0}")]
+    OutputPathExists(PathBuf),
+    #[error("{0} has invalid unicode")]
+    InvalidUnicode(PathBuf),
+    #[error("could not find default name from {0}")]
+    CannotDetectDefaultName(PathBuf),
+    #[error("parsing error due to invalid iro flags {0}")]
+    InvalidIroFlags(i32),
+    /// Holds an owned copy of the failing input so the error can outlive the parsed buffer.
+    #[error(transparent)]
+    CannotParseBinary(nom::Err<::nom::error::Error<Vec<u8>>>),
+    #[error("parsing error due to invalid file flags {0}")]
+    InvalidFileFlags(i32),
+    #[error("invalid utf16 {0}")]
+    InvalidUtf16(String),
+    #[error("parent path does not exist: {0}")]
+    ParentPathDoesNotExist(PathBuf),
+    #[error("invalid index entry length {0}")]
+    InvalidEntryLength(u16),
+    #[error("unsafe entry path in archive: {0}")]
+    UnsafeEntryPath(String),
+}
+
+/// Converts a borrowed nom error into an owned [`Error`] by copying its input slice.
+impl From<nom::Err<nom::error::Error<&[u8]>>> for Error {
+    fn from(err: nom::Err<nom::error::Error<&[u8]>>) -> Self {
+        Self::CannotParseBinary(err.map_input(|input| input.into()))
+    }
+}
diff --git a/src/iro_entry.rs b/src/iro_entry.rs
index 7824783..47d7f04 100644
--- a/src/iro_entry.rs
+++ b/src/iro_entry.rs
@@ -1,12 +1,16 @@
+use crate::Error;
+
 pub const INDEX_FIXED_BYTE_SIZE: usize = 20;
 
+#[derive(Debug)]
 pub struct IroEntry {
-    path: Vec<u8>,
-    flags: FileFlags,
-    offset: u64,
-    data_len: u32,
+    pub path: Vec<u8>,
+    pub flags: FileFlags,
+    pub offset: u64,
+    pub data_len: u32,
 }
 
+#[derive(Debug)]
 pub enum FileFlags {
     Uncompressed = 0,
 }
@@ -34,3 +38,15 @@ impl From<IroEntry> for Vec<u8> {
         bytes
     }
 }
+
+/// Maps the raw flags value of an index entry to [`FileFlags`].
+impl TryFrom<i32> for FileFlags {
+    type Error = Error;
+
+    fn try_from(value: i32) -> Result<Self, Self::Error> {
+        match value {
+            0 => Ok(FileFlags::Uncompressed),
+            _ => Err(Error::InvalidFileFlags(value)),
+        }
+    }
+}
diff --git a/src/iro_header.rs b/src/iro_header.rs
index d79b70e..42345e5 100644
--- a/src/iro_header.rs
+++ b/src/iro_header.rs
@@ -1,21 +1,25 @@
-const IRO_SIG: i32 = 0x534f5249; // represents IROS text
+use std::fmt::Display;
 
-#[derive(Clone)]
+use crate::Error;
+
+pub const IRO_SIG: i32 = 0x534f5249; // represents IROS text
+
+#[derive(Clone, Debug)]
 pub struct IroHeader {
-    version: IroVersion,
-    flags: IroFlags,
-    size: i32,
-    num_files: u32,
+    pub version: IroVersion,
+    pub flags: IroFlags,
+    pub size: i32,
+    pub num_files: u32,
 }
 
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 #[allow(dead_code)]
 pub enum IroFlags {
     None = 0,
     Patch = 1,
 }
 
-#[derive(Clone)]
+#[derive(Clone, Debug)]
 #[allow(dead_code)]
 pub enum IroVersion {
     Zero = 0x10000,
@@ -45,3 +49,36 @@ impl From<IroHeader> for Vec<u8> {
         .concat()
     }
 }
+
+/// Maps the raw flags value of an IRO header to [`IroFlags`].
+impl TryFrom<i32> for IroFlags {
+    type Error = Error;
+
+    fn try_from(value: i32) -> Result<Self, Self::Error> {
+        match value {
+            0 => Ok(IroFlags::None),
+            1 => Ok(IroFlags::Patch),
+            _ => Err(Error::InvalidIroFlags(value)),
+        }
+    }
+}
+
+/// Human-readable archive type shown in the unpack metadata summary.
+impl Display for IroFlags {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            IroFlags::None => f.write_str("Full IRO"),
+            IroFlags::Patch => f.write_str("Patch IRO"),
+        }
+    }
+}
+
+/// Formats the version as its hexadecimal number.
+impl Display for IroVersion {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            IroVersion::Zero => f.write_str("0x10000"),
+            IroVersion::Two => f.write_str("0x10002"),
+        }
+    }
+}
diff --git a/src/iro_parser.rs b/src/iro_parser.rs
new file mode 100644
index 0000000..45fd8a5
--- /dev/null
+++ b/src/iro_parser.rs
@@ -0,0 +1,47 @@
+use nom::{
+    bytes::complete::{tag, take},
+    number::complete::{le_i32, le_u16, le_u32, le_u64},
+};
+
+use crate::{
+    iro_entry::{FileFlags, IroEntry},
+    iro_header::{IroFlags, IroHeader, IroVersion, IRO_SIG},
+    Error,
+};
+
+/// Parses the fixed 20-byte header of a version 2 IRO archive.
+///
+/// Returns the unparsed remainder together with the header. Fails when the signature,
+/// version or size field does not match, or when the flags value is unknown.
+pub fn parse_iro_header_v2(bytes: &[u8]) -> Result<(&[u8], IroHeader), Error> {
+    let (bytes, _) = tag(&IRO_SIG.to_le_bytes())(bytes)?;
+    let (bytes, _) = tag((IroVersion::Two as i32).to_le_bytes())(bytes)?;
+    let (bytes, flags) = le_i32(bytes)?;
+    let (bytes, _) = tag(16i32.to_le_bytes())(bytes)?;
+    let (bytes, num_files) = le_u32(bytes)?;
+
+    Ok((
+        bytes,
+        IroHeader::new(IroVersion::Two, IroFlags::try_from(flags)?, 16, num_files),
+    ))
+}
+
+/// Parses the body of one index entry, i.e. everything after its leading
+/// two-byte entry length, which the caller is expected to have consumed.
+pub fn parse_iro_entry_v2(bytes: &[u8]) -> Result<(&[u8], IroEntry), Error> {
+    let (bytes, filepath_len) = le_u16(bytes)?;
+    let (bytes, filepath) = take(filepath_len)(bytes)?;
+    let (bytes, file_flags) = le_i32(bytes)?;
+    let (bytes, offset) = le_u64(bytes)?;
+    let (bytes, data_len) = le_u32(bytes)?;
+
+    Ok((
+        bytes,
+        IroEntry::new(
+            filepath.to_vec(),
+            FileFlags::try_from(file_flags)?,
+            offset,
+            data_len,
+        ),
+    ))
+}
diff --git a/src/main.rs b/src/main.rs
index ab7691a..6b5fab6 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,17 +1,20 @@
 mod iro_entry;
 mod iro_header;
+mod iro_parser;
+mod error;
 
 use std::{
-    io::{BufRead, BufReader, Seek, Write},
+    io::{BufRead, BufReader, Read, Seek, Write},
     path::{Path, PathBuf},
     process,
     result::Result,
 };
 
 use clap::{Args, Parser, Subcommand};
+use error::Error;
 use iro_entry::{FileFlags, IroEntry, INDEX_FIXED_BYTE_SIZE};
 use iro_header::{IroFlags, IroHeader, IroVersion};
-use thiserror::Error;
+use iro_parser::{parse_iro_entry_v2, parse_iro_header_v2};
 use walkdir::{DirEntry, WalkDir};
 
 /// Command line tool to pack a single directory into a single archive in IRO format
@@ -26,6 +29,8 @@ struct Cli {
 enum Commands {
     /// Pack a single directory into an IRO archive
     Pack(PackArgs),
+    /// Unpack an IRO archive into a directory
+    Unpack(UnpackArgs),
 }
 
 #[derive(Args)]
@@ -39,20 +44,15 @@ struct PackArgs {
     output: Option<PathBuf>,
 }
 
-#[derive(Error, Debug)]
-pub enum Error {
-    #[error(transparent)]
-    Io(#[from] ::std::io::Error),
-    #[error(transparent)]
-    StripPrefix(#[from] ::std::path::StripPrefixError),
-    #[error("{0} is not a directory")]
-    NotDir(PathBuf),
-    #[error("output file path already exists: {0}")]
-    OutputPathExists(PathBuf),
-    #[error("{0} has invalid unicode")]
-    InvalidUnicode(PathBuf),
-    #[error("could not find default name from {0}")]
-    CannotDetectDefaultName(PathBuf),
+#[derive(Args)]
+struct UnpackArgs {
+    /// IRO file to unpack
+    #[arg()]
+    iro_path: PathBuf,
+
+    /// Output directory path (default is the name of the IRO to unpack)
+    #[arg(short, long)]
+    output: Option<PathBuf>,
 }
 
 fn main() {
@@ -72,6 +72,17 @@ fn main() {
                 process::exit(1);
             }
         },
+        Commands::Unpack(args) => match unpack_archive(args.iro_path, args.output) {
+            Ok(output_dir) => {
+                println!("IRO unpacked into \"{}\" directory", output_dir.display());
+                process::exit(0);
+            }
+            Err(err) => {
+                let stderr = std::io::stderr();
+                writeln!(stderr.lock(), "[iroga error]: {}", err).ok();
+                process::exit(1);
+            }
+        },
     }
 }
 
@@ -154,6 +165,128 @@ fn pack_archive(dir_to_pack: PathBuf, output_path: Option<PathBuf>) -> Result<PathBuf, Error> {
     Ok(output_path)
 }
 
+/// Unpacks the IRO archive at `iro_path` into a new directory.
+///
+/// The output directory is `output_path` when given, otherwise the archive's
+/// file name without its `.iro` suffix, relative to the working directory.
+///
+/// # Errors
+///
+/// Fails when the output path already exists, when the archive cannot be read
+/// or parsed, or when an entry would be written outside the output directory.
+fn unpack_archive(iro_path: PathBuf, output_path: Option<PathBuf>) -> Result<PathBuf, Error> {
+    let output_path = match output_path {
+        Some(path) => path,
+        None => {
+            let filename = iro_path
+                .file_name()
+                .and_then(|name| name.to_str())
+                .ok_or_else(|| Error::CannotDetectDefaultName(iro_path.clone()))?;
+            // Strip the suffix once; `trim_end_matches` would also eat repeated ones.
+            let stem = filename.strip_suffix(".iro").unwrap_or(filename);
+            if stem.is_empty() {
+                return Err(Error::CannotDetectDefaultName(iro_path));
+            }
+            PathBuf::from(stem)
+        }
+    };
+    // Refuse to write into anything that already exists, directory or file.
+    if output_path.exists() {
+        return Err(Error::OutputPathExists(output_path));
+    }
+
+    let mut iro_file = std::fs::File::open(&iro_path)?;
+    // Signature, version, flags, size and file count: five 4-byte fields.
+    let mut iro_header_bytes = [0u8; 20];
+    iro_file.read_exact(&mut iro_header_bytes)?;
+    let (_, iro_header) = parse_iro_header_v2(&iro_header_bytes)?;
+
+    println!("IRO metadata");
+    println!("- version: {}", iro_header.version);
+    println!("- type: {}", iro_header.flags);
+    println!("- number of files: {}", iro_header.num_files);
+    println!();
+
+    // Read the complete index before extracting, because extraction seeks in the file.
+    let iro_entries = (0..iro_header.num_files)
+        .map(|_| read_iro_entry(&mut iro_file))
+        .collect::<Result<Vec<IroEntry>, Error>>()?;
+
+    for iro_entry in &iro_entries {
+        extract_iro_entry(&mut iro_file, iro_entry, &output_path)?;
+    }
+
+    Ok(output_path)
+}
+
+/// Reads one length-prefixed index entry from `reader`.
+fn read_iro_entry(reader: &mut impl Read) -> Result<IroEntry, Error> {
+    let mut entry_len_bytes = [0u8; 2];
+    reader.read_exact(&mut entry_len_bytes)?;
+    let entry_len = u16::from_le_bytes(entry_len_bytes);
+
+    // The stored length includes its own two bytes; a smaller value is malformed
+    // and must not underflow the subtraction.
+    let body_len = usize::from(entry_len)
+        .checked_sub(entry_len_bytes.len())
+        .ok_or(Error::InvalidEntryLength(entry_len))?;
+    let mut entry_bytes = vec![0u8; body_len];
+    reader.read_exact(&mut entry_bytes)?;
+
+    let (_, iro_entry) = parse_iro_entry_v2(&entry_bytes)?;
+    Ok(iro_entry)
+}
+
+/// Writes the data of `iro_entry` to a file below `output_dir`.
+fn extract_iro_entry(
+    iro_file: &mut std::fs::File,
+    iro_entry: &IroEntry,
+    output_dir: &Path,
+) -> Result<(), Error> {
+    let iro_entry_path = parse_utf16(&iro_entry.path)?.replace('\\', "/");
+
+    // Entry names come from the archive and are untrusted: accept only plain
+    // relative components so that nothing is written outside `output_dir`.
+    let is_plain_relative = !iro_entry_path.is_empty()
+        && Path::new(&iro_entry_path)
+            .components()
+            .all(|part| matches!(part, std::path::Component::Normal(_)));
+    if !is_plain_relative {
+        return Err(Error::UnsafeEntryPath(iro_entry_path));
+    }
+
+    let entry_path = output_dir.join(&iro_entry_path);
+    let parent = entry_path
+        .parent()
+        .ok_or_else(|| Error::ParentPathDoesNotExist(entry_path.clone()))?;
+    std::fs::create_dir_all(parent)?;
+    let mut entry_file = std::fs::File::create(&entry_path)?;
+
+    iro_file.seek(std::io::SeekFrom::Start(iro_entry.offset))?;
+    let data_len = u64::from(iro_entry.data_len);
+    let mut entry_data = Read::take(&mut *iro_file, data_len);
+    let written = std::io::copy(&mut entry_data, &mut entry_file)?;
+    if written != data_len {
+        // The archive ended before the declared amount of entry data.
+        return Err(std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into());
+    }
+
+    println!("\"{}\" file written!", iro_entry_path);
+    Ok(())
+}
+
+/// Decodes little-endian UTF-16 `bytes` into a `String`.
+fn parse_utf16(bytes: &[u8]) -> Result<String, Error> {
+    let bytes_u16 = bytes
+        .chunks(2)
+        .map(|e| e.try_into().map(u16::from_le_bytes))
+        .collect::<Result<Vec<_>, _>>()
+        .map_err(|_| Error::InvalidUtf16("uneven bytes".to_owned()))?;
+
+    String::from_utf16(&bytes_u16)
+        .map_err(|_| Error::InvalidUtf16("bytes in u16 cannot be converted to string".to_owned()))
+}
+
 fn unicode_filepath_bytes(path: &Path, strip_prefix_str: &Path) -> Result<Vec<u8>, Error> {
     Ok(path
         .strip_prefix(strip_prefix_str)?
diff --git a/tests/test.rs b/tests/test.rs
index ddfb9ba..a080f67 100644
--- a/tests/test.rs
+++ b/tests/test.rs
@@ -1,12 +1,12 @@
 use assert_cmd::Command;
 use assert_fs::{
     assert::PathAssert,
-    fixture::{FileTouch, FileWriteStr, PathChild},
+    fixture::{FileTouch, FileWriteBin, FileWriteStr, PathChild},
 };
 use hex_literal::hex;
 
 #[test]
-pub fn not_exists_file() {
+pub fn pack_not_exists_file() {
     let dir = assert_fs::TempDir::new().unwrap();
     iroga_cmd()
         .current_dir(dir.path())
@@ -19,7 +19,7 @@ pub fn not_exists_file() {
 }
 
 #[test]
-pub fn not_dir() {
+pub fn pack_not_dir() {
     let dir = assert_fs::TempDir::new().unwrap();
     dir.child("not_dir").touch().unwrap();
     iroga_cmd()
@@ -32,7 +32,7 @@ pub fn not_dir() {
 }
 
 #[test]
-pub fn output_file_already_exists() {
+pub fn pack_output_file_already_exists() {
     let dir = assert_fs::TempDir::new().unwrap();
     dir.child("dir/file.txt").touch().unwrap();
     dir.child("dir.iro").touch().unwrap();
@@ -43,11 +43,11 @@ pub fn output_file_already_exists() {
         .assert()
         .failure()
         .code(1)
-        .stderr(predicates::str::contains("output file path already exists"));
+        .stderr(predicates::str::contains("output path already exists"));
 }
 
 #[test]
-pub fn single_file() {
+pub fn pack_single_file() {
     const EXPECTED_BYTES: &[u8] = &hex!(
         "49 52 4f 53 02 00 01 00 00 00 00 00 10 00 00 00"
         "01 00 00 00 24 00 10 00 66 00 69 00 6c 00 65 00"
@@ -74,7 +74,7 @@ pub fn single_file() {
 }
 
 #[test]
-pub fn multiple_files() {
+pub fn pack_multiple_files() {
     const EXPECTED_BYTES: &[u8] = &hex!(
         "49 52 4f 53 02 00 01 00 00 00 00 00 10 00 00 00"
         "03 00 00 00 1e 00 0a 00 61 00 2e 00 74 00 78 00"
@@ -103,6 +103,105 @@ pub fn multiple_files() {
     dir.close().unwrap();
 }
 
+#[test]
+pub fn unpack_not_exists_file() {
+    let dir = assert_fs::TempDir::new().unwrap();
+    iroga_cmd()
+        .current_dir(dir.path())
+        .arg("unpack")
+        .arg(dir.path().join("not_exists_file.iro"))
+        .assert()
+        .failure()
+        .code(1);
+    assert!(!dir.child("not_exists_file.iro").exists());
+}
+
+#[test]
+pub fn unpack_not_file() {
+    let dir = assert_fs::TempDir::new().unwrap();
+    dir.child("not_file/is_file").touch().unwrap();
+    iroga_cmd()
+        .arg("unpack")
+        .arg(dir.path().join("not_file"))
+        .assert()
+        .failure()
+        .code(1);
+    assert!(dir.child("not_file").is_dir());
+}
+
+#[test]
+pub fn unpack_output_path_already_exists() {
+    let dir = assert_fs::TempDir::new().unwrap();
+    dir.child("dir/file.txt").touch().unwrap();
+    dir.child("dir.iro").touch().unwrap();
+    iroga_cmd()
+        .current_dir(dir.path())
+        .arg("unpack")
+        .arg("dir.iro")
+        .assert()
+        .failure()
+        .code(1)
+        .stderr(predicates::str::contains("output path already exists"));
+}
+
+#[test]
+pub fn unpack_single_file() {
+    let iro_bytes: &[u8] = &hex!(
+        "49 52 4f 53 02 00 01 00 00 00 00 00 10 00 00 00"
+        "01 00 00 00 24 00 10 00 66 00 69 00 6c 00 65 00"
+        "2e 00 74 00 78 00 74 00 00 00 00 00 38 00 00 00"
+        "00 00 00 00 17 00 00 00 48 65 6c 6c 6f 20 57 6f"
+        "72 6c 64 21 0d 0a 0d 0a 48 69 21 0d 0a 0d 0a "
+    );
+    let dir = assert_fs::TempDir::new().unwrap();
+    dir.child("single.iro")
+        .write_binary(iro_bytes)
+        .unwrap();
+
+    iroga_cmd()
+        .current_dir(dir.path())
+        .arg("unpack")
+        .arg(dir.path().join("single.iro"))
+        .assert()
+        .success()
+        .code(0);
+
+    assert!(dir.child("single/file.txt").exists());
+    dir.child("single/file.txt").assert("Hello World!\r\n\r\nHi!\r\n\r\n");
+    dir.close().unwrap();
+}
+
+#[test]
+pub fn unpack_multiple_files() {
+    let iro_bytes: &[u8] = &hex!(
+        "49 52 4f 53 02 00 01 00 00 00 00 00 10 00 00 00"
+        "03 00 00 00 1e 00 0a 00 61 00 2e 00 74 00 78 00"
+        "74 00 00 00 00 00 76 00 00 00 00 00 00 00 01 00"
+        "00 00 1e 00 0a 00 62 00 2e 00 74 00 78 00 74 00"
+        "00 00 00 00 77 00 00 00 00 00 00 00 01 00 00 00"
+        "26 00 12 00 64 00 69 00 72 00 5c 00 63 00 2e 00"
+        "74 00 78 00 74 00 00 00 00 00 78 00 00 00 00 00"
+        "00 00 01 00 00 00 41 42 43 "
+    );
+    let dir = assert_fs::TempDir::new().unwrap();
+    dir.child("multiple.iro")
+        .write_binary(iro_bytes)
+        .unwrap();
+
+    iroga_cmd()
+        .current_dir(dir.path())
+        .arg("unpack")
+        .arg(dir.path().join("multiple.iro"))
+        .assert()
+        .success()
+        .code(0);
+
+    dir.child("multiple/a.txt").assert("A");
+    dir.child("multiple/b.txt").assert("B");
+    dir.child("multiple/dir/c.txt").assert("C");
+    dir.close().unwrap();
+}
+
 fn iroga_cmd() -> Command {
     Command::cargo_bin("iroga").unwrap()
 }