diff --git a/Cargo.lock b/Cargo.lock
index b196dcd89..a9a39cbe6 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1759,7 +1759,7 @@ dependencies = [
  "serde",
  "serde-wasm-bindgen",
  "serde_json",
- "strum",
+ "strum 0.25.0",
  "strum_macros 0.25.3",
  "wasm-bindgen",
 ]
@@ -3326,6 +3326,12 @@ version = "0.25.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125"
 
+[[package]]
+name = "strum"
+version = "0.26.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06"
+
 [[package]]
 name = "strum_macros"
 version = "0.25.3"
@@ -4499,6 +4505,7 @@ dependencies = [
  "sha1",
  "sha2 0.10.8",
  "smallvec",
+ "strum 0.26.3",
  "strum_macros 0.26.4",
  "thiserror 2.0.6",
  "tlsh-fixed",
diff --git a/Cargo.toml b/Cargo.toml
index 304a30c07..0a09dc5d0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -94,6 +94,7 @@ serde_json = "1.0.133"
 sha1 = "0.10.6"
 sha2 = "0.10.8"
 smallvec = "1.13.2"
+strum = "0.26.3"
 strum_macros = "0.26.4"
 thiserror = "2.0.3"
 # Using tlsh-fixed instead of tlsh because tlsh-fixed includes a fix for this
diff --git a/cli/src/commands/check.rs b/cli/src/commands/check.rs
index 5e4be8df3..14cc7c29a 100644
--- a/cli/src/commands/check.rs
+++ b/cli/src/commands/check.rs
@@ -100,7 +100,7 @@ pub fn exec_check(
     if config.rule_name_regexp.is_some() {
         compiler.rule_name_regexp(
             config.rule_name_regexp.clone().unwrap().as_str(),
-        );
+        )?;
     }
 
     compiler.colorize_errors(io::stdout().is_tty());
diff --git a/lib/Cargo.toml b/lib/Cargo.toml
index 693d5f514..4b93a9a05 100644
--- a/lib/Cargo.toml
+++ b/lib/Cargo.toml
@@ -250,6 +250,7 @@ rsa = { workspace = true, optional = true }
 smallvec = { workspace = true, features = ["serde"] }
 serde = { workspace = true, features = ["rc"] }
 serde_json = { workspace = true, features = ["preserve_order"] }
+strum = { workspace = true }
 strum_macros = { workspace = true }
 thiserror = { workspace = true }
 tlsh-fixed = { workspace = true, optional = true }
diff --git a/lib/src/compiler/mod.rs b/lib/src/compiler/mod.rs
index 2aa9887e2..bd7d268fa 100644
--- a/lib/src/compiler/mod.rs
+++ b/lib/src/compiler/mod.rs
@@ -23,6 +23,7 @@ use itertools::{izip, Itertools, MinMaxResult};
 use log::*;
 use nom::AsChar;
 use regex::Regex;
+use regex::Error as RegexError;
 use regex_syntax::hir;
 use rustc_hash::{FxHashMap, FxHashSet};
 use serde::{Deserialize, Serialize};
@@ -503,18 +504,25 @@ impl<'a> Compiler<'a> {
     }
 
     /// Regexp used to validate the rule name.
-    pub fn rule_name_regexp(&mut self, regexp_string: &str) -> &mut Self {
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`regex::Error`] produced by [`Regex::new`] when
+    /// `regexp_string` is not a valid regular expression.
+    pub fn rule_name_regexp(
+        &mut self,
+        regexp_string: &str,
+    ) -> Result<&mut Self, RegexError> {
         // The documentation says the default is an empty string, which is not
         // true (but it is easier for users to understand). The actual default
         // is None. Because users might stick an empty string in there if they
         // don't want to use this feature (instead of just removing it from the
         // config file), check for the empty string here.
         if !regexp_string.is_empty() {
-            let re = Regex::new(regexp_string)
-                .expect("Unable to parse rule name regular expression");
+            let re = Regex::new(regexp_string)?;
             self.rule_name_regexp = Some(re);
         }
-        self
+        Ok(self)
     }
 
     /// Adds some YARA source code to be compiled.
diff --git a/lib/src/compiler/tests/mod.rs b/lib/src/compiler/tests/mod.rs
index 7368d24f2..a576e3139 100644
--- a/lib/src/compiler/tests/mod.rs
+++ b/lib/src/compiler/tests/mod.rs
@@ -922,6 +922,12 @@ fn test_errors() {
     }
 }
 
+#[test]
+fn test_invalid_rule_name_regex() {
+    // Make sure an invalid regex results in Err.
+    assert!(Compiler::new().rule_name_regexp("(AXS|ERS").is_err());
+}
+
 #[test]
 fn test_warnings() {
     let mut mint = goldenfile::Mint::new(".");
@@ -972,7 +978,7 @@ fn test_warnings() {
         }
 
         if rules.starts_with("// rule name regex") {
-            compiler.rule_name_regexp("^AXSERS$");
+            compiler.rule_name_regexp("^AXSERS$").unwrap();
         }
 
         src.push_str(rules.as_str());
diff --git a/lib/src/lib.rs b/lib/src/lib.rs
index 815797303..ea3cd1857 100644
--- a/lib/src/lib.rs
+++ b/lib/src/lib.rs
@@ -103,43 +103,43 @@ pub mod warnings {
 pub mod config {
     use serde::Deserialize;
     use serde::Serialize;
-    use strum_macros::Display;
+    use strum_macros::{Display, EnumString};
 
     /// Types allowed in the check.metadata table of the config file. Used to
     /// require specific metadata identifiers have specific types by "yr check".
-    #[derive(Display, Deserialize, Serialize, Debug, Clone)]
+    #[derive(Display, Deserialize, Serialize, Debug, Clone, EnumString)]
     pub enum MetaValueType {
         /// Represents a String type
         #[serde(rename = "string")]
-        #[strum(to_string = "string")]
+        #[strum(serialize = "string")]
         String,
         /// Represents an Integer type
         #[serde(rename = "int")]
-        #[strum(to_string = "int")]
+        #[strum(serialize = "int")]
         Integer,
         /// Represents a Float type
         #[serde(rename = "float")]
-        #[strum(to_string = "float")]
+        #[strum(serialize = "float")]
         Float,
         /// Represents a Boolean type
         #[serde(rename = "bool")]
-        #[strum(to_string = "bool")]
+        #[strum(serialize = "bool")]
         Bool,
         /// Represents a SHA256 (string) type
         #[serde(rename = "sha256")]
-        #[strum(to_string = "sha256 (string)")]
+        #[strum(serialize = "sha256", to_string = "sha256 (string)")]
         SHA256,
         /// Represents a SHA1 (string) type
         #[serde(rename = "sha1")]
-        #[strum(to_string = "sha1 (string)")]
+        #[strum(serialize = "sha1", to_string = "sha1 (string)")]
         SHA1,
         /// Represents a MD5 (string) type
         #[serde(rename = "md5")]
-        #[strum(to_string = "md5 (string)")]
+        #[strum(serialize = "md5", to_string = "md5 (string)")]
         MD5,
         /// Represents a generic hash (string) type. Can be MD5/SHA1/SHA256
         #[serde(rename = "hash")]
-        #[strum(to_string = "hash (string)")]
+        #[strum(serialize = "hash", to_string = "hash (string)")]
         HASH,
     }
 }
diff --git a/py/src/lib.rs b/py/src/lib.rs
index 5d9a0287e..ce43f742c 100644
--- a/py/src/lib.rs
+++ b/py/src/lib.rs
@@ -15,11 +15,13 @@ matches = rules.scan(b'some dummy data')
 
 #![deny(missing_docs)]
 
+use std::collections::BTreeMap;
 use std::marker::PhantomPinned;
 use std::mem;
 use std::ops::Deref;
 use std::path::PathBuf;
 use std::pin::Pin;
+use std::str::FromStr;
 use std::time::Duration;
 
 use protobuf_json_mapping::print_to_string as proto_to_json;
@@ -93,6 +95,54 @@ impl Compiler {
         }
     }
 
+    /// Specify required metadata during compilation. Any rule which does not
+    /// meet the required specifications will result in a warning.
+    ///
+    /// The metadata argument must be a dictionary where the keys are the
+    /// required metadata identifiers and the value is a string which is the
+    /// required type for the metadata value.
+    ///
+    /// See the ["check" section](https://virustotal.github.io/yara-x/docs/cli/config-file/)
+    /// of the config file guide for acceptable value types. Any invalid types
+    /// will result in a ValueError being raised.
+    #[pyo3(signature = (metadata))]
+    fn required_metadata(
+        &mut self,
+        metadata: BTreeMap<String, String>,
+    ) -> PyResult<()> {
+        let mut converted_metadata: BTreeMap<
+            String,
+            yrx::config::MetaValueType,
+        > = BTreeMap::new();
+        for (k, v) in metadata.iter() {
+            converted_metadata.insert(
+                k.to_string(),
+                yrx::config::MetaValueType::from_str(v).map_err(|err| {
+                    PyValueError::new_err(format!(
+                        "Incorrect value \"{}\": {}",
+                        v,
+                        err
+                    ))
+                })?,
+            );
+        }
+        self.inner.required_metadata(converted_metadata);
+        Ok(())
+    }
+
+    /// Specify a regular expression that the compiler will enforce upon each
+    /// rule name. Any rule which has a name which does not match this regex
+    /// will return an InvalidRuleName warning.
+    ///
+    /// If the regexp does not compile a ValueError is returned.
+    #[pyo3(signature = (regexp_str))]
+    fn rule_name_regexp(&mut self, regexp_str: &str) -> PyResult<()> {
+        self.inner
+            .rule_name_regexp(regexp_str)
+            .map_err(|err| PyValueError::new_err(err.to_string()))?;
+        Ok(())
+    }
+
     /// Adds a YARA source code to be compiled.
     ///
     /// This function may be invoked multiple times to add several sets of YARA
diff --git a/py/tests/test_api.py b/py/tests/test_api.py
index 8248d2633..85850843b 100644
--- a/py/tests/test_api.py
+++ b/py/tests/test_api.py
@@ -29,6 +29,44 @@ def test_error_on_slow_pattern():
     compiler.add_source(r'rule test {strings: $a = /a.*/ condition: $a}')
 
 
+def test_invalid_rule_name_regexp():
+  compiler = yara_x.Compiler()
+  with pytest.raises(ValueError):
+    compiler.rule_name_regexp("(AXS|ERS")
+
+
+def test_invalid_required_metadata():
+  compiler = yara_x.Compiler()
+  with pytest.raises(ValueError):
+    compiler.required_metadata({"test": "AXSERS"})
+
+
+def test_check_warnings():
+  compiler = yara_x.Compiler()
+  compiler.rule_name_regexp("^foobar_")
+  compiler.required_metadata({"a": "int", "b": "string"})
+
+  compiler.add_source(
+      'rule test { meta: a = 1 strings: $a = {01 [0-1][0-1] 02 } condition: $a }')
+
+  warnings = compiler.warnings()
+
+  # NOTE(review): confirm the warning types and order below against the compiler.
+  assert len(warnings) == 3
+
+  assert warnings[0]['type'] == "MissingMetadata"
+  assert warnings[0]['code'] == "missing_metadata"
+  assert warnings[0]['title'] == "required metadata missing"
+
+  assert warnings[1]['type'] == "IncorrectMetadataType"
+  assert warnings[1]['code'] == "incorrect_metadata_type"
+  assert warnings[1]['title'] == "metadata has incorrect type"
+
+  assert warnings[2]['type'] == "InvalidRuleName"
+  assert warnings[2]['code'] == "invalid_rule_name"
+  assert warnings[2]['title'] == "rule name does not meet requirements"
+
+
 def test_int_globals():
   compiler = yara_x.Compiler()
   compiler.define_global('some_int', 1)