Skip to content

Commit

Permalink
API changes, and add linting to the Python API.
Browse files Browse the repository at this point in the history
rule_name_regexp now returns a Result. This makes error handling in the case of
an invalid (or too large) regex easier.
  • Loading branch information
wxsBSD committed Jan 14, 2025
1 parent ef04f71 commit 92dd0c7
Show file tree
Hide file tree
Showing 9 changed files with 122 additions and 17 deletions.
9 changes: 8 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ serde_json = "1.0.133"
sha1 = "0.10.6"
sha2 = "0.10.8"
smallvec = "1.13.2"
strum = "0.26.3"
strum_macros = "0.26.4"
thiserror = "2.0.3"
# Using tlsh-fixed instead of tlsh because tlsh-fixed includes a fix for this
Expand Down
2 changes: 1 addition & 1 deletion cli/src/commands/check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ pub fn exec_check(
if config.rule_name_regexp.is_some() {
compiler.rule_name_regexp(
config.rule_name_regexp.clone().unwrap().as_str(),
);
)?;
}

compiler.colorize_errors(io::stdout().is_tty());
Expand Down
1 change: 1 addition & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ rsa = { workspace = true, optional = true }
smallvec = { workspace = true, features = ["serde"] }
serde = { workspace = true, features = ["rc"] }
serde_json = { workspace = true, features = ["preserve_order"] }
strum = { workspace = true }
strum_macros = { workspace = true }
thiserror = { workspace = true }
tlsh-fixed = { workspace = true, optional = true }
Expand Down
11 changes: 7 additions & 4 deletions lib/src/compiler/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use itertools::{izip, Itertools, MinMaxResult};
use log::*;
use nom::AsChar;
use regex::Regex;
use regex::Error as RegexError;
use regex_syntax::hir;
use rustc_hash::{FxHashMap, FxHashSet};
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -503,18 +504,20 @@ impl<'a> Compiler<'a> {
}

/// Regexp used to validate the rule name.
///
/// An empty `regexp_string` is ignored: the previously configured regexp
/// (if any) is left as it is.
///
/// # Errors
///
/// Returns the error produced by `Regex::new` when `regexp_string` is not
/// empty and cannot be compiled into a regular expression.
pub fn rule_name_regexp(&mut self, regexp_string: &str) -> &mut Self {
pub fn rule_name_regexp(
&mut self,
regexp_string: &str,
) -> Result<&mut Self, RegexError> {
// The documentation says the default is an empty string, which is not
// true (but it is easier for users to understand). The actual default
// is None. Because users might stick an empty string in there if they
// don't want to use this feature (instead of just removing it from the
// config file), check for the empty string here.
if !regexp_string.is_empty() {
let re = Regex::new(regexp_string)
.expect("Unable to parse rule name regular expression");
let re = Regex::new(regexp_string)?;
self.rule_name_regexp = Some(re);
}
self
Ok(self)
}

/// Adds some YARA source code to be compiled.
Expand Down
8 changes: 7 additions & 1 deletion lib/src/compiler/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -922,6 +922,12 @@ fn test_errors() {
}
}

#[test]
fn test_invalid_rule_name_regex() {
    // The group in this pattern is never closed, so it is not a valid
    // regular expression and setting it must be reported as an `Err`.
    let mut compiler = Compiler::new();
    let outcome = compiler.rule_name_regexp("(AXS|ERS");
    assert!(outcome.is_err());
}

#[test]
fn test_warnings() {
let mut mint = goldenfile::Mint::new(".");
Expand Down Expand Up @@ -972,7 +978,7 @@ fn test_warnings() {
}

if rules.starts_with("// rule name regex") {
compiler.rule_name_regexp("^AXSERS$");
compiler.rule_name_regexp("^AXSERS$").unwrap();
}

src.push_str(rules.as_str());
Expand Down
20 changes: 10 additions & 10 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -103,43 +103,43 @@ pub mod warnings {
pub mod config {
use serde::Deserialize;
use serde::Serialize;
use strum_macros::Display;
use strum_macros::{Display, EnumString};

/// Types allowed in the check.metadata table of the config file. Used to
/// require specific metadata identifiers have specific types by "yr check".
///
/// Every variant carries a `serde` name, used when (de)serializing the
/// config file, and `strum` attributes, used by the derived `Display` and
/// `EnumString` implementations.
#[derive(Display, Deserialize, Serialize, Debug, Clone)]
#[derive(Display, Deserialize, Serialize, Debug, Clone, EnumString)]
pub enum MetaValueType {
/// Represents a String type
#[serde(rename = "string")]
#[strum(to_string = "string")]
#[strum(serialize = "string")]
String,
/// Represents an Integer type
#[serde(rename = "int")]
#[strum(to_string = "int")]
#[strum(serialize = "int")]
Integer,
/// Represents a Float type
#[serde(rename = "float")]
#[strum(to_string = "float")]
#[strum(serialize = "float")]
Float,
/// Represents a Boolean type
#[serde(rename = "bool")]
#[strum(to_string = "bool")]
#[strum(serialize = "bool")]
Bool,
/// Represents a SHA256 (string) type
#[serde(rename = "sha256")]
#[strum(to_string = "sha256 (string)")]
#[strum(serialize = "sha256", to_string = "sha256 (string)")]
SHA256,
/// Represents a SHA1 (string) type
#[serde(rename = "sha1")]
#[strum(to_string = "sha1 (string)")]
#[strum(serialize = "sha1", to_string = "sha1 (string)")]
SHA1,
/// Represents a MD5 (string) type
#[serde(rename = "md5")]
#[strum(to_string = "md5 (string)")]
#[strum(serialize = "md5", to_string = "md5 (string)")]
MD5,
/// Represents a generic hash (string) type. Can be MD5/SHA1/SHA256
#[serde(rename = "hash")]
#[strum(to_string = "hash (string)")]
#[strum(serialize = "hash", to_string = "hash (string)")]
HASH,
}
}
Expand Down
50 changes: 50 additions & 0 deletions py/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,13 @@ matches = rules.scan(b'some dummy data')

#![deny(missing_docs)]

use std::collections::BTreeMap;
use std::marker::PhantomPinned;
use std::mem;
use std::ops::Deref;
use std::path::PathBuf;
use std::pin::Pin;
use std::str::FromStr;
use std::time::Duration;

use protobuf_json_mapping::print_to_string as proto_to_json;
Expand Down Expand Up @@ -93,6 +95,54 @@ impl Compiler {
}
}

/// Specify required metadata during compilation. Any rule which does not
/// meet the required specifications will result in a warning.
///
/// The metadata argument must be a dictionary where the keys are the
/// required metadata identifiers and the value is a string which is the
/// required type for the metadata value.
///
/// See the ["check" section](https://virustotal.github.io/yara-x/docs/cli/config-file/)
/// of the config file guide for acceptable value types. Any invalid type
/// results in a ValueError being raised, and in that case the compiler's
/// required metadata is left unchanged.
#[pyo3(signature = (metadata))]
fn required_metadata(
    &mut self,
    metadata: BTreeMap<String, String>,
) -> PyResult<()> {
    // The map is owned, so consume it: identifiers are moved into the
    // converted map instead of being cloned. Collecting into a `PyResult`
    // stops at the first type name that fails to parse.
    let converted_metadata = metadata
        .into_iter()
        .map(|(identifier, type_name)| {
            yrx::config::MetaValueType::from_str(&type_name)
                .map(|value_type| (identifier, value_type))
                .map_err(|err| {
                    PyValueError::new_err(format!(
                        "Incorrect value \"{}\": {}",
                        type_name, err
                    ))
                })
        })
        .collect::<PyResult<BTreeMap<_, _>>>()?;

    self.inner.required_metadata(converted_metadata);
    Ok(())
}

/// Specify a regular expression that every rule name must match. A rule
/// whose name does not match this regex produces an InvalidRuleName
/// warning.
///
/// A ValueError is raised if the regexp does not compile.
#[pyo3(signature = (regexp_str))]
fn rule_name_regexp(&mut self, regexp_str: &str) -> PyResult<()> {
    match self.inner.rule_name_regexp(regexp_str) {
        // The inner compiler hands back a reference to itself, which is
        // of no use to Python callers.
        Ok(_) => Ok(()),
        Err(err) => Err(PyValueError::new_err(err.to_string())),
    }
}

/// Adds a YARA source code to be compiled.
///
/// This function may be invoked multiple times to add several sets of YARA
Expand Down
37 changes: 37 additions & 0 deletions py/tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,43 @@ def test_error_on_slow_pattern():
compiler.add_source(r'rule test {strings: $a = /a.*/ condition: $a}')


def test_invalid_rule_name_regexp():
    """A rule-name regexp that cannot be compiled raises ValueError."""
    broken_regexp = "(AXS|ERS"  # the group is never closed
    compiler = yara_x.Compiler()
    with pytest.raises(ValueError):
        compiler.rule_name_regexp(broken_regexp)


def test_invalid_required_metadata():
    """A required-metadata entry with an unknown type name raises ValueError."""
    unknown_type = {"test": "AXSERS"}
    compiler = yara_x.Compiler()
    with pytest.raises(ValueError):
        compiler.required_metadata(unknown_type)


def test_check_warnings():
    """The rule-name and required-metadata checks are reported as warnings."""
    compiler = yara_x.Compiler()
    # The setter is called `rule_name_regexp` (with a trailing "p"), the same
    # method exercised by test_invalid_rule_name_regexp.
    compiler.rule_name_regexp("^foobar_")
    compiler.required_metadata({"a": "int", "b": "string"})

    compiler.add_source(
        'rule test { meta: a = 1 strings: $a = {01 [0-1][0-1] 02 } condition: $a }')

    warnings = compiler.warnings()

    assert len(warnings) == 3

    assert warnings[0]['type'] == "MissingMetadata"
    assert warnings[0]['code'] == "missing_metadata"
    assert warnings[0]['title'] == "required metadata missing"

    # NOTE(review): `a = 1` looks like it already satisfies the required
    # "int" type - confirm that this warning (and the ordering of the three
    # warnings) is what the compiler really emits for this rule.
    assert warnings[1]['type'] == "IncorrectMetadataType"
    assert warnings[1]['code'] == "incorrect_metadata_type"
    assert warnings[1]['title'] == "metadata has incorrect type"

    # Third warning: each warning is checked at its own index.
    assert warnings[2]['type'] == "InvalidRuleName"
    assert warnings[2]['code'] == "invalid_rule_name"
    assert warnings[2]['title'] == "rule name does not meet requirements"


def test_int_globals():
compiler = yara_x.Compiler()
compiler.define_global('some_int', 1)
Expand Down

0 comments on commit 92dd0c7

Please sign in to comment.