Skip to content

Commit

Permalink
Fixed MIXED_CHARSET spam filter check
Browse files Browse the repository at this point in the history
  • Loading branch information
mdecimus committed Jan 5, 2025
1 parent cae7d43 commit 043b53f
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 14 deletions.
2 changes: 1 addition & 1 deletion crates/common/src/scripts/functions/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ pub fn register_functions_trusted() -> FunctionMap {
.with_function("winnow", fn_winnow)
.with_function("has_zwsp", fn_has_zwsp)
.with_function("has_obscured", fn_has_obscured)
.with_function("is_single_script", fn_is_single_script)
.with_function("is_mixed_charset", fn_is_mixed_charset)
.with_function("puny_decode", fn_puny_decode)
.with_function("unicode_skeleton", fn_unicode_skeleton)
.with_function("cure_text", fn_cure_text)
Expand Down
9 changes: 5 additions & 4 deletions crates/common/src/scripts/functions/unicode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
*/

use sieve::{runtime::Variable, Context};
use unicode_security::MixedScript;

use crate::scripts::IsMixedCharset;

pub fn fn_is_ascii<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
match &v[0] {
Expand Down Expand Up @@ -80,12 +81,12 @@ pub fn fn_unicode_skeleton<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable
.into()
}

pub fn fn_is_single_script<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
pub fn fn_is_mixed_charset<'x>(_: &'x Context<'x>, v: Vec<Variable>) -> Variable {
let text = v[0].to_string();
if !text.is_empty() {
text.as_ref().is_single_script()
text.as_ref().is_mixed_charset()
} else {
true
false
}
.into()
}
19 changes: 19 additions & 0 deletions crates/common/src/scripts/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use std::sync::Arc;

use sieve::{runtime::Variable, Envelope};
use store::Value;
use unicode_security::mixed_script::AugmentedScriptSet;

use crate::IntoString;

Expand Down Expand Up @@ -56,3 +57,21 @@ pub fn to_store_value(value: &Variable) -> Value<'static> {
v => Value::Text(v.to_string().into_owned().into()),
}
}

/// Mixed-script ("mixed charset") detection for text values.
///
/// This module provides a blanket implementation for every `T: AsRef<str>`,
/// so the method can be called directly on `&str`, `String` and similar types.
pub trait IsMixedCharset {
/// Returns `true` when the value is considered to mix writing systems.
///
/// In the blanket `AsRef<str>` implementation, ASCII characters are ignored
/// and the result is `true` only if the script sets of the remaining
/// (non-ASCII) characters have an empty intersection. Text with no
/// non-ASCII characters yields `false`.
fn is_mixed_charset(&self) -> bool;
}

impl<T: AsRef<str>> IsMixedCharset for T {
    /// Reports whether the non-ASCII characters of this string belong to
    /// incompatible scripts.
    ///
    /// ASCII characters are skipped entirely. Every other character is
    /// converted into an [`AugmentedScriptSet`] and intersected into a running
    /// set that starts from `AugmentedScriptSet::default()`. The text counts as
    /// mixed when that running intersection ends up empty.
    ///
    /// Returns `false` when the string contains no non-ASCII characters
    /// (including the empty string), because no set is ever built in that case.
    fn is_mixed_charset(&self) -> bool {
        self.as_ref()
            .chars()
            // ASCII never contributes to the script intersection.
            .filter(|ch| !ch.is_ascii())
            // `None` means "no non-ASCII character seen yet"; the accumulator
            // is created lazily on the first one.
            .fold(None, |acc: Option<AugmentedScriptSet>, ch| {
                let mut scripts = acc.unwrap_or_default();
                scripts.intersect_with(ch.into());
                Some(scripts)
            })
            .is_some_and(|scripts| scripts.is_empty())
    }
}
10 changes: 6 additions & 4 deletions crates/spam-filter/src/analysis/mime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,14 @@
use std::{collections::HashSet, future::Future, vec};

use common::{
scripts::functions::{array::cosine_similarity, unicode::CharUtils},
scripts::{
functions::{array::cosine_similarity, unicode::CharUtils},
IsMixedCharset,
},
Server,
};
use mail_parser::{HeaderName, MimeHeaders, PartType};
use nlp::tokenizers::types::TokenType;
use unicode_security::MixedScript;

use crate::{SpamFilterContext, TextPart};

Expand Down Expand Up @@ -304,8 +306,8 @@ impl SpamFilterAnalyzeMime for Server {
|| ctx.input.message.html_body.contains(&part_id)
})
.map_or(false, |p| match p {
TextPart::Plain { text_body, .. } => !text_body.is_single_script(),
TextPart::Html { text_body, .. } => !text_body.is_single_script(),
TextPart::Plain { text_body, .. } => text_body.is_mixed_charset(),
TextPart::Html { text_body, .. } => text_body.is_mixed_charset(),
TextPart::None => false,
})
{
Expand Down
4 changes: 2 additions & 2 deletions crates/spam-filter/src/analysis/url.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ use std::{borrow::Cow, future::Future, time::Duration};

use common::config::spamfilter::{Element, IpResolver, Location};
use common::scripts::functions::unicode::CharUtils;
use common::scripts::IsMixedCharset;
use common::Server;
use hyper::{header::LOCATION, Uri};
use nlp::tokenizers::types::TokenType;
use reqwest::redirect::Policy;
use unicode_security::MixedScript;

use crate::modules::dnsbl::check_dnsbl;
use crate::modules::expression::StringResolver;
Expand Down Expand Up @@ -244,7 +244,7 @@ impl SpamFilterAnalyzeUrl for Server {
}
}

if !host.fqdn.is_single_script() {
if host.fqdn.is_mixed_charset() {
ctx.result.add_tag("MIXED_CHARSET_URL");
}
}
Expand Down
4 changes: 2 additions & 2 deletions tests/resources/smtp/antispam/combined.test
Original file line number Diff line number Diff line change
Expand Up @@ -1021,8 +1021,8 @@ dmarc.result pass
dmarc.policy reject
remote_ip 173.224.123.255
tls.version TLS1_2
expect_header X-Spam-Result: DMARC_POLICY_ALLOW (-0.50), DKIM_ALLOW (-0.20), SPF_ALLOW (-0.20), ARC_NA (0.00), DKIM_SIGNED (0.00), FROM_EQ_ENV_FROM (0.00), FROM_HAS_DN (0.00), HAS_EXTERNAL_IMG (0.00), HAS_REPLYTO (0.00), HAS_X_PRIO_THREE (0.00), HTML_SHORT_1 (0.00), RCPT_COUNT_ONE (0.00), REPLYTO_DN_EQ_FROM_DN (0.00), REPLYTO_DOM_EQ_FROM_DOM (0.00), TO_DN_ALL (0.00), TO_EQ_FROM (0.00), RCVD_COUNT_ZERO (0.10), RCVD_NO_TLS_LAST (0.10), HELO_NORES_A_OR_MX (0.30), MID_RHS_NOT_FQDN (0.50), UNPARSABLE_URL (0.50), FROMHOST_NORES_A_OR_MX (1.50), DIRECT_TO_MX (2.00), FORGED_RECIPIENTS (2.00), SUBJ_ALL_CAPS (3.00)
expect_header X-Spam-Status: Yes, score=9.10
expect_header X-Spam-Result: DMARC_POLICY_ALLOW (-0.50), DKIM_ALLOW (-0.20), SPF_ALLOW (-0.20), ARC_NA (0.00), DKIM_SIGNED (0.00), FROM_EQ_ENV_FROM (0.00), FROM_HAS_DN (0.00), HAS_EXTERNAL_IMG (0.00), HAS_REPLYTO (0.00), HAS_X_PRIO_THREE (0.00), HTML_SHORT_1 (0.00), RCPT_COUNT_ONE (0.00), REPLYTO_DN_EQ_FROM_DN (0.00), REPLYTO_DOM_EQ_FROM_DOM (0.00), TO_DN_ALL (0.00), TO_EQ_FROM (0.00), RCVD_COUNT_ZERO (0.10), RCVD_NO_TLS_LAST (0.10), HELO_NORES_A_OR_MX (0.30), MID_RHS_NOT_FQDN (0.50), UNPARSABLE_URL (0.50), DATE_IN_PAST (1.00), FROMHOST_NORES_A_OR_MX (1.50), DIRECT_TO_MX (2.00), FORGED_RECIPIENTS (2.00), SUBJ_ALL_CAPS (3.00)
expect_header X-Spam-Status: Yes, score=10.10

Return-Path: <[email protected]>
DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; s=default; d=landeray.com;
Expand Down
2 changes: 1 addition & 1 deletion tests/resources/smtp/antispam/url.test
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ expect MIXED_CHARSET_URL

Subject: test

my site is https://www.xn--80ak6aa92e.com/
my site is https://www.xn--1ca81o6aa92e.com/
<!-- NEXT TEST -->
expect UNPARSABLE_URL

Expand Down

0 comments on commit 043b53f

Please sign in to comment.