Skip to content

Commit

Permalink
Revert "Improve spam filtering" (#6379)
Browse files (browse the repository at this point in the history)
  • Loading branch information
smithellis authored Nov 26, 2024
1 parent 439b819 commit c7260c7
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 44 deletions.
28 changes: 9 additions & 19 deletions kitsune/questions/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
from kitsune.questions.models import AAQConfig, Answer, Question
from kitsune.questions.utils import remove_pii
from kitsune.sumo.forms import KitsuneBaseForumForm
from kitsune.sumo.utils import check_for_spam_content
from kitsune.upload.models import ImageAttachment

# labels and help text
Expand Down Expand Up @@ -186,24 +185,6 @@ def __init__(self, product=None, *args, **kwargs):
topics = Topic.active.filter(products=product, in_aaq=True)
self.fields["category"].queryset = topics

def clean(self, *args, **kwargs):
"""
Generic clean method used by all forms in the question app.
Parse content for suspicious content.
- Toll free numbers
- NANP numbers
- Links - not necessarily spam content
"""

cdata = self.cleaned_data.get("content")
if not cdata:
return super().clean(*args, **kwargs)

if check_for_spam_content(cdata):
self.cleaned_data.update({"is_spam": True})

return self.cleaned_data

def save(self, user, locale, product, *args, **kwargs):
self.instance.creator = user
self.instance.locale = locale
Expand Down Expand Up @@ -247,6 +228,15 @@ class Meta:
model = Answer
fields = ("content",)

def clean(self, *args, **kwargs):
"""Override clean method to exempt question owner from spam filtering."""
cdata = super(AnswerForm, self).clean(*args, **kwargs)
# if there is a reply from the owner, remove the spam flag
if self.user and self.question and self.user == self.question.creator:
cdata.pop("is_spam", None)

return cdata


class WatchQuestionForm(forms.Form):
"""Form to subscribe to question updates."""
Expand Down
6 changes: 3 additions & 3 deletions kitsune/questions/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,12 +644,12 @@ def aaq(request, product_slug=None, step=1, is_loginless=False):
product=product,
)

# Submitting the question counts as a vote
question_vote(request, question.id)

if form.cleaned_data.get("is_spam"):
_add_to_moderation_queue(request, question)

# Submitting the question counts as a vote
question_vote(request, question.id)

my_questions_url = reverse("users.questions", args=[request.user.username])
messages.add_message(
request,
Expand Down
19 changes: 1 addition & 18 deletions kitsune/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1136,26 +1136,9 @@ def filter_exceptions(event, hint):
]

# Regexes
REGEX_TIMEOUT = config("REGEX_TIMEOUT", default=5, cast=int)
TOLL_FREE_REGEX = re.compile(r"^.*8(00|33|44|55|66|77|88)[2-9]\d{6,}$")
REGEX_TIMEOUT = config("REGEX_TIMEOUT", default=5, cast=int)
NANP_REGEX = re.compile(r"[0-9]{3}-?[a-zA-Z2-9][a-zA-Z0-9]{2}-?[a-zA-Z0-9]{4}")
ANY_PHONE_NUMBER = re.compile(
r"""
(?<!\w) # Assert position is not preceded by a word character (prevents partial matches)
(?:\+|00|011)? # Match optional country code prefix (+, 00, or 011)
[\s.-]?\(? # Optional separator (space, dot, dash) and optional opening parenthesis
\d{1,4} # Match 1-4 digits (area code or first part of the number)
\)? # Optional closing parenthesis
[\s.-]? # Optional separator (space, dot, dash)
\d{1,4} # Match 1-4 digits (first part of the phone number)
[\s.-]? # Optional separator (space, dot, dash)
\d{1,4} # Match 1-4 digits (second part of the phone number)
[\s.-]? # Optional separator (space, dot, dash)
\d{1,9} # Match 1-9 digits (remaining part of the phone number)
(?!\w) # Assert position is not followed by a word character (prevents partial matches)
""",
re.VERBOSE, # Allows for the use of comments and whitespace in the pattern for readability
)

if ES_ENABLE_CONSOLE_LOGGING and DEV:
es_trace_logger = logging.getLogger("elasticsearch.trace")
Expand Down
7 changes: 3 additions & 4 deletions kitsune/sumo/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,18 +321,17 @@ def check_for_spam_content(data):
- Toll free numbers
- Vanity toll free numbers
- Links in the text.
- Any phone number-ish string of digits
"""

# keep only digits
# keep only the digits in text
digits = "".join(filter(type(data).isdigit, data))
is_toll_free = settings.TOLL_FREE_REGEX.match(digits)

is_nanp_number = any(settings.NANP_REGEX.findall(data))
is_phone_number = any(settings.ANY_PHONE_NUMBER.findall(data))

has_links = has_blocked_link(data)

return is_toll_free or is_nanp_number or is_phone_number or has_links
return is_toll_free or is_nanp_number or has_links


@lru_cache(maxsize=settings.WEBPACK_LRU_CACHE)
Expand Down

0 comments on commit c7260c7

Please sign in to comment.