Skip to content

Commit

Permalink
feat: add command for removing old user data
Browse files Browse the repository at this point in the history
The command is intended to be run as part of scheduled maintenance
(e.g. cron jobs) to clean up old user data.

Refs KER-398
  • Loading branch information
nicobav committed Nov 22, 2024
1 parent 07bef10 commit 120621d
Show file tree
Hide file tree
Showing 3 changed files with 279 additions and 0 deletions.
53 changes: 53 additions & 0 deletions democracy/management/commands/remove_user_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
from datetime import timedelta

from django.conf import settings
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from django.utils import timezone

from democracy.utils import user_data_remover


class Command(BaseCommand):
    """Management command that removes old user data.

    Each kind of removal is opt-in through its own flag; running the command
    without any flag performs no removal. All selected removals share a single
    age threshold derived from ``--older-than-days``.
    """

    help = (
        "Remove user data older than a given number of days. "
        "Intended for scheduled maintenance runs."
    )

    def add_arguments(self, parser):
        """Register the removal flags and the age threshold option."""
        parser.add_argument(
            "--remove-user-data-from-old-objects",
            action="store_true",
            help="Remove user reference from old objects.",
        )
        parser.add_argument(
            "--delete-comment-version-history",
            action="store_true",
            help="Delete old comments version history.",
        )
        parser.add_argument(
            "--delete-users",
            action="store_true",
            help="Delete users without activity created before threshold.",
        )
        parser.add_argument(
            "--older-than-days",
            default=settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS,
            type=int,
            # Sentences are separated by an explicit trailing space; adjacent
            # string literals are concatenated without one.
            help=(
                "Specify the number of days for removal; defaults to "
                f"{settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS}. "
                "Data as old or older than this will be removed."
            ),
        )

    @transaction.atomic
    def handle(self, *args, **options):
        """Run the selected removals inside one database transaction.

        Reads ``older_than_days`` and the three boolean flags from ``options``
        and passes the computed threshold time to the matching
        ``user_data_remover`` functions.

        Raises:
            CommandError: if ``older_than_days`` is negative.
        """
        older_than_days = options["older_than_days"]
        if older_than_days < 0:
            # A negative value would place the threshold in the future, so
            # every object would count as "old" and be processed.
            raise CommandError(
                "--older-than-days must be zero or a positive number of days."
            )

        threshold_time = timezone.now() - timedelta(days=older_than_days)

        if options["remove_user_data_from_old_objects"]:
            user_data_remover.remove_old_objects_user_data(threshold_time)
            user_data_remover.remove_user_from_old_comments(threshold_time)
            user_data_remover.remove_user_votes_from_old_comments(threshold_time)
            user_data_remover.remove_user_from_old_poll_answers(threshold_time)
            user_data_remover.remove_user_from_old_hearings(threshold_time)
            user_data_remover.remove_contact_persons_from_old_hearings(threshold_time)

        if options["delete_comment_version_history"]:
            user_data_remover.delete_old_comments_versions(threshold_time)

        if options["delete_users"]:
            user_data_remover.delete_old_users_without_activity(threshold_time)
221 changes: 221 additions & 0 deletions democracy/tests/integrationtest/test_remove_user_data_command.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
import freezegun
import reversion
from django.conf import settings
from django.core.management import call_command
from django.test import TestCase
from django.utils import timezone
from reversion.models import Version

from democracy.factories.hearing import (
MinimalHearingFactory,
SectionCommentFactory,
SectionFactory,
)
from democracy.factories.poll import SectionPollFactory, SectionPollOptionFactory
from democracy.models import ContactPerson, SectionComment, SectionPollAnswer
from kerrokantasi.models import User
from kerrokantasi.tests.factories import UserFactory


def run_remove_user_data_command(*args):
    """Invoke the ``remove_user_data`` management command.

    Positional ``args`` are forwarded unchanged as command-line arguments.
    """
    command_name = "remove_user_data"
    call_command(command_name, *args)


class RemoveUserDataCommandTestCase(TestCase):
    """Integration tests for the ``remove_user_data`` management command.

    The fixtures consist of an "old" data set, created with the clock frozen
    one day past the default removal threshold, and a "new" data set created
    at the real current time. The tests check that the command affects only
    the old data set.
    """

    @classmethod
    def setUpTestData(cls):
        """Create the old and new fixture objects shared by all tests."""
        # Freeze time at (default threshold + 1 day) in the past so everything
        # created in this block is older than the default threshold.
        with freezegun.freeze_time(
            timezone.now()
            - timezone.timedelta(
                days=settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS + 1
            )
        ):
            sec = SectionFactory(post=True)
            cls.old_user = UserFactory(username="old_user", date_joined=timezone.now())
            cls.old_user_without_activity = UserFactory(
                username="tobe_deleted_user", date_joined=timezone.now()
            )
            # NOTE(review): the extent of this revision block was reconstructed
            # from a listing that had lost its indentation -- confirm against
            # the repository.
            with reversion.create_revision():
                cls.old_section_comment = SectionCommentFactory(
                    created_by=cls.old_user, section=sec, post=True
                )
                cls.old_section_comment.title = "Old Title"
                cls.old_section_comment.save()
                cls.old_section_comment.voters.add(cls.old_user)
                cls.old_section_comment.recache_n_votes()
                cls.old_hearing = MinimalHearingFactory(created_by=cls.old_user)
                # Presumably clears comments generated by the hearing factory
                # so that only the explicit fixtures remain -- TODO confirm.
                SectionComment.objects.filter(section__hearing=cls.old_hearing).delete()
                cls.old_contact_person = ContactPerson.objects.create(
                    name="Old Contact Person", created_by=cls.old_user
                )
                cls.old_hearing.contact_persons.add(cls.old_contact_person)
                poll = SectionPollFactory(section=sec)
                option = SectionPollOptionFactory(poll=poll)
                cls.old_poll_answer = SectionPollAnswer.objects.create(
                    created_by=cls.old_user,
                    option=option,
                    comment=cls.old_section_comment,
                )

        # Counterpart objects created at the real current time.
        cls.new_user = UserFactory(username="newer_user", date_joined=timezone.now())
        cls.new_section_comment = SectionCommentFactory(
            created_by=cls.new_user, section=sec, post=True
        )
        cls.new_hearing = MinimalHearingFactory(
            created_by=cls.new_user, close_at=timezone.now()
        )
        cls.new_contact_person = ContactPerson.objects.create(
            name="New Contact Person", created_by=cls.new_user
        )
        cls.new_hearing.contact_persons.add(cls.new_contact_person)
        cls.new_poll_answer = SectionPollAnswer.objects.create(
            created_by=cls.new_user, option=option, comment=cls.new_section_comment
        )

    # Attribute names of the old fixtures whose ``created_by`` is checked by
    # the ``assert_old_objects_*`` helpers.
    old_objects = [
        "old_section_comment",
        "old_poll_answer",
        "old_hearing",
    ]
    # Attribute names of the new fixtures whose ``created_by`` must still be
    # ``new_user`` after the command has run.
    new_objects = [
        "new_section_comment",
        "new_hearing",
        "new_contact_person",
        "new_poll_answer",
    ]

    def test_delete_user(self):
        """Deleting a user directly clears ``created_by`` but keeps the objects."""
        self.old_user.delete()
        self.old_section_comment.refresh_from_db()
        self.old_hearing.refresh_from_db()
        self.old_poll_answer.refresh_from_db()
        self.old_contact_person.refresh_from_db()
        self.assertIsNone(self.old_section_comment.created_by)
        self.assertIsNone(self.old_hearing.created_by)
        self.assertIsNone(self.old_poll_answer.created_by)
        self.assertIsNone(self.old_contact_person.created_by)
        # The comment itself must survive with its content intact.
        self.assertIsNotNone(self.old_section_comment.content)
        self.assertNotEqual(self.old_section_comment.content, "")
        self.assertGreater(self.old_section_comment.id, 0)

    def assert_old_objects_created_by_matches(self, exclude=()):
        """Assert each old fixture (minus ``exclude``) is still owned by ``old_user``."""
        for model in [model for model in self.old_objects if model not in exclude]:
            obj = getattr(self, model)
            obj.refresh_from_db()
            self.assertEqual(obj.created_by, self.old_user)

    def assert_old_objects_created_by_none(self, exclude=()):
        """Assert each old fixture (minus ``exclude``) has ``created_by`` set to None."""
        for model in [model for model in self.old_objects if model not in exclude]:
            obj = getattr(self, model)
            obj.refresh_from_db()
            self.assertIsNone(obj.created_by)

    def assert_new_objects_created_by_matches(self):
        """Assert every new fixture is still owned by ``new_user``."""
        for model in self.new_objects:
            obj = getattr(self, model)
            obj.refresh_from_db()
            self.assertEqual(obj.created_by, self.new_user)

    def test_all_options(self):
        """Test remove_user_data command with all options."""
        args = [
            "--remove-user-data-from-old-objects",
            "--delete-comment-version-history",
            "--delete-users",
            "--older-than-days",
            str(settings.DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS),
        ]
        run_remove_user_data_command(*args)

        self.old_section_comment.refresh_from_db()
        self.old_poll_answer.refresh_from_db()
        self.old_hearing.refresh_from_db()
        # ``filter(...).first()`` yields None for a deleted user instead of raising.
        old_user = User.objects.filter(id=self.old_user.id).first()
        user_without_activity = User.objects.filter(
            username=self.old_user_without_activity.username
        ).first()
        self.new_user.refresh_from_db()

        self.assert_old_objects_created_by_none()
        self.assertEqual(0, self.old_hearing.contact_persons.count())
        # The registered vote is expected to be counted as unregistered while
        # the total number of votes stays the same.
        self.assertEqual(self.old_section_comment.n_unregistered_votes, 1)
        self.assertEqual(self.old_section_comment.voters.count(), 0)
        self.assertEqual(self.old_section_comment.n_votes, 1)
        self.assert_new_objects_created_by_matches()
        self.assertEqual(
            0, Version.objects.get_for_object(self.old_section_comment).count()
        )

        # With its references removed in the same run, ``old_user`` is expected
        # to be deleted along with the user that never had activity.
        self.assertIsNone(user_without_activity)
        self.assertIsNone(old_user)
        self.assertIsNotNone(self.new_user)

    def test_remove_only_user_data_from_old_objects(self):
        """Test remove_user_data command with remove_user_data_from_old_objects option."""
        args = ["--remove-user-data-from-old-objects"]
        run_remove_user_data_command(*args)

        self.old_section_comment.refresh_from_db()
        self.old_poll_answer.refresh_from_db()
        self.old_hearing.refresh_from_db()

        self.assert_old_objects_created_by_none()
        self.assertEqual(self.old_section_comment.n_unregistered_votes, 1)
        self.assertEqual(self.old_section_comment.voters.count(), 0)
        self.assertEqual(self.old_section_comment.n_votes, 1)
        self.assertEqual(0, self.old_hearing.contact_persons.count())

        self.assert_new_objects_created_by_matches()

    def test_remove_user_data_from_old_objects_with_delete_version_option(self):
        """Test remove_user_data command with the remove_user_data_from_old_objects
        and delete_comment_version_history options combined."""
        args = [
            "--remove-user-data-from-old-objects",
            "--delete-comment-version-history",
        ]

        # Precondition: the old comment has version history before the run.
        self.assertGreater(
            Version.objects.get_for_object(self.old_section_comment).count(), 0
        )
        run_remove_user_data_command(*args)

        self.old_section_comment.refresh_from_db()
        self.old_poll_answer.refresh_from_db()
        self.old_hearing.refresh_from_db()

        self.assert_old_objects_created_by_none()
        self.assertEqual(self.old_section_comment.n_unregistered_votes, 1)
        self.assertEqual(self.old_section_comment.voters.count(), 0)
        self.assertEqual(self.old_section_comment.n_votes, 1)
        self.assertEqual(0, self.old_hearing.contact_persons.count())

        self.assert_new_objects_created_by_matches()

        self.assertEqual(
            Version.objects.get_for_object(self.old_section_comment).count(), 0
        )

    def test_only_delete_inactive_users(self):
        """Test remove_user_data command with delete_users option."""
        args = ["--delete-users"]
        run_remove_user_data_command(*args)

        self.old_section_comment.refresh_from_db()
        self.old_poll_answer.refresh_from_db()
        self.old_hearing.refresh_from_db()

        # Without the removal flag, the old objects must be left untouched.
        self.assert_old_objects_created_by_matches()
        self.assertEqual(self.old_section_comment.n_unregistered_votes, 0)
        self.assertEqual(self.old_section_comment.voters.count(), 1)
        self.assertEqual(self.old_section_comment.n_votes, 1)
        self.assertEqual(1, self.old_hearing.contact_persons.count())

        self.assert_new_objects_created_by_matches()
        old_user = User.objects.filter(id=self.old_user.id).first()
        user_without_activity = User.objects.filter(
            username=self.old_user_without_activity.username
        ).first()
        self.new_user.refresh_from_db()
        # Only the old user that never had activity is expected to be deleted.
        self.assertIsNone(user_without_activity)
        self.assertIsNotNone(old_user)
        self.assertIsNotNone(self.new_user)
5 changes: 5 additions & 0 deletions kerrokantasi/settings/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ def get_git_revision_hash():
GDPR_API_DELETE_SCOPE=(str, "gdprdelete"),
# Audit logging
AUDIT_LOG_ENABLED=(bool, False),
DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS=(int, 365 * 5), # Five years.
)

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
Expand Down Expand Up @@ -433,3 +434,7 @@ def get_git_revision_hash():
"ENABLED": env("AUDIT_LOG_ENABLED"),
"ORIGIN": "kerrokantasi",
}

# Age threshold in days, read from the environment. The remove_user_data
# management command uses it as the default for its --older-than-days option.
DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS = env(
"DEFAULT_USER_DATA_REMOVAL_THRESHOLD_DAYS"
)

0 comments on commit 120621d

Please sign in to comment.