Skip to content

Commit

Permalink
Delete correct files from remote settings:
Browse files Browse the repository at this point in the history
- delete any existing attachments matching filters we are reuploading
- delete any stashes that are older than the oldest filter
  • Loading branch information
KevinMind committed Nov 22, 2024
1 parent 42bd98c commit aefa829
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 58 deletions.
106 changes: 63 additions & 43 deletions src/olympia/blocklist/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,54 +108,28 @@ def upload_filter(generation_time, filter_list=None, create_stash=False):
bucket, REMOTE_SETTINGS_COLLECTION_MLBF, sign_off_needed=False
)
mlbf = MLBF.load_from_storage(generation_time, error_on_missing=True)
is_base = len(filter_list) > 0
oldest_base_filter_id: int | None = None
# Download old records before uploading new ones
# this ensures we do not delete any records we just uploaded
old_records = server.records()
attachment_types_to_delete = []

if is_base:
for block_type in filter_list:
data = {
'key_format': MLBF.KEY_FORMAT,
'generation_time': generation_time,
'attachment_type': BLOCKLIST_RECORD_MLBF_BASE(block_type),
}
with mlbf.storage.open(mlbf.filter_path(block_type), 'rb') as filter_file:
attachment = ('filter.bin', filter_file, 'application/octet-stream')
server.publish_attachment(data, attachment)
base_filter_id: int | None = get_config(
# Currently we read from the old singular config key for
# hard blocks to preserve backward compatibility.
# In https://github.com/mozilla/addons/issues/15193
# we can remove this and start reading from the new plural key.
MLBF_BASE_ID_CONFIG_KEY(block_type, compat=True),
json_value=True,
)
if base_filter_id is not None:
if oldest_base_filter_id is None:
oldest_base_filter_id = base_filter_id
else:
oldest_base_filter_id = min(oldest_base_filter_id, base_filter_id)
for block_type in filter_list:
attachment_type = BLOCKLIST_RECORD_MLBF_BASE(block_type)
data = {
'key_format': MLBF.KEY_FORMAT,
'generation_time': generation_time,
'attachment_type': attachment_type,
}
with mlbf.storage.open(mlbf.filter_path(block_type), 'rb') as filter_file:
attachment = ('filter.bin', filter_file, 'application/octet-stream')
server.publish_attachment(data, attachment)
# After we have successfully uploaded the new filter
# we can safely delete others of that type
attachment_types_to_delete.append(attachment_type)

statsd.incr('blocklist.tasks.upload_filter.upload_mlbf')
statsd.incr('blocklist.tasks.upload_filter.upload_mlbf.base')

if oldest_base_filter_id is not None:
for record in server.records():
record_time = int(
record['stash_time']
if 'stash_time' in record
else record['generation_time']
if 'generation_time' in record
# If we don't have a generation time to use
# then setting the base filter id will ensure
# the record is not deleted.
else oldest_base_filter_id
)
if record_time < oldest_base_filter_id:
server.delete_record(record['id'])

cleanup_old_files.delay(base_filter_id=oldest_base_filter_id)
statsd.incr('blocklist.tasks.upload_filter.reset_collection')

# It is possible to upload a stash and a filter in the same task
if create_stash:
with mlbf.storage.open(mlbf.stash_path, 'r') as stash_file:
Expand All @@ -169,8 +143,12 @@ def upload_filter(generation_time, filter_list=None, create_stash=False):
server.publish_record(stash_upload_data)
statsd.incr('blocklist.tasks.upload_filter.upload_stash')

# Commit the changes to remote settings for review.
# only after this can we safely update config timestamps
server.complete_session()
set_config(MLBF_TIME_CONFIG_KEY, generation_time, json_value=True)

# Update the base_filter_id for uploaded filters
for block_type in filter_list:
# We currently write to the old singular config key for hard blocks
# to preserve backward compatibility.
Expand All @@ -187,6 +165,48 @@ def upload_filter(generation_time, filter_list=None, create_stash=False):
MLBF_BASE_ID_CONFIG_KEY(block_type), generation_time, json_value=True
)

oldest_base_filter_id: int | None = None

# Get the oldest base_filter_id from the set of defined IDs
# We should delete stashes that are older than this time
for block_type in BlockType:
base_filter_id = get_config(
# Currently we read from the old singular config key for
# hard blocks to preserve backward compatibility.
# In https://github.com/mozilla/addons/issues/15193
# we can remove this and start reading from the new plural key.
MLBF_BASE_ID_CONFIG_KEY(block_type, compat=True),
json_value=True,
)
if base_filter_id is not None:
if oldest_base_filter_id is None:
oldest_base_filter_id = base_filter_id
else:
oldest_base_filter_id = min(oldest_base_filter_id, base_filter_id)

for record in old_records:
# Delete attachment records that match the
# attachment types of filters we just uploaded
# this ensures we only have one filter attachment
# per block_type
if 'attachment' in record:
attachment_type = record['attachment_type']

if attachment_type in attachment_types_to_delete:
server.delete_record(record['id'])

# Delete stash records that are older than the oldest
# pre-existing filter attachment. Such stashes cannot
# apply to any remaining filter, so they are safe to remove.
elif 'stash' in record and oldest_base_filter_id is not None:
record_time = record['stash_time']

if record_time < oldest_base_filter_id:
server.delete_record(record['id'])

cleanup_old_files.delay(base_filter_id=oldest_base_filter_id)
statsd.incr('blocklist.tasks.upload_filter.reset_collection')


@task
def cleanup_old_files(*, base_filter_id):
Expand Down
51 changes: 37 additions & 14 deletions src/olympia/blocklist/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,17 @@ def test_calls_save_to_block_objects_or_delete_block_objects_depending_on_action

@pytest.mark.django_db
class TestUploadMLBFToRemoteSettings(TestCase):
def _attachment(self, id, attachment_type, generation_time):
return {
'id': id,
'attachment': {},
'generation_time': generation_time,
'attachment_type': attachment_type,
}

def _stash(self, id, stash_time):
return {'id': id, 'stash': {}, 'stash_time': stash_time}

def setUp(self):
self.user = user_factory()
self.addon = addon_factory()
Expand Down Expand Up @@ -287,36 +298,48 @@ def test_skip_cleanup_when_no_filters(self):

def test_cleanup_old_records(self):
"""
Clean up 0 because its the only record older than the generation time.
Clean up 0 because it's the only record matching the uploaded filters'
attachment_type or is older than generation_time
"""
self._test_cleanup_old_records(
filter_list={
BlockType.BLOCKED: self.generation_time,
},
records=[
{'id': '0', 'generation_time': self.generation_time - 1},
{'id': '1', 'generation_time': self.generation_time},
{'id': '2', 'generation_time': self.generation_time + 1},
self._attachment(0, 'bloomfilter-base', self.generation_time - 1),
self._attachment(
1, 'softblocks-bloomfilter-base', self.generation_time
),
self._stash(2, self.generation_time + 1),
],
expected_calls=[mock.call('0')],
expected_calls=[mock.call(0)],
)

def test_cleanup_oldest_records(self):
def test_cleanup_oldest_stash_records(self):
"""
Clean up 0 and 1 because they are both older than
the oldest filter (generation_time + 1).
The oldest base id time is (self.generation_time - 3).
Delete record 0 because it matches an uploaded attachment type.
Delete record 1 because it is older than the oldest base id.
"""
self._test_cleanup_old_records(
filter_list={
BlockType.BLOCKED: self.generation_time + 2,
BlockType.SOFT_BLOCKED: self.generation_time + 1,
BlockType.BLOCKED: self.generation_time - 3,
},
records=[
{'id': '0', 'generation_time': self.generation_time - 1},
{'id': '1', 'generation_time': self.generation_time},
{'id': '2', 'generation_time': self.generation_time + 1},
# Deleted, matching attachment
self._attachment(0, 'bloomfilter-base', self.generation_time - 5),
self._stash(
1, self.generation_time - 4
), # deleted older than oldest filter
# Not deleted, not matching attachment
self._attachment(
1, 'softblocks-bloomfilter-base', self.generation_time - 3
),
# Both not deleted: not older than the oldest filter
self._stash(1, self.generation_time - 2),
self._stash(2, self.generation_time - 1),
],
expected_calls=[mock.call('0'), mock.call('1')],
expected_calls=[mock.call(0), mock.call(1)],
)

def test_create_stashed_filter(self):
Expand Down
10 changes: 9 additions & 1 deletion src/olympia/lib/remote_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,15 @@ def delete_record(self, legacy_id):
f'{settings.REMOTE_SETTINGS_WRITER_URL}buckets/{self.bucket}/'
f'collections/{self.collection}/records/{legacy_id}'
)
requests.delete(url, headers=self.headers)
response = requests.delete(url, headers=self.headers)
if response.status_code not in (200, 201):
log.error(
f'Deleting record [{legacy_id}] failed: {response.content}',
stack_info=True,
)
raise ConnectionError('Remote settings record not deleted')

log.info(f'Deleted record [{legacy_id}]')
self._changes = True

def delete_all_records(self):
Expand Down

0 comments on commit aefa829

Please sign in to comment.