Skip to content

Commit

Permalink
Ensure blocks of type X are excluded from stash if filter of type X i…
Browse files Browse the repository at this point in the history
…s also being uploaded
  • Loading branch information
KevinMind committed Nov 21, 2024
1 parent c14077c commit 766f489
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 42 deletions.
15 changes: 9 additions & 6 deletions src/olympia/blocklist/cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
)

base_filters_to_update: List[BlockType] = []
update_stash = False
create_stash = False

# Determine which base filters need to be re uploaded
# and whether a new stash needs to be created
Expand All @@ -83,7 +83,9 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
if block_type == BlockType.SOFT_BLOCKED and not waffle.switch_is_active(
'enable-soft-blocking'
):
log.info("Skipping soft-blocks because enable-soft-blocking switch is inactive")
log.info(
'Skipping soft-blocks because enable-soft-blocking switch is inactive'
)
continue

base_filter = MLBF.load_from_storage(get_base_generation_time(block_type))
Expand All @@ -108,11 +110,12 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
# only update the stash if we should AND if
# we aren't already reuploading the filter for this block type
elif should_update_stash:
update_stash = True
create_stash = True

skip_update = len(base_filters_to_update) == 0 and not update_stash
skip_update = len(base_filters_to_update) == 0 and not create_stash
if skip_update:
log.info('No new/modified/deleted Blocks in database; skipping MLBF generation')
# Delete the locally generated MLBF directory and files as they are not needed
mlbf.delete()
return

Expand All @@ -129,7 +132,7 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
len(mlbf.data.not_blocked_items),
)

if update_stash:
if create_stash:
mlbf.generate_and_write_stash(previous_filter)

for block_type in base_filters_to_update:
Expand All @@ -138,7 +141,7 @@ def _upload_mlbf_to_remote_settings(*, force_base=False):
upload_filter.delay(
mlbf.created_at,
filter_list=[key.name for key in base_filters_to_update],
upload_stash=update_stash,
create_stash=create_stash,
)


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def add_arguments(self, parser):
'--block-type',
help='Block type to export',
default=None,
choices=[block_type.name for block_type in BlockType],
)

def load_json(self, json_path):
Expand Down
49 changes: 31 additions & 18 deletions src/olympia/blocklist/mlbf.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,28 +32,29 @@ def ordered_diff_lists(
return extras, deletes, changed_count


def generate_mlbf(stats, blocked, not_blocked):
def generate_mlbf(stats, include, exclude):
log.info('Starting to generating bloomfilter')

cascade = FilterCascade(
defaultHashAlg=HashAlgorithm.SHA256,
salt=secrets.token_bytes(16),
)

len_blocked = len(blocked)
len_unblocked = len(not_blocked)
len_include = len(include)
len_exclude = len(exclude)

# We can only set error rates if both blocked and unblocked are non-empty
if len_blocked > 0 and len_unblocked > 0:
error_rates = sorted((len_blocked, len_unblocked))
# We can only set error rates if both include and exclude are non-empty
if len_include > 0 and len_exclude > 0:
error_rates = sorted((len_include, len_exclude))
cascade.set_crlite_error_rates(
include_len=error_rates[0], exclude_len=error_rates[1]
)

stats['mlbf_blocked_count'] = len(blocked)
stats['mlbf_notblocked_count'] = len(not_blocked)
# TODO: these stats need to be block_type aware
stats['mlbf_blocked_count'] = len(include)
stats['mlbf_notblocked_count'] = len(exclude)

cascade.initialize(include=blocked, exclude=not_blocked)
cascade.initialize(include=include, exclude=exclude)

stats['mlbf_version'] = cascade.version
stats['mlbf_layers'] = cascade.layerCount()
Expand All @@ -63,7 +64,7 @@ def generate_mlbf(stats, blocked, not_blocked):
f'Filter cascade layers: {cascade.layerCount()}, ' f'bit: {cascade.bitCount()}'
)

cascade.verify(include=blocked, exclude=not_blocked)
cascade.verify(include=include, exclude=exclude)
return cascade


Expand Down Expand Up @@ -221,20 +222,32 @@ def delete(self):
log.info(f'Deleted {self.storage.base_location}')

def generate_and_write_filter(self, block_type: BlockType):
"""
Generate and write the bloom filter for a given block type.
Included items will be items in the specified block type list.
Excluded items will be items in all other data types.
We use the language of inlcude and exclude to distinguish this concept
from blocked and unblocked which are more specific to the block type.
"""
stats = {}

blocked = self.data[block_type]
include_items = []
exclude_items = []

not_blocked = [
self.data[_block_type]
for _block_type in BlockType
if _block_type != block_type
]
# Map over the data types in the MLBFDataType enum
for data_type in MLBFDataType:
# if the data type is in the specified block type,
# add it to the include items
if data_type == block_type:
include_items = self.data[data_type]
# otherwise add items to the exclude items
else:
exclude_items.extend(self.data[data_type])

bloomfilter = generate_mlbf(
stats=stats,
blocked=blocked,
not_blocked=not_blocked,
include=include_items,
exclude=exclude_items,
)

# write bloomfilter
Expand Down
4 changes: 2 additions & 2 deletions src/olympia/blocklist/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def monitor_remote_settings():


@task
def upload_filter(generation_time, filter_list=None, upload_stash=False):
def upload_filter(generation_time, filter_list=None, create_stash=False):
# We cannot send enum values to tasks so we serialize them as strings
# and deserialize them here back to the enum values.
filter_list: List[BlockType] = (
Expand Down Expand Up @@ -152,7 +152,7 @@ def upload_filter(generation_time, filter_list=None, upload_stash=False):
statsd.incr('blocklist.tasks.upload_filter.reset_collection')

# It is possible to upload a stash and a filter in the same task
if upload_stash:
if create_stash:
with mlbf.storage.open(mlbf.stash_path, 'r') as stash_file:
stash_data = json.load(stash_file)
# If we have a stash, write that
Expand Down
26 changes: 13 additions & 13 deletions src/olympia/blocklist/tests/test_cron.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False):
mock.call(
self.current_time,
filter_list=filter_list,
upload_stash=False,
create_stash=False,
)
) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list

Expand Down Expand Up @@ -149,7 +149,7 @@ def _test_skip_update_unless_no_base_mlbf(
mock.call(
self.current_time,
filter_list=filter_list,
upload_stash=False,
create_stash=False,
)
) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list

Expand Down Expand Up @@ -197,7 +197,7 @@ def test_missing_last_filter_uses_base_filter(self):
mock.call(
self.current_time,
filter_list=[],
upload_stash=True,
create_stash=True,
)
) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list

Expand Down Expand Up @@ -313,7 +313,7 @@ def _test_upload_stash_unless_force_base(
mock.call(
self.current_time,
filter_list=[],
upload_stash=True,
create_stash=True,
)
in self.mocks[
'olympia.blocklist.cron.upload_filter.delay'
Expand Down Expand Up @@ -341,7 +341,7 @@ def _test_upload_stash_unless_force_base(
mock.call(
self.current_time,
filter_list=expected_block_types,
upload_stash=False,
create_stash=False,
)
in self.mocks[
'olympia.blocklist.cron.upload_filter.delay'
Expand Down Expand Up @@ -456,7 +456,7 @@ def test_upload_stash_unless_missing_base_filter(self):
mock.call(
self.current_time,
filter_list=[],
upload_stash=True,
create_stash=True,
)
]
mlbf = MLBF.load_from_storage(self.current_time)
Expand All @@ -472,7 +472,7 @@ def test_upload_stash_unless_missing_base_filter(self):
mock.call(
self.current_time,
filter_list=[BlockType.BLOCKED.name],
upload_stash=False,
create_stash=False,
)
in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list
)
Expand All @@ -485,7 +485,7 @@ def test_upload_stash_unless_missing_base_filter(self):
mock.call(
self.current_time,
filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name],
upload_stash=False,
create_stash=False,
)
) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list

Expand All @@ -503,7 +503,7 @@ def test_upload_stash_unless_enough_changes(self):
mock.call(
self.current_time,
filter_list=[],
upload_stash=True,
create_stash=True,
)
]
mlbf = MLBF.load_from_storage(self.current_time)
Expand All @@ -521,7 +521,7 @@ def test_upload_stash_unless_enough_changes(self):
mock.call(
self.current_time,
filter_list=[BlockType.BLOCKED.name],
upload_stash=False,
create_stash=False,
)
in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list
)
Expand Down Expand Up @@ -658,7 +658,7 @@ def test_invalid_cache_results_in_diff(self):
mock.call(
next_time,
filter_list=[],
upload_stash=True,
create_stash=True,
)
in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list
)
Expand All @@ -672,14 +672,14 @@ def test_pass_correct_arguments_to_upload_filter(self):
spy_delay.assert_called_with(
self.current_time,
filter_list=[BlockType.BLOCKED.name],
upload_stash=False,
create_stash=False,
)
with override_switch('enable-soft-blocking', active=True):
upload_mlbf_to_remote_settings(force_base=True)
spy_delay.assert_called_with(
self.current_time,
filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name],
upload_stash=False,
create_stash=False,
)


Expand Down
6 changes: 3 additions & 3 deletions src/olympia/blocklist/tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -292,13 +292,13 @@ def test_cleanup_oldest_records(self):
expected_calls=[mock.call('0'), mock.call('1')],
)

def test_upload_stashed_filter(self):
def test_create_stashed_filter(self):
old_mlbf = MLBF.generate_from_db(self.generation_time - 1)
blocked_version = self._block_version(is_signed=True)
mlbf = MLBF.generate_from_db(self.generation_time)
mlbf.generate_and_write_stash(old_mlbf)

upload_filter.delay(self.generation_time, upload_stash=True)
upload_filter.delay(self.generation_time, create_stash=True)

assert not self.mocks['delete_record'].called
with mlbf.storage.open(mlbf.stash_path, 'rb') as stash_file:
Expand Down Expand Up @@ -336,7 +336,7 @@ def test_raises_when_no_filter_exists(self):

def test_raises_when_no_stash_exists(self):
with self.assertRaises(FileNotFoundError):
upload_filter.delay(self.generation_time, upload_stash=True)
upload_filter.delay(self.generation_time, create_stash=True)

def test_default_is_no_op(self):
MLBF.generate_from_db(self.generation_time).generate_and_write_filter(
Expand Down

0 comments on commit 766f489

Please sign in to comment.