diff --git a/src/olympia/blocklist/cron.py b/src/olympia/blocklist/cron.py index ea4bf34c886c..d814749dc6a4 100644 --- a/src/olympia/blocklist/cron.py +++ b/src/olympia/blocklist/cron.py @@ -73,7 +73,7 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): ) base_filters_to_update: List[BlockType] = [] - update_stash = False + create_stash = False # Determine which base filters need to be re uploaded # and whether a new stash needs to be created @@ -83,7 +83,9 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): if block_type == BlockType.SOFT_BLOCKED and not waffle.switch_is_active( 'enable-soft-blocking' ): - log.info("Skipping soft-blocks because enable-soft-blocking switch is inactive") + log.info( + 'Skipping soft-blocks because enable-soft-blocking switch is inactive' + ) continue base_filter = MLBF.load_from_storage(get_base_generation_time(block_type)) @@ -108,11 +110,12 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): # only update the stash if we should AND if # we aren't already reuploading the filter for this block type elif should_update_stash: - update_stash = True + create_stash = True - skip_update = len(base_filters_to_update) == 0 and not update_stash + skip_update = len(base_filters_to_update) == 0 and not create_stash if skip_update: log.info('No new/modified/deleted Blocks in database; skipping MLBF generation') + # Delete the locally generated MLBF directory and files as they are not needed mlbf.delete() return @@ -129,7 +132,7 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): len(mlbf.data.not_blocked_items), ) - if update_stash: + if create_stash: mlbf.generate_and_write_stash(previous_filter) for block_type in base_filters_to_update: @@ -138,7 +141,7 @@ def _upload_mlbf_to_remote_settings(*, force_base=False): upload_filter.delay( mlbf.created_at, filter_list=[key.name for key in base_filters_to_update], - upload_stash=update_stash, + create_stash=create_stash, ) diff --git a/src/olympia/blocklist/management/commands/export_blocklist.py b/src/olympia/blocklist/management/commands/export_blocklist.py index 85402295034e..8c4dfbaf4c1b 100644 --- a/src/olympia/blocklist/management/commands/export_blocklist.py +++ b/src/olympia/blocklist/management/commands/export_blocklist.py @@ -34,6 +34,7 @@ def add_arguments(self, parser): '--block-type', help='Block type to export', default=None, + choices=[block_type.name for block_type in BlockType], ) def load_json(self, json_path): diff --git a/src/olympia/blocklist/mlbf.py b/src/olympia/blocklist/mlbf.py index 26bd2264b6d9..48b0d367c9b4 100644 --- a/src/olympia/blocklist/mlbf.py +++ b/src/olympia/blocklist/mlbf.py @@ -32,7 +32,7 @@ def ordered_diff_lists( return extras, deletes, changed_count -def generate_mlbf(stats, blocked, not_blocked): +def generate_mlbf(stats, include, exclude): log.info('Starting to generating bloomfilter') cascade = FilterCascade( @@ -40,20 +40,21 @@ def generate_mlbf(stats, blocked, not_blocked): salt=secrets.token_bytes(16), ) - len_blocked = len(blocked) - len_unblocked = len(not_blocked) + len_include = len(include) + len_exclude = len(exclude) - # We can only set error rates if both blocked and unblocked are non-empty - if len_blocked > 0 and len_unblocked > 0: - error_rates = sorted((len_blocked, len_unblocked)) + # We can only set error rates if both include and exclude are non-empty + if len_include > 0 and len_exclude > 0: + error_rates = sorted((len_include, len_exclude)) cascade.set_crlite_error_rates( include_len=error_rates[0], exclude_len=error_rates[1] ) - stats['mlbf_blocked_count'] = len(blocked) - stats['mlbf_notblocked_count'] = len(not_blocked) + # TODO: these stats need to be block_type aware + stats['mlbf_blocked_count'] = len(include) + stats['mlbf_notblocked_count'] = len(exclude) - cascade.initialize(include=blocked, exclude=not_blocked) + cascade.initialize(include=include, exclude=exclude) stats['mlbf_version'] = cascade.version stats['mlbf_layers'] = cascade.layerCount() @@ -63,7 +64,7 @@ def generate_mlbf(stats, blocked, not_blocked): f'Filter cascade layers: {cascade.layerCount()}, ' f'bit: {cascade.bitCount()}' ) - cascade.verify(include=blocked, exclude=not_blocked) + cascade.verify(include=include, exclude=exclude) return cascade @@ -221,20 +222,32 @@ def delete(self): log.info(f'Deleted {self.storage.base_location}') def generate_and_write_filter(self, block_type: BlockType): + """ + Generate and write the bloom filter for a given block type. + Included items will be items in the specified block type list. + Excluded items will be items in all other data types. + We use the language of inlcude and exclude to distinguish this concept + from blocked and unblocked which are more specific to the block type. + """ stats = {} - blocked = self.data[block_type] + include_items = [] + exclude_items = [] - not_blocked = [ - self.data[_block_type] - for _block_type in BlockType - if _block_type != block_type - ] + # Map over the data types in the MLBFDataType enum + for data_type in MLBFDataType: + # if the data type is in the specified block type, + # add it to the include items + if data_type == block_type: + include_items = self.data[data_type] + # otherwise add items to the exclude items + else: + exclude_items.extend(self.data[data_type]) bloomfilter = generate_mlbf( stats=stats, - blocked=blocked, - not_blocked=not_blocked, + include=include_items, + exclude=exclude_items, ) # write bloomfilter diff --git a/src/olympia/blocklist/tasks.py b/src/olympia/blocklist/tasks.py index e044235ac71d..dac0bc56c132 100644 --- a/src/olympia/blocklist/tasks.py +++ b/src/olympia/blocklist/tasks.py @@ -97,7 +97,7 @@ def monitor_remote_settings(): @task -def upload_filter(generation_time, filter_list=None, upload_stash=False): +def upload_filter(generation_time, filter_list=None, create_stash=False): # We cannot send enum values to tasks so we serialize them as strings # and deserialize them here back to the enum values. filter_list: List[BlockType] = ( @@ -152,7 +152,7 @@ def upload_filter(generation_time, filter_list=None, upload_stash=False): statsd.incr('blocklist.tasks.upload_filter.reset_collection') # It is possible to upload a stash and a filter in the same task - if upload_stash: + if create_stash: with mlbf.storage.open(mlbf.stash_path, 'r') as stash_file: stash_data = json.load(stash_file) # If we have a stash, write that diff --git a/src/olympia/blocklist/tests/test_cron.py b/src/olympia/blocklist/tests/test_cron.py index f94eb033a5f7..6cc8b59ed777 100644 --- a/src/olympia/blocklist/tests/test_cron.py +++ b/src/olympia/blocklist/tests/test_cron.py @@ -106,7 +106,7 @@ def _test_skip_update_unless_force_base(self, enable_soft_blocking=False): mock.call( self.current_time, filter_list=filter_list, - upload_stash=False, + create_stash=False, ) ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list @@ -149,7 +149,7 @@ def _test_skip_update_unless_no_base_mlbf( mock.call( self.current_time, filter_list=filter_list, - upload_stash=False, + create_stash=False, ) ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list @@ -197,7 +197,7 @@ def test_missing_last_filter_uses_base_filter(self): mock.call( self.current_time, filter_list=[], - upload_stash=True, + create_stash=True, ) ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list @@ -313,7 +313,7 @@ def _test_upload_stash_unless_force_base( mock.call( self.current_time, filter_list=[], - upload_stash=True, + create_stash=True, ) in self.mocks[ 'olympia.blocklist.cron.upload_filter.delay' @@ -341,7 +341,7 @@ def _test_upload_stash_unless_force_base( mock.call( self.current_time, filter_list=expected_block_types, - upload_stash=False, + create_stash=False, ) in self.mocks[ 'olympia.blocklist.cron.upload_filter.delay' @@ -456,7 +456,7 @@ def test_upload_stash_unless_missing_base_filter(self): mock.call( self.current_time, filter_list=[], - upload_stash=True, + create_stash=True, ) ] mlbf = MLBF.load_from_storage(self.current_time) @@ -472,7 +472,7 @@ def test_upload_stash_unless_missing_base_filter(self): mock.call( self.current_time, filter_list=[BlockType.BLOCKED.name], - upload_stash=False, + create_stash=False, ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list ) @@ -485,7 +485,7 @@ def test_upload_stash_unless_missing_base_filter(self): mock.call( self.current_time, filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], - upload_stash=False, + create_stash=False, ) ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list @@ -503,7 +503,7 @@ def test_upload_stash_unless_enough_changes(self): mock.call( self.current_time, filter_list=[], - upload_stash=True, + create_stash=True, ) ] mlbf = MLBF.load_from_storage(self.current_time) @@ -521,7 +521,7 @@ def test_upload_stash_unless_enough_changes(self): mock.call( self.current_time, filter_list=[BlockType.BLOCKED.name], - upload_stash=False, + create_stash=False, ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list ) @@ -658,7 +658,7 @@ def test_invalid_cache_results_in_diff(self): mock.call( next_time, filter_list=[], - upload_stash=True, + create_stash=True, ) in self.mocks['olympia.blocklist.cron.upload_filter.delay'].call_args_list ) @@ -672,14 +672,14 @@ def test_pass_correct_arguments_to_upload_filter(self): spy_delay.assert_called_with( self.current_time, filter_list=[BlockType.BLOCKED.name], - upload_stash=False, + create_stash=False, ) with override_switch('enable-soft-blocking', active=True): upload_mlbf_to_remote_settings(force_base=True) spy_delay.assert_called_with( self.current_time, filter_list=[BlockType.BLOCKED.name, BlockType.SOFT_BLOCKED.name], - upload_stash=False, + create_stash=False, ) diff --git a/src/olympia/blocklist/tests/test_tasks.py b/src/olympia/blocklist/tests/test_tasks.py index bba82f4442eb..ab0ec1c619e2 100644 --- a/src/olympia/blocklist/tests/test_tasks.py +++ b/src/olympia/blocklist/tests/test_tasks.py @@ -292,13 +292,13 @@ def test_cleanup_oldest_records(self): expected_calls=[mock.call('0'), mock.call('1')], ) - def test_upload_stashed_filter(self): + def test_create_stashed_filter(self): old_mlbf = MLBF.generate_from_db(self.generation_time - 1) blocked_version = self._block_version(is_signed=True) mlbf = MLBF.generate_from_db(self.generation_time) mlbf.generate_and_write_stash(old_mlbf) - upload_filter.delay(self.generation_time, upload_stash=True) + upload_filter.delay(self.generation_time, create_stash=True) assert not self.mocks['delete_record'].called with mlbf.storage.open(mlbf.stash_path, 'rb') as stash_file: @@ -336,7 +336,7 @@ def test_raises_when_no_filter_exists(self): def test_raises_when_no_stash_exists(self): with self.assertRaises(FileNotFoundError): - upload_filter.delay(self.generation_time, upload_stash=True) + upload_filter.delay(self.generation_time, create_stash=True) def test_default_is_no_op(self): MLBF.generate_from_db(self.generation_time).generate_and_write_filter(