Skip to content

Commit

Permalink
fix(k8s): make 'load_and_stream' nemesis work on K8S
Browse files Browse the repository at this point in the history
The 'load_and_stream' nemesis consistently fails on K8S with
the following error:

  TimeoutError: timeout occurred while waiting for end log line \
    (['(?:storage_service|sstables_loader) - load_and_stream:'] on node: \
      sct-cluster-us-east1-b-us-east1-2

The log lines do exist, but SCT reads them with a delay of 5-10
minutes. So, to work around this, set timeouts big enough to reliably
catch the delayed log lines.

Closes: scylladb#6314
  • Loading branch information
vponomaryov authored and soyacz committed Aug 21, 2023
1 parent e63c185 commit c760db4
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 4 deletions.
5 changes: 4 additions & 1 deletion sdcm/nemesis.py
Original file line number Diff line number Diff line change
Expand Up @@ -1551,7 +1551,10 @@ def disrupt_load_and_stream(self):
test_data=test_data)
for sstables_info, load_on_node in map_files_to_node:
SstableLoadUtils.upload_sstables(load_on_node, test_data=sstables_info, table_name="standard1")
SstableLoadUtils.run_load_and_stream(load_on_node)
# NOTE: on K8S logs may appear with a delay, so add a bigger timeout for it.
# See https://github.com/scylladb/scylla-cluster-tests/issues/6314
kwargs = {"start_timeout": 1800, "end_timeout": 1800} if self._is_it_on_kubernetes() else {}
SstableLoadUtils.run_load_and_stream(load_on_node, **kwargs)

# pylint: disable=too-many-statements
def disrupt_nodetool_refresh(self, big_sstable: bool = False):
Expand Down
6 changes: 4 additions & 2 deletions sdcm/utils/sstable/load_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,11 +110,13 @@ def upload_sstables(node, test_data: TestDataInventory, keyspace_name: str = 'ke
node.remoter.sudo(f'rm -f {table_folder}/upload/manifest.json')

@classmethod
def run_load_and_stream(cls, node, keyspace_name: str = 'keyspace1', table_name: str = 'standard1', timeout=300):
def run_load_and_stream(cls, node, # pylint: disable=too-many-arguments
keyspace_name: str = 'keyspace1', table_name: str = 'standard1',
start_timeout=60, end_timeout=300):
"""runs load and stream using API request and waits for it to finish"""
with wait_for_log_lines(node, start_line_patterns=[cls.LOAD_AND_STREAM_RUN_EXPR],
end_line_patterns=[cls.LOAD_AND_STREAM_DONE_EXPR.format(keyspace_name, table_name)],
start_timeout=60, end_timeout=timeout):
start_timeout=start_timeout, end_timeout=end_timeout):
LOGGER.info("Running load and stream on the node %s for %s.%s'", node.name, keyspace_name, table_name)

# `load_and_stream` parameter is not supported by nodetool yet. This is workaround
Expand Down
2 changes: 1 addition & 1 deletion unit_tests/test_utils_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,4 +228,4 @@ def test_distribute_test_files_to_cluster_nodes():

def test_load_and_stream_waits_for_log_lines(self):
self.node.remoter = Remoter(self.node.system_log)
SstableLoadUtils.run_load_and_stream(self.node, timeout=1)
SstableLoadUtils.run_load_and_stream(self.node, start_timeout=1, end_timeout=2)

0 comments on commit c760db4

Please sign in to comment.