From ac939f80fa0c0620ef9eb084b762fb1bb3c34e0d Mon Sep 17 00:00:00 2001 From: JackyWoo Date: Sat, 15 Jun 2024 19:09:12 +0800 Subject: [PATCH] Fix stale log index when there is a snapshot but no log on disk --- src/Service/NuRaftLogSegment.cpp | 4 +--- src/Service/NuRaftLogSegment.h | 9 +++----- src/Service/NuRaftStateMachine.cpp | 2 +- tests/integration/test_snapshots/test.py | 27 ++++++++++++++++++++++++ 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/src/Service/NuRaftLogSegment.cpp b/src/Service/NuRaftLogSegment.cpp index ffb66062ed..a7bbb37716 100644 --- a/src/Service/NuRaftLogSegment.cpp +++ b/src/Service/NuRaftLogSegment.cpp @@ -706,13 +706,11 @@ int NuRaftLogSegment::truncate(const UInt64 last_index_kept) return ret; } -ptr LogSegmentStore::segment_store = nullptr; - ptr LogSegmentStore::getInstance(const String & log_dir_, bool force_new) { + static ptr segment_store; if (segment_store == nullptr || force_new) segment_store = cs_new(log_dir_); - return segment_store; } diff --git a/src/Service/NuRaftLogSegment.h b/src/Service/NuRaftLogSegment.h index 44048dd258..36ed7d3b54 100644 --- a/src/Service/NuRaftLogSegment.h +++ b/src/Service/NuRaftLogSegment.h @@ -239,7 +239,7 @@ class NuRaftLogSegment class LogSegmentStore { public: - using SegmentVector = std::vector>; + using Segments = std::vector>; static constexpr UInt32 MAX_SEGMENT_FILE_SIZE = 1000 * 1024 * 1024; //1G, 0.3K/Log, 3M logs static constexpr UInt32 MAX_SEGMENT_COUNT = 50; //50G @@ -296,7 +296,7 @@ class LogSegmentStore int reset(UInt64 next_log_index); /// get closed segments - SegmentVector & getClosedSegments() { return segments; } + Segments & getClosedSegments() { return segments; } /// get file format version LogVersion getVersion(UInt64 index); @@ -312,9 +312,6 @@ class LogSegmentStore /// find segment by log index int getSegment(UInt64 log_index, ptr & ptr); - /// global instance - static ptr segment_store; - /// file log store directory String log_dir; @@ -331,7 +328,7 @@ 
class LogSegmentStore Poco::Logger * log; /// closed segments - SegmentVector segments; + Segments segments; /// open segments ptr open_segment; diff --git a/src/Service/NuRaftStateMachine.cpp b/src/Service/NuRaftStateMachine.cpp index a368020a26..79eaf75b02 100644 --- a/src/Service/NuRaftStateMachine.cpp +++ b/src/Service/NuRaftStateMachine.cpp @@ -87,7 +87,7 @@ NuRaftStateMachine::NuRaftStateMachine( { LOG_INFO(log, "No previous last commit idx found, skip replaying logs."); } - else if (previous_last_commit_id < last_committed_idx) + else if (previous_last_commit_id <= last_committed_idx) { LOG_WARNING( log, diff --git a/tests/integration/test_snapshots/test.py b/tests/integration/test_snapshots/test.py index 37aa62a455..90cb6a11b7 100644 --- a/tests/integration/test_snapshots/test.py +++ b/tests/integration/test_snapshots/test.py @@ -1,6 +1,7 @@ import os import random import string +import time import pytest @@ -72,6 +73,7 @@ def test_state_after_restart(started_cluster, node): finally: close_zk_clients([node_zk, node_zk2]) + @pytest.mark.parametrize( 'node', [ @@ -115,3 +117,28 @@ def test_ephemeral_after_restart(started_cluster, node): assert list(sorted(existing_children)) == list(sorted(node_zk2.get_children("/test_ephemeral_after_restart"))) finally: close_zk_clients([node_zk, node_zk2]) + + +@pytest.mark.parametrize( + 'node', + [ + cluster.add_instance('node5', main_configs=['configs/enable_keeper.xml'], with_zookeeper=True, stay_alive=True), + cluster.add_instance('node6', main_configs=['configs/enable_async_snapshot_keeper.xml'], with_zookeeper=True, stay_alive=True) + ] +) +def test_restart_with_no_log(started_cluster, node): + node_zk = node_zk2 = None + try: + node_zk = node.get_fake_zk() + node_zk.create("/test_restart_with_no_log", b"somevalue") + + node.send_4lw_cmd(cmd="csnp") + time.sleep(1) # wait for snapshot to be taken + + node.restart_raftkeeper(kill=True) + node.wait_for_join_cluster() + + node_zk2 = node.get_fake_zk() + assert 
node_zk2.exists("/test_restart_with_no_log") + finally: + close_zk_clients([node_zk, node_zk2])