diff --git a/build.yaml b/build.yaml index 744ae947a..63667f283 100644 --- a/build.yaml +++ b/build.yaml @@ -3,7 +3,7 @@ version: 1 machamp: keydb-build: # Optional - build counter is linked to the build def - tag_template: 0.0.%build.counter% + tag_template: "0.0.%build.counter%-%sha%" # Optional - value in seconds before a build is terminated, default is 3600 seconds timeout: 3600 # Optional - update ghe or not, default to true @@ -46,3 +46,25 @@ machamp: # https://github.sc-corp.net/Snapchat/img/tree/master/keydb/ubuntu-20-04 builder_image: us.gcr.io/snapchat-build-artifacts/prod/snapchat/img/keydb/keydb-ubuntu-20-04@sha256:cf869a3f5d1de1e1d976bb906689c37b7031938eb68661b844a38c532f27248c command: ./runtest-rotation + keydb-docker-build: + # Optional - tag template; this build def uses only %sha% (no build counter) + tag_template: "%sha%" + # Optional - value in seconds before a build is terminated, default is 3600 seconds + timeout: 3600 + # Optional - update ghe or not, default to true + update_ghe: true + code_coverage: false + # Required + steps: + # to ensure a clearer docker build env + code-checkout: + type: cmd + command: echo checkout + # default machamp builder image does not work for multi arch + builder_image: us.gcr.io/snapchat-build-artifacts/prod/snapchat/img/ubuntu/ubuntu-23-04@sha256:bd43177a80e6ce1c3583e8ea959b88a9081c0f56b765ec9c5a157c27a637c23b + docker: + parent: code-checkout + type: docker # published images can be found in https://console.cloud.google.com/gcr/images/machamp-prod/global/keydb + dockerfile: machamp_scripts/Dockerfile + image_name: keydb # the git commit sha will be the default tag of the final image + workspace_context: ./ # This is the workspace context that your Dockerfile will use to move files around. // If the workspace context is just the root of the repository, you can just use "./". 
diff --git a/ci.yaml b/ci.yaml index ab346113f..595d23239 100644 --- a/ci.yaml +++ b/ci.yaml @@ -1,16 +1,35 @@ # Doc: https://wiki.sc-corp.net/display/TOOL/ci.yaml+User+Guide version: 1 on: + # https://wiki.sc-corp.net/display/TOOL/Onboard+Machamp+Build+By+ci.yaml+Configuration + # on pull_request is used for any pr build pull_request: - - workflows: - # All builds that use machamp should use the defined `backend_workflow` - - workflow_type: backend_workflow - # references a build defined in build.yaml - build_name: keydb-build - arch_types: ["amd64", "arm64"] + - branches: ['!!main', '*'] # this branch pattern means any branch but not main branch will trigger this pr build + workflows: + # All builds that use machamp should use the defined `backend_workflow` + - workflow_type: backend_workflow + # references a build defined in build.yaml + build_name: keydb-build + arch_types: ["amd64", "arm64"] + - workflow_type: backend_workflow + # references a build defined in build.yaml + build_name: keydb-docker-build + arch_types: ["amd64", "arm64"] + # on push is used for release branch, meaning: trigger this build when there is commit pushed to this branch push: - branches: [main] workflows: - workflow_type: backend_workflow build_name: keydb-build arch_types: ["amd64", "arm64"] + - workflow_type: backend_workflow + # references a build defined in build.yaml + build_name: keydb-docker-build + arch_types: ["amd64", "arm64"] + +# below defines which branch is release branch / release tag +machamp: + releases: + # Note: machamp will only respect the ci.yaml file from default branch for "release branch" definition (most repositories using master/main as default branch) + # https://wiki.sc-corp.net/display/TOOL/Onboard+Machamp+Build+By+ci.yaml+Configuration + - branch_name: ^main$ diff --git a/deps/cpp-statsd-client/.clang-format b/deps/cpp-statsd-client/.clang-format new file mode 100644 index 000000000..2d8fd6e65 --- /dev/null +++ b/deps/cpp-statsd-client/.clang-format @@ 
-0,0 +1,105 @@ +AccessModifierOffset: -4 +AllowAllParametersOfDeclarationOnNextLine: false +AllowShortFunctionsOnASingleLine: Empty +BinPackArguments: false +BinPackParameters: false +ColumnLimit: 120 +IndentCaseLabels: false +IndentWidth: 4 + +--- +Language: Cpp +# BasedOnStyle: Google +#AccessModifierOffset: -1 +AlignAfterOpenBracket: Align +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignEscapedNewlinesLeft: true +AlignOperands: true +AlignTrailingComments: true +#AllowAllParametersOfDeclarationOnNextLine: true +AllowShortBlocksOnASingleLine: false +AllowShortCaseLabelsOnASingleLine: false +#AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: true +AllowShortLoopsOnASingleLine: true +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: true +AlwaysBreakTemplateDeclarations: true +#BinPackArguments: true +#BinPackParameters: true +BraceWrapping: + AfterClass: false + AfterControlStatement: false + AfterEnum: false + AfterFunction: false + AfterNamespace: false + AfterObjCDeclaration: false + AfterStruct: false + AfterUnion: false + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: true +BreakConstructorInitializersBeforeComma: false +BreakAfterJavaFieldAnnotations: false +BreakStringLiterals: true +#ColumnLimit: 80 +CommentPragmas: '^ IWYU pragma:' +ConstructorInitializerAllOnOneLineOrOnePerLine: true +ConstructorInitializerIndentWidth: 4 +ContinuationIndentWidth: 4 +Cpp11BracedListStyle: true +DerivePointerAlignment: true +DisableFormat: false +ExperimentalAutoDetectBinPacking: false +ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ] +IncludeCategories: + - Regex: '^<.*\.h>' + Priority: 1 + - Regex: '^<.*' + Priority: 2 + - Regex: '.*' + Priority: 3 +IncludeIsMainRegex: '([-_](test|unittest))?$' +#IndentCaseLabels: true +#IndentWidth: 2 
+IndentWrappedFunctionNames: false +JavaScriptQuotes: Leave +JavaScriptWrapImports: true +KeepEmptyLinesAtTheStartOfBlocks: false +MacroBlockBegin: '' +MacroBlockEnd: '' +MaxEmptyLinesToKeep: 1 +NamespaceIndentation: None +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakComment: 300 +PenaltyBreakFirstLessLess: 120 +PenaltyBreakString: 1000 +PenaltyExcessCharacter: 1000000 +PenaltyReturnTypeOnItsOwnLine: 200 +PointerAlignment: Left +ReflowComments: true +SortIncludes: true +SpaceAfterCStyleCast: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeParens: ControlStatements +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 2 +SpacesInAngles: false +SpacesInContainerLiterals: true +SpacesInCStyleCastParentheses: false +SpacesInParentheses: false +SpacesInSquareBrackets: false +Standard: Auto +TabWidth: 8 +UseTab: Never +... + diff --git a/deps/cpp-statsd-client/.github/workflows/coverage.yml b/deps/cpp-statsd-client/.github/workflows/coverage.yml new file mode 100644 index 000000000..4642670ee --- /dev/null +++ b/deps/cpp-statsd-client/.github/workflows/coverage.yml @@ -0,0 +1,30 @@ +name: Coverage + +on: [push, pull_request] +jobs: + coverage: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - name: dependencies + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y -qq make cmake gcc g++ lcov bc + - name: build + shell: bash + run: | + export LD_LIBRARY_PATH=.:$(cat /etc/ld.so.conf.d/* | grep -vF "#" | tr "\\n" ":" | sed -e "s/:$//g") + cmake . 
-DCMAKE_BUILD_TYPE=Debug -DENABLE_COVERAGE=On + make all -j$(nproc) + - name: coverage + shell: bash + run: | + make coverage + lines=$(lcov --summary coverage.info | grep -F lines | awk '{print $2}' | sed -e "s/%//g") + if (( $(echo "${lines} < ${COVERAGE_THRESHOLD}" | bc -l) )); then + echo "Line coverage dropped below ${COVERAGE_THRESHOLD}% to ${lines}%" + exit 1 + fi + env: + COVERAGE_THRESHOLD: 85.0 diff --git a/deps/cpp-statsd-client/.github/workflows/lint.yml b/deps/cpp-statsd-client/.github/workflows/lint.yml new file mode 100644 index 000000000..1ed921645 --- /dev/null +++ b/deps/cpp-statsd-client/.github/workflows/lint.yml @@ -0,0 +1,13 @@ +name: Lint + +on: [push, pull_request] +jobs: + lint: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - name: lint + uses: DoozyX/clang-format-lint-action@v0.12 + with: + clangFormatVersion: 12 + source: './include/cpp-statsd-client ./tests' diff --git a/deps/cpp-statsd-client/.github/workflows/linux.yml b/deps/cpp-statsd-client/.github/workflows/linux.yml new file mode 100644 index 000000000..a431e1408 --- /dev/null +++ b/deps/cpp-statsd-client/.github/workflows/linux.yml @@ -0,0 +1,23 @@ +name: Linux + +on: [push, pull_request] +jobs: + linux: + runs-on: ubuntu-20.04 + steps: + - uses: actions/checkout@v2 + - name: dependencies + shell: bash + run: | + sudo apt-get update + sudo apt-get install -y -qq make cmake gcc g++ + - name: build + shell: bash + run: | + export LD_LIBRARY_PATH=.:$(cat /etc/ld.so.conf.d/* | grep -vF "#" | tr "\\n" ":" | sed -e "s/:$//g") + cmake . 
-DCMAKE_BUILD_TYPE=RelWithDebInfo -DENABLE_SANITIZERS=On + make all -j$(nproc) + - name: test + shell: bash + run: | + make test diff --git a/deps/cpp-statsd-client/.github/workflows/windows.yml b/deps/cpp-statsd-client/.github/workflows/windows.yml new file mode 100644 index 000000000..3b864009c --- /dev/null +++ b/deps/cpp-statsd-client/.github/workflows/windows.yml @@ -0,0 +1,18 @@ +name: Windows + +on: [push, pull_request] +jobs: + windows: + runs-on: windows-latest + steps: + - uses: actions/checkout@v2 + - name: dependencies + run: | + choco install cmake + - name: build + run: | + cmake -S . -B build -G "Visual Studio 16 2019" -A x64 + cmake --build build --target ALL_BUILD --config Release + - name: test + run: | + cmake --build build --target RUN_TESTS --config Release diff --git a/deps/cpp-statsd-client/.gitignore b/deps/cpp-statsd-client/.gitignore new file mode 100644 index 000000000..c5e82d745 --- /dev/null +++ b/deps/cpp-statsd-client/.gitignore @@ -0,0 +1 @@ +bin \ No newline at end of file diff --git a/deps/cpp-statsd-client/CMakeLists.txt b/deps/cpp-statsd-client/CMakeLists.txt new file mode 100644 index 000000000..9b8d64cfa --- /dev/null +++ b/deps/cpp-statsd-client/CMakeLists.txt @@ -0,0 +1,85 @@ +# Basic project setup +cmake_minimum_required(VERSION 3.5) +project(cpp-statsd-client + VERSION 1.0.2 + LANGUAGES CXX + DESCRIPTION "A header-only StatsD client implemented in C++" + HOMEPAGE_URL "https://github.com/vthiery/cpp-statsd-client") + +option(CPP_STATSD_STANDALONE "Allows configuration of targets for verifying library functionality" ON) +option(ENABLE_TESTS "Build tests" ON) +option(ENABLE_COVERAGE "Build with coverage instrumentalisation" OFF) + +if(NOT CPP_STATSD_STANDALONE) + set(ENABLE_TESTS OFF) + set(ENABLE_COVERAGE OFF) +endif() + +include(GNUInstallDirs) +include(CMakePackageConfigHelpers) +find_package(Threads) + +# Optional code coverage targets +if(ENABLE_COVERAGE) + set(COVERAGE_EXCLUDES /usr/*) + 
include(${PROJECT_SOURCE_DIR}/cmake/CodeCoverage.cmake) + APPEND_COVERAGE_COMPILER_FLAGS() + SETUP_TARGET_FOR_COVERAGE_LCOV(NAME coverage + EXECUTABLE testStatsdClient + DEPENDENCIES ${PROJECT_NAME} + ) +endif() + +# The library target +add_library(${PROJECT_NAME} INTERFACE) +target_include_directories( + ${PROJECT_NAME} + INTERFACE $ + $) +target_link_libraries(${PROJECT_NAME} INTERFACE Threads::Threads) +if(WIN32) + target_link_libraries(${PROJECT_NAME} INTERFACE ws2_32) +endif() + +# The installation and pkg-config-like cmake config +install(TARGETS ${PROJECT_NAME} + EXPORT ${PROJECT_NAME}_Targets + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) +write_basic_package_version_file("${PROJECT_NAME}ConfigVersion.cmake" + VERSION ${PROJECT_VERSION} + COMPATIBILITY SameMajorVersion) +configure_package_config_file( + "${PROJECT_SOURCE_DIR}/cmake/${PROJECT_NAME}Config.cmake.in" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + INSTALL_DESTINATION + ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) +install(EXPORT ${PROJECT_NAME}_Targets + FILE ${PROJECT_NAME}Targets.cmake + NAMESPACE ${PROJECT_NAME}:: + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) +install(FILES "${PROJECT_BINARY_DIR}/${PROJECT_NAME}Config.cmake" + "${PROJECT_BINARY_DIR}/${PROJECT_NAME}ConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/${PROJECT_NAME}/cmake) +install(DIRECTORY ${PROJECT_SOURCE_DIR}/include DESTINATION include) + +if(ENABLE_TESTS) + # The test targets + add_executable(testStatsdClient ${CMAKE_CURRENT_SOURCE_DIR}/tests/testStatsdClient.cpp) + if(WIN32) + target_compile_options(testStatsdClient PRIVATE -W4 -WX /external:W0) + else() + target_compile_options(testStatsdClient PRIVATE -Wall -Wextra -pedantic -Werror) + endif() + target_include_directories(testStatsdClient PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/tests) + target_link_libraries(testStatsdClient 
${PROJECT_NAME}) + + set_property(TARGET testStatsdClient PROPERTY CXX_STANDARD 11) + set_property(TARGET testStatsdClient PROPERTY CXX_EXTENSIONS OFF) + + # The test suite + enable_testing() + add_test(ctestTestStatsdClient testStatsdClient) + add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} DEPENDS testStatsdClient) +endif() diff --git a/deps/cpp-statsd-client/LICENSE.md b/deps/cpp-statsd-client/LICENSE.md new file mode 100644 index 000000000..632d564b7 --- /dev/null +++ b/deps/cpp-statsd-client/LICENSE.md @@ -0,0 +1,9 @@ +MIT License + +Copyright (c) 2017 Vincent Thiery + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its contributors
# may be used to endorse or promote products derived from this software without
# specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. Processing code coverage counters and generating report." The separator character '.' + * is automatically inserted between the prefix and the stats + * key, therefore you should neither append one to the prefix + * nor prepend one to the key + * + * The sampling frequency is specified per call and uses a + * random number generator to determine whether or not the stat + * will be recorded this time or not. + * + * The top level configuration includes 2 optional parameters + * that determine how the stats are delivered to statsd. These + * parameters are the batching size and the send interval. + * + * The batching size controls the number of bytes to send + * in each UDP datagram to statsd. This is not a hard limit as + * we continue appending to a batch of stats until the limit + * has been reached or surpassed. When this occurs we add the + * batch to a queue and create a new batch to append to. A + * value of 0 for the batching size will disable batching such + * that each stat will be sent to the daemon individually. + * + * The send interval controls the rate at which queued batches + * of stats will be sent to statsd. If batching is disabled, + * this value is ignored and each individual stat is sent to + * statsd immediately in a blocking fashion. If batching is + * enabled (i.e. non-zero) then the send interval is the number + * of milliseconds to wait before flushing the queue of + * batched stats messages to the daemon. This is done in a non- + * blocking fashion via a background thread. If the send + * interval is 0 then the stats messages are appended to a + * queue until the caller manually flushes the queue via the + * flush method. + * + */ +class StatsdClient { +public: + //!@name Constructor and destructor, non-copyable + //!@{ + + //! 
Constructor + StatsdClient(const std::string& host, + const uint16_t port, + const std::string& prefix, + const uint64_t batchsize = 0, + const uint64_t sendInterval = 1000, + const unsigned int gaugePrecision = 4) noexcept; + + StatsdClient(const StatsdClient&) = delete; + StatsdClient& operator=(const StatsdClient&) = delete; + + //!@} + + //!@name Methods + //!@{ + + //! Sets a configuration { host, port, prefix, batchsize, sendInterval, gaugePrecision } + void setConfig(const std::string& host, + const uint16_t port, + const std::string& prefix, + const uint64_t batchsize = 0, + const uint64_t sendInterval = 1000, + const unsigned int gaugePrecision = 4) noexcept; + + //! Returns the error message as an std::string + const std::string& errorMessage() const noexcept; + + //! Increments the key, at a given frequency rate + void increment(const std::string& key, + float frequency = 1.0f, + const std::vector& tags = {}) const noexcept; + + //! Decrements the key, at a given frequency rate + void decrement(const std::string& key, + float frequency = 1.0f, + const std::vector& tags = {}) const noexcept; + + //! Adjusts the specified key by a given delta, at a given frequency rate + void count(const std::string& key, + const int delta, + float frequency = 1.0f, + const std::vector& tags = {}) const noexcept; + + //! Records a gauge for the key, with a given value, at a given frequency rate + template + void gauge(const std::string& key, + const T value, + float frequency = 1.0f, + const std::vector& tags = {}) const noexcept; + + //! Records a timing for a key, at a given frequency + void timing(const std::string& key, + const unsigned int ms, + float frequency = 1.0f, + const std::vector& tags = {}) const noexcept; + + //! Records a count of unique occurrences for a key, at a given frequency + void set(const std::string& key, + const unsigned int sum, + float frequency = 1.0f, + const std::vector& tags = {}) const noexcept; + + //! 
Seed the RNG that controls sampling + void seed(unsigned int seed = std::random_device()()) noexcept; + + //! Flush any queued stats to the daemon + void flush() noexcept; + + //!@} + +private: + // @name Private methods + // @{ + + //! Send a value for a key, according to its type, at a given frequency + template + void send(const std::string& key, + const T value, + const char* type, + float frequency, + const std::vector& tags) const noexcept; + + //!@} + +private: + //! The prefix to be used for metrics + std::string m_prefix; + + //! The UDP sender to be used for actual sending + std::unique_ptr m_sender; + + //! The random number generator for handling sampling + mutable std::mt19937 m_randomEngine; + + //! The buffer string format our stats before sending them + mutable std::string m_buffer; + + //! Fixed floating point precision of gauges + unsigned int m_gaugePrecision; +}; + +namespace detail { +inline std::string sanitizePrefix(std::string prefix) { + // For convenience we provide the dot when generating the stat message + if (!prefix.empty() && prefix.back() == '.') { + prefix.pop_back(); + } + return prefix; +} + +// All supported metric types +constexpr char METRIC_TYPE_COUNT[] = "c"; +constexpr char METRIC_TYPE_GAUGE[] = "g"; +constexpr char METRIC_TYPE_TIMING[] = "ms"; +constexpr char METRIC_TYPE_SET[] = "s"; +} // namespace detail + +inline StatsdClient::StatsdClient(const std::string& host, + const uint16_t port, + const std::string& prefix, + const uint64_t batchsize, + const uint64_t sendInterval, + const unsigned int gaugePrecision) noexcept + : m_prefix(detail::sanitizePrefix(prefix)), + m_sender(new UDPSender{host, port, batchsize, sendInterval}), + m_gaugePrecision(gaugePrecision) { + // Initialize the random generator to be used for sampling + seed(); + // Avoid re-allocations by reserving a generous buffer + m_buffer.reserve(256); +} + +inline void StatsdClient::setConfig(const std::string& host, + const uint16_t port, + const std::string& 
prefix, + const uint64_t batchsize, + const uint64_t sendInterval, + const unsigned int gaugePrecision) noexcept { + m_prefix = detail::sanitizePrefix(prefix); + m_sender.reset(new UDPSender(host, port, batchsize, sendInterval)); + m_gaugePrecision = gaugePrecision; +} + +inline const std::string& StatsdClient::errorMessage() const noexcept { + return m_sender->errorMessage(); +} + +inline void StatsdClient::decrement(const std::string& key, + float frequency, + const std::vector& tags) const noexcept { + count(key, -1, frequency, tags); +} + +inline void StatsdClient::increment(const std::string& key, + float frequency, + const std::vector& tags) const noexcept { + count(key, 1, frequency, tags); +} + +inline void StatsdClient::count(const std::string& key, + const int delta, + float frequency, + const std::vector& tags) const noexcept { + send(key, delta, detail::METRIC_TYPE_COUNT, frequency, tags); +} + +template +inline void StatsdClient::gauge(const std::string& key, + const T value, + const float frequency, + const std::vector& tags) const noexcept { + send(key, value, detail::METRIC_TYPE_GAUGE, frequency, tags); +} + +inline void StatsdClient::timing(const std::string& key, + const unsigned int ms, + float frequency, + const std::vector& tags) const noexcept { + send(key, ms, detail::METRIC_TYPE_TIMING, frequency, tags); +} + +inline void StatsdClient::set(const std::string& key, + const unsigned int sum, + float frequency, + const std::vector& tags) const noexcept { + send(key, sum, detail::METRIC_TYPE_SET, frequency, tags); +} + +template +inline void StatsdClient::send(const std::string& key, + const T value, + const char* type, + float frequency, + const std::vector& tags) const noexcept { + // Bail if we can't send anything anyway + if (!m_sender->initialized()) { + return; + } + + // A valid frequency is: 0 <= f <= 1 + // At 0 you never emit the stat, at 1 you always emit the stat and with anything else you roll the dice + frequency = 
std::max(std::min(frequency, 1.f), 0.f); + constexpr float epsilon{0.0001f}; + const bool isFrequencyOne = std::fabs(frequency - 1.0f) < epsilon; + const bool isFrequencyZero = std::fabs(frequency) < epsilon; + if (isFrequencyZero || + (!isFrequencyOne && (frequency < std::uniform_real_distribution(0.f, 1.f)(m_randomEngine)))) { + return; + } + + // Format the stat message + std::stringstream valueStream; + valueStream << std::fixed << std::setprecision(m_gaugePrecision) << value; + + m_buffer.clear(); + + m_buffer.append(m_prefix); + if (!m_prefix.empty() && !key.empty()) { + m_buffer.push_back('.'); + } + + m_buffer.append(key); + m_buffer.push_back(':'); + m_buffer.append(valueStream.str()); + m_buffer.push_back('|'); + m_buffer.append(type); + + if (frequency < 1.f) { + m_buffer.append("|@0."); + m_buffer.append(std::to_string(static_cast(frequency * 100))); + } + + if (!tags.empty()) { + m_buffer.append("|#"); + for (const auto& tag : tags) { + m_buffer.append(tag); + m_buffer.push_back(','); + } + m_buffer.pop_back(); + } + + // Send the message via the UDP sender + m_sender->send(m_buffer); +} + +inline void StatsdClient::seed(unsigned int seed) noexcept { + m_randomEngine.seed(seed); +} + +inline void StatsdClient::flush() noexcept { + m_sender->flush(); +} + +} // namespace Statsd + +#endif diff --git a/deps/cpp-statsd-client/include/cpp-statsd-client/UDPSender.hpp b/deps/cpp-statsd-client/include/cpp-statsd-client/UDPSender.hpp new file mode 100644 index 000000000..c7d667a5f --- /dev/null +++ b/deps/cpp-statsd-client/include/cpp-statsd-client/UDPSender.hpp @@ -0,0 +1,345 @@ +#ifndef UDP_SENDER_HPP +#define UDP_SENDER_HPP + +#ifdef _WIN32 +#define NOMINMAX +#include +#include +#include +#else +#include +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Statsd { + +#ifdef _WIN32 +using SOCKET_TYPE = SOCKET; +constexpr SOCKET_TYPE k_invalidSocket{INVALID_SOCKET}; +#define 
SOCKET_ERRNO WSAGetLastError() +#define SOCKET_CLOSE closesocket +#else +using SOCKET_TYPE = int; +constexpr SOCKET_TYPE k_invalidSocket{-1}; +#define SOCKET_ERRNO errno +#define SOCKET_CLOSE close +#endif + +/*! + * + * UDP sender + * + * A simple UDP sender handling batching. + * + */ +class UDPSender final { +public: + //!@name Constructor and destructor, non-copyable + //!@{ + + //! Constructor + UDPSender(const std::string& host, + const uint16_t port, + const uint64_t batchsize, + const uint64_t sendInterval) noexcept; + + //! Destructor + ~UDPSender(); + + UDPSender(const UDPSender&) = delete; + UDPSender& operator=(const UDPSender&) = delete; + UDPSender(UDPSender&&) = delete; + + //!@} + + //!@name Methods + //!@{ + + //! Send or enqueue a message + void send(const std::string& message) noexcept; + + //! Returns the error message as a string + const std::string& errorMessage() const noexcept; + + //! Returns true if the sender is initialized + bool initialized() const noexcept; + + //! Flushes any queued messages + void flush() noexcept; + + //!@} + +private: + // @name Private methods + // @{ + + //! Initialize the sender and returns true when it is initialized + bool initialize() noexcept; + + //! Queue a message to be sent to the daemon later + inline void queueMessage(const std::string& message) noexcept; + + //! Send a message to the daemon + void sendToDaemon(const std::string& message) noexcept; + + //!@} + +private: + // @name State variables + // @{ + + //! Shall we exit? + std::atomic m_mustExit{false}; + + //!@} + + // @name Network info + // @{ + + //! The hostname + std::string m_host; + + //! The port + uint16_t m_port; + + //! The structure holding the server + struct sockaddr_in m_server; + + //! The socket to be used + SOCKET_TYPE m_socket = k_invalidSocket; + + //!@} + + // @name Batching info + // @{ + + //! The batching size + uint64_t m_batchsize; + + //! The sending frequency in milliseconds + uint64_t m_sendInterval; + + //! 
The queue batching the messages + std::deque m_batchingMessageQueue; + + //! The mutex used for batching + std::mutex m_batchingMutex; + + //! The thread dedicated to the batching + std::thread m_batchingThread; + + //!@} + + //! Error message (optional string) + std::string m_errorMessage; +}; + +namespace detail { + +inline bool isValidSocket(const SOCKET_TYPE socket) { + return socket != k_invalidSocket; +} + +#ifdef _WIN32 +struct WinSockSingleton { + inline static const WinSockSingleton& getInstance() { + static const WinSockSingleton instance; + return instance; + } + inline bool ok() const { + return m_ok; + } + ~WinSockSingleton() { + WSACleanup(); + } + +private: + WinSockSingleton() { + WSADATA wsa; + m_ok = WSAStartup(MAKEWORD(2, 2), &wsa) == 0; + } + bool m_ok; +}; +#endif + +} // namespace detail + +inline UDPSender::UDPSender(const std::string& host, + const uint16_t port, + const uint64_t batchsize, + const uint64_t sendInterval) noexcept + : m_host(host), m_port(port), m_batchsize(batchsize), m_sendInterval(sendInterval) { + // Initialize the socket + if (!initialize()) { + return; + } + + // If batching is on, use a dedicated thread to send after the wait time is reached + if (m_batchsize != 0 && m_sendInterval > 0) { + // Define the batching thread + m_batchingThread = std::thread([this] { + // TODO: this will drop unsent stats, should we send all the unsent stats before we exit? 
+ while (!m_mustExit.load(std::memory_order_acquire)) { + std::deque stagedMessageQueue; + + std::unique_lock batchingLock(m_batchingMutex); + m_batchingMessageQueue.swap(stagedMessageQueue); + batchingLock.unlock(); + + // Flush the queue + while (!stagedMessageQueue.empty()) { + sendToDaemon(stagedMessageQueue.front()); + stagedMessageQueue.pop_front(); + } + + // Wait before sending the next batch + std::this_thread::sleep_for(std::chrono::milliseconds(m_sendInterval)); + } + }); + } +} + +inline UDPSender::~UDPSender() { + if (!initialized()) { + return; + } + + // If we're running a background thread tell it to stop + if (m_batchingThread.joinable()) { + m_mustExit.store(true, std::memory_order_release); + m_batchingThread.join(); + } + + // Cleanup the socket + SOCKET_CLOSE(m_socket); +} + +inline void UDPSender::send(const std::string& message) noexcept { + m_errorMessage.clear(); + + // If batching is on, accumulate messages in the queue + if (m_batchsize > 0) { + queueMessage(message); + return; + } + + // Or send it right now + sendToDaemon(message); +} + +inline void UDPSender::queueMessage(const std::string& message) noexcept { + // We acquire a lock but only if we actually need to (i.e. there is a thread also accessing the queue) + auto batchingLock = + m_batchingThread.joinable() ? 
std::unique_lock(m_batchingMutex) : std::unique_lock(); + // Either we don't have a place to batch our message or we exceeded the batch size, so make a new batch + if (m_batchingMessageQueue.empty() || m_batchingMessageQueue.back().length() > m_batchsize) { + m_batchingMessageQueue.emplace_back(); + m_batchingMessageQueue.back().reserve(m_batchsize + 256); + } // When there is already a batch open we need a separator when it's not empty + else if (!m_batchingMessageQueue.back().empty()) { + m_batchingMessageQueue.back().push_back('\n'); + } + // Add the new message to the batch + m_batchingMessageQueue.back().append(message); +} + +inline const std::string& UDPSender::errorMessage() const noexcept { + return m_errorMessage; +} + +inline bool UDPSender::initialize() noexcept { +#ifdef _WIN32 + if (!detail::WinSockSingleton::getInstance().ok()) { + m_errorMessage = "WSAStartup failed: errno=" + std::to_string(SOCKET_ERRNO); + } +#endif + + // Create the socket + m_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (!detail::isValidSocket(m_socket)) { + m_errorMessage = "socket creation failed: errno=" + std::to_string(SOCKET_ERRNO); + return false; + } + + std::memset(&m_server, 0, sizeof(m_server)); + m_server.sin_family = AF_INET; + m_server.sin_port = htons(m_port); + + if (inet_pton(AF_INET, m_host.c_str(), &m_server.sin_addr) == 0) { + // inet_pton returned 0: m_host is not a valid IPv4 literal, so resolve it as a hostname + + // Specify the criteria for selecting the socket address structure + struct addrinfo hints; + std::memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; + hints.ai_socktype = SOCK_DGRAM; + + // Get the address info using the hints + struct addrinfo* results = nullptr; + const int ret{getaddrinfo(m_host.c_str(), nullptr, &hints, &results)}; + if (ret != 0) { + // An error code has been returned by getaddrinfo + SOCKET_CLOSE(m_socket); + m_socket = k_invalidSocket; + m_errorMessage = "getaddrinfo failed: err=" + std::to_string(ret) + ", msg=" + gai_strerror(ret); + 
return false; + } + + // Copy the results in m_server + struct sockaddr_in* host_addr = (struct sockaddr_in*)results->ai_addr; + std::memcpy(&m_server.sin_addr, &host_addr->sin_addr, sizeof(struct in_addr)); + + // Free the memory allocated + freeaddrinfo(results); + } + + return true; +} + +inline void UDPSender::sendToDaemon(const std::string& message) noexcept { + // Try sending the message + const auto ret = sendto(m_socket, + message.data(), +#ifdef _WIN32 + static_cast(message.size()), +#else + message.size(), +#endif + 0, + (struct sockaddr*)&m_server, + sizeof(m_server)); + if (ret == -1) { + m_errorMessage = "sendto server failed: host=" + m_host + ":" + std::to_string(m_port) + + ", err=" + std::to_string(SOCKET_ERRNO); + } +} + +inline bool UDPSender::initialized() const noexcept { + return m_socket != k_invalidSocket; +} + +inline void UDPSender::flush() noexcept { + // We acquire a lock but only if we actually need to (i.e. there is a thread also accessing the queue) + auto batchingLock = + m_batchingThread.joinable() ? 
std::unique_lock(m_batchingMutex) : std::unique_lock(); + // Flush the queue + while (!m_batchingMessageQueue.empty()) { + sendToDaemon(m_batchingMessageQueue.front()); + m_batchingMessageQueue.pop_front(); + } +} + +} // namespace Statsd + +#endif diff --git a/deps/cpp-statsd-client/tests/StatsdServer.hpp b/deps/cpp-statsd-client/tests/StatsdServer.hpp new file mode 100644 index 000000000..e87fe5933 --- /dev/null +++ b/deps/cpp-statsd-client/tests/StatsdServer.hpp @@ -0,0 +1,80 @@ +#ifndef STATSD_SERVER_HPP +#define STATSD_SERVER_HPP + +// It might make sense to include this test class in the UDPSender header +// it includes most of the cross platform defines etc that we need for socket io +#include "cpp-statsd-client/UDPSender.hpp" + +#include +#include + +namespace Statsd { + +class StatsdServer { +public: + StatsdServer(unsigned short port = 8125) noexcept { +#ifdef _WIN32 + if (!detail::WinSockSingleton::getInstance().ok()) { + m_errorMessage = "WSAStartup failed: errno=" + std::to_string(SOCKET_ERRNO); + } +#endif + + // Create the socket + m_socket = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (!detail::isValidSocket(m_socket)) { + m_errorMessage = "socket creation failed: errno=" + std::to_string(SOCKET_ERRNO); + return; + } + + // Binding should be with ipv4 to all interfaces + struct sockaddr_in address {}; + address.sin_family = AF_INET; + address.sin_port = htons(port); + address.sin_addr.s_addr = INADDR_ANY; + + // Try to bind + if (bind(m_socket, reinterpret_cast(&address), sizeof(address)) != 0) { + SOCKET_CLOSE(m_socket); + m_socket = k_invalidSocket; + m_errorMessage = "bind failed: errno=" + std::to_string(SOCKET_ERRNO); + } + } + + ~StatsdServer() { + if (detail::isValidSocket(m_socket)) { + SOCKET_CLOSE(m_socket); + } + } + + const std::string& errorMessage() const noexcept { + return m_errorMessage; + } + + std::string receive() noexcept { + // If uninitialized then bail + if (!detail::isValidSocket(m_socket)) { + return ""; + } + + // Try 
to receive (this is blocking) + std::string buffer(256, '\0'); + int string_len; + if ((string_len = recv(m_socket, &buffer[0], static_cast(buffer.size()), 0)) < 1) { + m_errorMessage = "Could not recv on the socket file descriptor"; + return ""; + } + + // No error return the trimmed result + m_errorMessage.clear(); + buffer.resize(std::min(static_cast(string_len), buffer.size())); + return buffer; + } + +private: + SOCKET_TYPE m_socket; + std::string m_errorMessage; +}; + +} // namespace Statsd + +#endif diff --git a/deps/cpp-statsd-client/tests/testStatsdClient.cpp b/deps/cpp-statsd-client/tests/testStatsdClient.cpp new file mode 100644 index 000000000..74db8468e --- /dev/null +++ b/deps/cpp-statsd-client/tests/testStatsdClient.cpp @@ -0,0 +1,184 @@ +#include + +#include "StatsdServer.hpp" +#include "cpp-statsd-client/StatsdClient.hpp" + +using namespace Statsd; + +// Each test suite below spawns a thread to recv the client messages over UDP as if it were a real statsd server +// Note that we could just synchronously recv metrics and not use a thread but doing the test async has the +// advantage that we can test the threaded batching mode in a straightforward way. 
The server thread basically +// just keeps storing metrics in an vector until it hears a special one signaling the test is over and bails +void mock(StatsdServer& server, std::vector& messages) { + do { + // Grab the messages that are waiting + auto recvd = server.receive(); + + // Split the messages on '\n' + auto start = std::string::npos; + do { + // Keep this message + auto end = recvd.find('\n', ++start); + messages.emplace_back(recvd.substr(start, end)); + start = end; + + // Bail if we found the special quit message + if (messages.back().find("DONE") != std::string::npos) { + messages.pop_back(); + return; + } + } while (start != std::string::npos); + } while (server.errorMessage().empty() && !messages.back().empty()); +} + +template +void throwOnError(const SocketWrapper& wrapped, bool expectEmpty = true, const std::string& extraMessage = "") { + if (wrapped.errorMessage().empty() != expectEmpty) { + std::cerr << (expectEmpty ? wrapped.errorMessage() : extraMessage) << std::endl; + throw std::runtime_error(expectEmpty ? 
wrapped.errorMessage() : extraMessage); + } +} + +void throwOnWrongMessage(StatsdServer& server, const std::string& expected) { + auto actual = server.receive(); + if (actual != expected) { + std::cerr << "Expected: " << expected << " but got: " << actual << std::endl; + throw std::runtime_error("Incorrect stat received"); + } +} + +void testErrorConditions() { + // Resolve a rubbish ip and make sure initialization failed + StatsdClient client{"", 8125, "myPrefix", 20}; + throwOnError(client, false, "Should not be able to resolve a ridiculous ip"); +} + +void testReconfigure() { + StatsdServer server; + throwOnError(server); + + StatsdClient client("localhost", 8125, "first."); + client.increment("foo"); + throwOnWrongMessage(server, "first.foo:1|c"); + + client.setConfig("localhost", 8125, "second"); + client.increment("bar"); + throwOnWrongMessage(server, "second.bar:1|c"); + + client.setConfig("localhost", 8125, ""); + client.increment("third.baz"); + throwOnWrongMessage(server, "third.baz:1|c"); + + client.increment(""); + throwOnWrongMessage(server, ":1|c"); + + // TODO: test what happens with the batching after resolving the question about incomplete + // batches being dropped vs sent on reconfiguring +} + +void testSendRecv(uint64_t batchSize, uint64_t sendInterval) { + StatsdServer mock_server; + std::vector messages, expected; + std::thread server(mock, std::ref(mock_server), std::ref(messages)); + + // Set a new config that has the client send messages to a proper address that can be resolved + StatsdClient client("localhost", 8125, "sendRecv.", batchSize, sendInterval, 3); + throwOnError(client); + + // TODO: I forget if we need to wait for the server to be ready here before sending the first stats + // is there a race condition where the client sending before the server binds would drop that clients message + + for (int i = 0; i < 3; ++i) { + // Increment "coco" + client.increment("coco"); + throwOnError(client); + 
expected.emplace_back("sendRecv.coco:1|c"); + + // Decrement "kiki" + client.decrement("kiki"); + throwOnError(client); + expected.emplace_back("sendRecv.kiki:-1|c"); + + // Adjusts "toto" by +2 + client.seed(19); // this seed gets a hit on the first call + client.count("toto", 2, 0.1f); + throwOnError(client); + expected.emplace_back("sendRecv.toto:2|c|@0.10"); + + // Gets "sampled out" by the random number generator + client.count("popo", 9, 0.1f); + throwOnError(client); + + // Record a gauge "titi" to 3 + client.gauge("titi", 3); + throwOnError(client); + expected.emplace_back("sendRecv.titi:3|g"); + + // Record a gauge "titifloat" to -123.456789 with precision 3 + client.gauge("titifloat", -123.456789); + throwOnError(client); + expected.emplace_back("sendRecv.titifloat:-123.457|g"); + + // Record a timing of 2ms for "myTiming" + client.seed(19); + client.timing("myTiming", 2, 0.1f); + throwOnError(client); + expected.emplace_back("sendRecv.myTiming:2|ms|@0.10"); + + // Send a set with 1227 total uniques + client.set("tutu", 1227, 2.0f); + throwOnError(client); + expected.emplace_back("sendRecv.tutu:1227|s"); + + // Gauge but with tags + client.gauge("dr.röstigrabe", 333, 1.f, {"liegt", "im", "weste"}); + throwOnError(client); + expected.emplace_back("sendRecv.dr.röstigrabe:333|g|#liegt,im,weste"); + + // All the things + client.count("foo", -42, .9f, {"bar", "baz"}); + throwOnError(client); + expected.emplace_back("sendRecv.foo:-42|c|@0.90|#bar,baz"); + } + + // Signal the mock server we are done + client.timing("DONE", 0); + + // If manual flushing do it now + if (sendInterval == 0) { + client.flush(); + } + + // Wait for the server to stop + server.join(); + + // Make sure we get the exactly correct output + if (messages != expected) { + std::cerr << "Unexpected stats received by server, got:" << std::endl; + for (const auto& message : messages) { + std::cerr << message << std::endl; + } + std::cerr << std::endl << "But we expected:" << std::endl; + for 
(const auto& message : expected) { + std::cerr << message << std::endl; + } + throw std::runtime_error("Unexpected stats"); + } +} + +int main() { + // If any of these tests fail they throw an exception, not catching makes for a nonzero return code + + // general things that should be errors + testErrorConditions(); + // reconfiguring how you are sending + testReconfigure(); + // no batching + testSendRecv(0, 0); + // background batching + testSendRecv(32, 1000); + // manual flushing of batches + testSendRecv(16, 0); + + return EXIT_SUCCESS; +} diff --git a/machamp_scripts/Dockerfile b/machamp_scripts/Dockerfile new file mode 100644 index 000000000..b7f3bf6e4 --- /dev/null +++ b/machamp_scripts/Dockerfile @@ -0,0 +1,114 @@ +FROM ubuntu:20.04 +SHELL ["/bin/bash","-c"] +RUN groupadd -r keydb && useradd -r -g keydb keydb +# use gosu for easy step-down from root: https://github.com/tianon/gosu/releases +ENV GOSU_VERSION 1.14 +RUN set -eux; \ + savedAptMark="$(apt-mark showmanual)"; \ + apt-get update; \ + apt-get -o Dpkg::Options::="--force-confnew" install -y --no-install-recommends ca-certificates dirmngr gnupg wget; \ + rm -rf /var/lib/apt/lists/*; \ + dpkgArch="$(dpkg --print-architecture | awk -F- '{ print $NF }')"; \ + wget -O /usr/local/bin/gosu "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch"; \ + wget -O /usr/local/bin/gosu.asc "https://github.com/tianon/gosu/releases/download/$GOSU_VERSION/gosu-$dpkgArch.asc"; \ + export GNUPGHOME="$(mktemp -d)"; \ + gpg --batch --keyserver hkps://keys.openpgp.org --recv-keys B42F6819007F00F88E364FD4036A9C25BF357DD4; \ + gpg --batch --verify /usr/local/bin/gosu.asc /usr/local/bin/gosu; \ + gpgconf --kill all; \ + rm -rf "$GNUPGHOME" /usr/local/bin/gosu.asc; \ + apt-mark auto '.*' > /dev/null; \ + [ -z "$savedAptMark" ] || apt-mark manual $savedAptMark > /dev/null; \ + apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \ + chmod +x /usr/local/bin/gosu; \ + gosu 
--version; \
+    gosu nobody true
+# build KeyDB
+ARG MAKE_JOBS=""
+ARG ENABLE_FLASH=""
+COPY . /tmp/keydb-internal
+RUN set -eux; \
+    cd /tmp/keydb-internal; \
+    savedAptMark="$(apt-mark showmanual)"; \
+    apt-get update; \
+    DEBIAN_FRONTEND=noninteractive apt-get -o Dpkg::Options::="--force-confnew" install -qqy --no-install-recommends \
+        dpkg-dev \
+        pkg-config \
+        ca-certificates \
+        build-essential \
+        nasm \
+        autotools-dev \
+        autoconf \
+        libjemalloc-dev \
+        tcl \
+        tcl-dev \
+        uuid-dev \
+        libcurl4-openssl-dev \
+        libbz2-dev \
+        libzstd-dev \
+        liblz4-dev \
+        libsnappy-dev \
+        libssl-dev \
+        git; \
+    # disable protected mode as it relates to docker
+    grep -E '^ *createBoolConfig[(]"protected-mode",.*, *1 *,.*[)],$' ./src/config.cpp; \
+    sed -ri 's!^( *createBoolConfig[(]"protected-mode",.*, *)1( *,.*[)],)$!\10\2!' ./src/config.cpp; \
+    grep -E '^ *createBoolConfig[(]"protected-mode",.*, *0 *,.*[)],$' ./src/config.cpp; \
+    make distclean; \
+    make -j$([ -z "$MAKE_JOBS" ] && nproc || echo "$MAKE_JOBS") BUILD_TLS=yes NO_LICENSE_CHECK=yes $([ -z "$ENABLE_FLASH" ] && echo "" || echo "ENABLE_FLASH=$ENABLE_FLASH"); \
+    cd src; \
+    mv modules/keydb_modstatsd/modstatsd.so /usr/local/lib/; \
+    strip keydb-cli keydb-benchmark keydb-check-rdb keydb-check-aof keydb-diagnostic-tool keydb-sentinel; \
+    mv keydb-server keydb-cli keydb-benchmark keydb-check-rdb keydb-check-aof keydb-diagnostic-tool keydb-sentinel /usr/local/bin/; \
+    # clean up unused dependencies
+    echo $savedAptMark; \
+    apt-mark auto '.*' > /dev/null; \
+    [ -z "$savedAptMark" ] || apt-mark manual $savedAptMark > /dev/null; \
+    find /usr/local -type f -executable -exec ldd '{}' ';' \
+        | awk '/=>/ { print $(NF-1) }' \
+        | sed 's:.*/::' \
+        | sort -u \
+        | xargs -r dpkg-query --search \
+        | cut -d: -f1 \
+        | sort -u \
+        | xargs -r apt-mark manual \
+    ; \
+    apt-get purge -y --auto-remove -o APT::AutoRemove::RecommendsImportant=false; \
+    rm -rf /var/lib/apt/lists/*
+# create working directories and
organize files
+RUN \
+    mkdir /data && chown keydb:keydb /data; \
+    mkdir /flash && chown keydb:keydb /flash; \
+    mkdir -p /etc/keydb; \
+    cp /tmp/keydb-internal/keydb.conf /etc/keydb/; \
+    sed -i 's/^\(daemonize .*\)$/# \1/' /etc/keydb/keydb.conf; \
+    sed -i 's/^\(dir .*\)$/# \1\ndir \/data/' /etc/keydb/keydb.conf; \
+    sed -i 's/^\(logfile .*\)$/# \1/' /etc/keydb/keydb.conf; \
+    sed -i 's/protected-mode yes/protected-mode no/g' /etc/keydb/keydb.conf; \
+    sed -i 's/^\(bind .*\)$/# \1/' /etc/keydb/keydb.conf; \
+    echo -e "\nloadmodule /usr/local/lib/modstatsd.so" >> /etc/keydb/keydb.conf; \
+    ln -s keydb-cli /usr/local/bin/redis-cli; \
+    cd /etc/keydb; \
+    ln -s keydb.conf redis.conf; \
+    rm -rf /tmp/*
+# generate entrypoint script
+RUN set -eux; \
+    echo '#!/bin/sh' > /usr/local/bin/docker-entrypoint.sh; \
+    echo 'set -e' >> /usr/local/bin/docker-entrypoint.sh; \
+    echo "# perpend 'keydb-server' if not provided as first argument" >> /usr/local/bin/docker-entrypoint.sh; \
+    echo 'if [ "${1}" != "keydb-server" ]; then' >> /usr/local/bin/docker-entrypoint.sh; \
+    echo ' set -- keydb-server "$@"' >> /usr/local/bin/docker-entrypoint.sh; \
+    echo 'fi' >> /usr/local/bin/docker-entrypoint.sh; \
+    echo '# allow the container to be started with `--user`' >> /usr/local/bin/docker-entrypoint.sh; \
+    echo 'if [ "$1" = "keydb-server" -a "$(id -u)" = "0" ]; then' >> /usr/local/bin/docker-entrypoint.sh; \
+    echo " find . \!
-user keydb -exec chown keydb '{}' +" >> /usr/local/bin/docker-entrypoint.sh; \ + echo ' exec gosu keydb "$0" "$@"' >> /usr/local/bin/docker-entrypoint.sh; \ + echo 'fi' >> /usr/local/bin/docker-entrypoint.sh; \ + echo 'exec "$@"' >> /usr/local/bin/docker-entrypoint.sh; \ + chmod +x /usr/local/bin/docker-entrypoint.sh +# set remaining image properties +VOLUME /data +WORKDIR /data +ENV KEYDB_PRO_DIRECTORY=/usr/local/bin/ +ENTRYPOINT ["docker-entrypoint.sh"] +EXPOSE 6379 +CMD ["keydb-server","/etc/keydb/keydb.conf"] diff --git a/machamp_scripts/build.sh b/machamp_scripts/build.sh index e05a9b3bd..a395f26ee 100755 --- a/machamp_scripts/build.sh +++ b/machamp_scripts/build.sh @@ -2,7 +2,7 @@ # make the build git submodule init && git submodule update -make BUILD_TLS=yes -j$(nproc) KEYDB_CFLAGS='-Werror' KEYDB_CXXFLAGS='-Werror' +make BUILD_TLS=yes ENABLE_FLASH=yes -j$(nproc) KEYDB_CFLAGS='-Werror' KEYDB_CXXFLAGS='-Werror' # gen-cert ./utils/gen-test-certs.sh \ No newline at end of file diff --git a/src/IStorage.h b/src/IStorage.h index dc608d490..1e3542391 100644 --- a/src/IStorage.h +++ b/src/IStorage.h @@ -1,6 +1,7 @@ #pragma once #include #include "sds.h" +#include #define METADATA_DB_IDENTIFIER "c299fde0-6d42-4ec4-b939-34f680ffe39f" @@ -43,6 +44,11 @@ class IStorage endWriteBatch(); } + virtual std::vector getExpirationCandidates(unsigned int count) = 0; + virtual std::vector getEvictionCandidates(unsigned int count) = 0; + virtual void setExpire(const char *key, size_t cchKey, long long expire) = 0; + virtual void removeExpire(const char *key, size_t cchKey, long long expire) = 0; + virtual void beginWriteBatch() {} // NOP virtual void endWriteBatch() {} // NOP diff --git a/src/StorageCache.cpp b/src/StorageCache.cpp index 91d4b3657..ba7910399 100644 --- a/src/StorageCache.cpp +++ b/src/StorageCache.cpp @@ -84,19 +84,31 @@ void StorageCache::cacheKey(const char *rgch, size_t cch) bool StorageCache::erase(sds key) { + unsigned long long when = 0; + 
m_spstorage->retrieve(key, sdslen(key), [&when](const char *, size_t, const void * data, size_t cbdata) { + auto e = deserializeExpire((const char *)data, cbdata, nullptr); + if (e != nullptr) + when = e->when(); + }); bool result = m_spstorage->erase(key, sdslen(key)); std::unique_lock ul(m_lock); - if (result && m_pdict != nullptr) + if (result) { - uint64_t hash = dictSdsHash(key); - dictEntry *de = dictFind(m_pdict, reinterpret_cast(hash)); - serverAssert(de != nullptr); - de->v.s64--; - serverAssert(de->v.s64 >= 0); - if (de->v.s64 == 0) { - dictDelete(m_pdict, reinterpret_cast(hash)); - } else { - m_collisionCount--; + if (m_pdict != nullptr) + { + uint64_t hash = dictSdsHash(key); + dictEntry *de = dictFind(m_pdict, reinterpret_cast(hash)); + serverAssert(de != nullptr); + de->v.s64--; + serverAssert(de->v.s64 >= 0); + if (de->v.s64 == 0) { + dictDelete(m_pdict, reinterpret_cast(hash)); + } else { + m_collisionCount--; + } + } + if (when != 0) { + m_spstorage->removeExpire(key, sdslen(key), when); } } return result; @@ -111,6 +123,9 @@ void StorageCache::insert(sds key, const void *data, size_t cbdata, bool fOverwr } ul.unlock(); m_spstorage->insert(key, sdslen(key), (void*)data, cbdata, fOverwrite); + auto e = deserializeExpire((const char *)data, cbdata, nullptr); + if (e != nullptr) + m_spstorage->setExpire(key, sdslen(key), e->when()); } long _dictKeyIndex(dict *d, const void *key, uint64_t hash, dictEntry **existing); @@ -119,13 +134,18 @@ void StorageCache::bulkInsert(char **rgkeys, size_t *rgcbkeys, char **rgvals, si std::vector vechashes; if (m_pdict != nullptr) { vechashes.reserve(celem); + } - for (size_t ielem = 0; ielem < celem; ++ielem) { + for (size_t ielem = 0; ielem < celem; ++ielem) { + if (m_pdict != nullptr) { dictEntry *de = (dictEntry*)zmalloc(sizeof(dictEntry)); de->key = (void*)dictGenHashFunction(rgkeys[ielem], (int)rgcbkeys[ielem]); de->v.u64 = 1; vechashes.push_back(de); } + auto e = deserializeExpire(rgvals[ielem], rgcbvals[ielem], 
nullptr); + if (e != nullptr) + m_spstorage->setExpire(rgkeys[ielem], rgcbkeys[ielem], e->when()); } std::unique_lock ul(m_lock); diff --git a/src/StorageCache.h b/src/StorageCache.h index 4f3c1a374..829b41f08 100644 --- a/src/StorageCache.h +++ b/src/StorageCache.h @@ -51,11 +51,18 @@ class StorageCache bool enumerate(IStorage::callback fn) const { return m_spstorage->enumerate(fn); } bool enumerate_hashslot(IStorage::callback fn, unsigned int hashslot) const { return m_spstorage->enumerate_hashslot(fn, hashslot); } + std::vector getExpirationCandidates(unsigned int count) { return m_spstorage->getExpirationCandidates(count); } + std::vector getEvictionCandidates(unsigned int count) { return m_spstorage->getEvictionCandidates(count); } + void setExpire(const char *key, size_t cchKey, long long expire) { m_spstorage->setExpire(key, cchKey, expire); } + void removeExpire(const char *key, size_t cchKey, long long expire) { m_spstorage->removeExpire(key, cchKey, expire); } + void beginWriteBatch(); void endWriteBatch() { m_spstorage->endWriteBatch(); } void batch_lock() { return m_spstorage->batch_lock(); } void batch_unlock() { return m_spstorage->batch_unlock(); } + void flush() { m_spstorage->flush(); } + size_t count() const; const StorageCache *clone(); diff --git a/src/ae.cpp b/src/ae.cpp index a43969fe0..f2fb61ed3 100644 --- a/src/ae.cpp +++ b/src/ae.cpp @@ -890,7 +890,7 @@ void aeReleaseForkLock() void aeForkLockInChild() { - g_forkLock.setNotify(false); + g_forkLock.setMulti(false); } int aeThreadOwnsLock() diff --git a/src/aof.cpp b/src/aof.cpp index e529b4b0e..2f367ee4b 100644 --- a/src/aof.cpp +++ b/src/aof.cpp @@ -1592,8 +1592,7 @@ int rewriteAppendOnlyFileRio(rio *aof) { } /* Save the expire time */ if (o->FExpires()) { - std::unique_lock ul(g_expireLock); - expireEntry *pexpire = db->getExpire(&key); + expireEntry *pexpire = &o->expire; for (auto &subExpire : *pexpire) { if (subExpire.subkey() == nullptr) { diff --git a/src/cluster.cpp b/src/cluster.cpp 
index 82ad3d271..8e830df80 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -5610,7 +5610,6 @@ void migrateCommand(client *c) { /* Create RESTORE payload and generate the protocol to call the command. */ for (j = 0; j < num_keys; j++) { long long ttl = 0; - std::unique_lock ul(g_expireLock); expireEntry *pexpire = c->db->getExpire(kv[j]); long long expireat = INVALID_EXPIRE; if (pexpire != nullptr) @@ -5992,7 +5991,9 @@ clusterNode *getNodeByQuery(client *c, struct redisCommand *cmd, robj **argv, in /* If it is not the first key, make sure it is exactly * the same key as the first we saw. */ if (!equalStringObjects(firstkey,thiskey)) { - if (slot != thisslot) { + clusterNode* nThisKey = g_pserver->cluster->slots[thisslot]; + + if ((slot != thisslot) && (nThisKey != n || migrating_slot || importing_slot || g_pserver->cluster->migrating_slots_to[slot] != nullptr || g_pserver->cluster->importing_slots_from[slot] != nullptr)) { /* Error: multiple keys from different slots. */ getKeysFreeResult(&result); if (error_code) diff --git a/src/config.cpp b/src/config.cpp index 574cf89cb..32d5270cc 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -2597,6 +2597,19 @@ static int updateMaxmemory(long long val, long long prev, const char **err) { return 1; } +static int updateFlashMaxmemory(long long val, long long prev, const char **err) { + UNUSED(prev); + UNUSED(err); + if (val && g_pserver->m_pstorageFactory) { + size_t used = g_pserver->m_pstorageFactory->totalDiskspaceUsed(); + if ((unsigned long long)val < used) { + serverLog(LL_WARNING,"WARNING: the new maxstorage value set via CONFIG SET (%llu) is smaller than the current storage usage (%zu). 
This will result in key eviction and/or the inability to accept new write commands depending on the maxmemory-policy.", g_pserver->maxstorage, used); + } + performEvictions(false /*fPreSnapshot*/); + } + return 1; +} + static int updateGoodSlaves(long long val, long long prev, const char **err) { UNUSED(val); UNUSED(prev); @@ -2908,7 +2921,7 @@ standardConfig configs[] = { createIntConfig("list-compress-depth", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->list_compress_depth, 0, INTEGER_CONFIG, NULL, NULL), createIntConfig("rdb-key-save-delay", NULL, MODIFIABLE_CONFIG, INT_MIN, INT_MAX, g_pserver->rdb_key_save_delay, 0, INTEGER_CONFIG, NULL, NULL), createIntConfig("key-load-delay", NULL, MODIFIABLE_CONFIG, INT_MIN, INT_MAX, g_pserver->key_load_delay, 0, INTEGER_CONFIG, NULL, NULL), - createIntConfig("active-expire-effort", NULL, MODIFIABLE_CONFIG, 1, 10, cserver.active_expire_effort, 1, INTEGER_CONFIG, NULL, NULL), /* From 1 to 10. */ + createIntConfig("active-expire-effort", NULL, MODIFIABLE_CONFIG, 1, 10, g_pserver->active_expire_effort, 1, INTEGER_CONFIG, NULL, NULL), /* From 1 to 10. 
*/ createIntConfig("hz", NULL, MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->config_hz, CONFIG_DEFAULT_HZ, INTEGER_CONFIG, NULL, updateHZ), createIntConfig("min-replicas-to-write", "min-slaves-to-write", MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->repl_min_slaves_to_write, 0, INTEGER_CONFIG, NULL, updateGoodSlaves), createIntConfig("min-replicas-max-lag", "min-slaves-max-lag", MODIFIABLE_CONFIG, 0, INT_MAX, g_pserver->repl_min_slaves_max_lag, 10, INTEGER_CONFIG, NULL, updateGoodSlaves), @@ -2940,7 +2953,7 @@ standardConfig configs[] = { /* Unsigned Long Long configs */ createULongLongConfig("maxmemory", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxmemory, 0, MEMORY_CONFIG, NULL, updateMaxmemory), - createULongLongConfig("maxstorage", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxstorage, 0, MEMORY_CONFIG, NULL, NULL), + createULongLongConfig("maxstorage", NULL, MODIFIABLE_CONFIG, 0, LLONG_MAX, g_pserver->maxstorage, 0, MEMORY_CONFIG, NULL, updateFlashMaxmemory), /* Size_t configs */ createSizeTConfig("hash-max-ziplist-entries", NULL, MODIFIABLE_CONFIG, 0, LONG_MAX, g_pserver->hash_max_ziplist_entries, 512, INTEGER_CONFIG, NULL, NULL), @@ -2969,6 +2982,7 @@ standardConfig configs[] = { createSDSConfig("availability-zone", NULL, MODIFIABLE_CONFIG, 0, g_pserver->sdsAvailabilityZone, "", NULL, NULL), createIntConfig("overload-protect-percent", NULL, MODIFIABLE_CONFIG, 0, 200, g_pserver->overload_protect_threshold, 0, INTEGER_CONFIG, NULL, NULL), createIntConfig("force-eviction-percent", NULL, MODIFIABLE_CONFIG, 0, 100, g_pserver->force_eviction_percent, 0, INTEGER_CONFIG, NULL, NULL), + createBoolConfig("enable-async-rehash", NULL, MODIFIABLE_CONFIG, g_pserver->enable_async_rehash, 1, NULL, NULL), #ifdef USE_OPENSSL createIntConfig("tls-port", NULL, MODIFIABLE_CONFIG, 0, 65535, g_pserver->tls_port, 0, INTEGER_CONFIG, NULL, updateTLSPort), /* TCP port. 
*/ diff --git a/src/db.cpp b/src/db.cpp index 297910b9e..75a543f01 100644 --- a/src/db.cpp +++ b/src/db.cpp @@ -55,8 +55,8 @@ struct dbBackup { int expireIfNeeded(redisDb *db, robj *key, robj *o); void slotToKeyUpdateKeyCore(const char *key, size_t keylen, int add); -std::unique_ptr deserializeExpire(sds key, const char *str, size_t cch, size_t *poffset); -sds serializeStoredObjectAndExpire(redisDbPersistentData *db, const char *key, robj_roptr o); +std::unique_ptr deserializeExpire(const char *str, size_t cch, size_t *poffset); +sds serializeStoredObjectAndExpire(robj_roptr o); dictType dictChangeDescType { dictSdsHash, /* hash function */ @@ -83,6 +83,7 @@ void updateExpire(redisDb *db, sds key, robj *valOld, robj *valNew) serverAssert(db->FKeyExpires((const char*)key)); + valNew->expire = std::move(valOld->expire); valNew->SetFExpires(true); valOld->SetFExpires(false); return; @@ -281,8 +282,8 @@ robj *lookupKeyWriteOrReply(client *c, robj *key, robj *reply) { return o; } -bool dbAddCore(redisDb *db, sds key, robj *val, bool fUpdateMvcc, bool fAssumeNew = false, dict_iter *piterExisting = nullptr) { - serverAssert(!val->FExpires()); +bool dbAddCore(redisDb *db, sds key, robj *val, bool fUpdateMvcc, bool fAssumeNew = false, dict_iter *piterExisting = nullptr, bool fValExpires = false) { + serverAssert(fValExpires || !val->FExpires()); sds copy = sdsdupshared(key); uint64_t mvcc = getMvccTstamp(); @@ -1494,15 +1495,6 @@ void renameGenericCommand(client *c, int nx) { incrRefCount(o); - std::unique_ptr spexpire; - - { // scope pexpireOld since it will be invalid soon - std::unique_lock ul(g_expireLock); - expireEntry *pexpireOld = c->db->getExpire(c->argv[1]); - if (pexpireOld != nullptr) - spexpire = std::make_unique(std::move(*pexpireOld)); - } - if (lookupKeyWrite(c->db,c->argv[2]) != NULL) { if (nx) { decrRefCount(o); @@ -1513,10 +1505,12 @@ void renameGenericCommand(client *c, int nx) { * with the same name. 
*/ dbDelete(c->db,c->argv[2]); } + bool fExpires = o->FExpires(); + long long whenT = o->expire.when(); dbDelete(c->db,c->argv[1]); - dbAdd(c->db,c->argv[2],o); - if (spexpire != nullptr) - setExpire(c,c->db,c->argv[2],std::move(*spexpire)); + o->SetFExpires(fExpires); + dbAddCore(c->db,szFromObj(c->argv[2]),o,true /*fUpdateMvcc*/,true/*fAssumeNew*/,nullptr,true/*fValExpires*/); + serverAssert(whenT == o->expire.when()); // dbDelete and dbAdd must not modify the expire, just the FExpire bit signalModifiedKey(c,c->db,c->argv[1]); signalModifiedKey(c,c->db,c->argv[2]); notifyKeyspaceEvent(NOTIFY_GENERIC,"rename_from", @@ -1579,22 +1573,15 @@ void moveCommand(client *c) { return; } - std::unique_ptr spexpire; - { // scope pexpireOld - std::unique_lock ul(g_expireLock); - expireEntry *pexpireOld = c->db->getExpire(c->argv[1]); - if (pexpireOld != nullptr) - spexpire = std::make_unique(std::move(*pexpireOld)); - } - if (o->FExpires()) - removeExpire(c->db,c->argv[1]); - serverAssert(!o->FExpires()); incrRefCount(o); + bool fExpire = o->FExpires(); + long long whenT = o->expire.when(); dbDelete(src,c->argv[1]); g_pserver->dirty++; - dbAdd(dst,c->argv[1],o); - if (spexpire != nullptr) setExpire(c,dst,c->argv[1],std::move(*spexpire)); + o->SetFExpires(fExpire); + dbAddCore(dst, szFromObj(c->argv[1]), o, true /*fUpdateMvcc*/, true /*fAssumeNew*/, nullptr, true /*fValExpires*/); + serverAssert(whenT == o->expire.when()); // add/delete must not modify the expire time signalModifiedKey(c,src,c->argv[1]); signalModifiedKey(c,dst,c->argv[1]); @@ -1662,7 +1649,7 @@ void copyCommand(client *c) { addReply(c,shared.czero); return; } - expire = c->db->getExpire(key); + expire = o->FExpires() ? &o->expire : nullptr; /* Return zero if the key already exists in the target DB. * If REPLACE option is selected, delete newkey from targetDB. 
*/ @@ -1829,63 +1816,48 @@ int redisDbPersistentData::removeExpire(robj *key, dict_iter itr) { /* An expire may only be removed if there is a corresponding entry in the * main dict. Otherwise, the key will never be freed. */ serverAssertWithInfo(NULL,key,itr != nullptr); - std::unique_lock ul(g_expireLock); robj *val = itr.val(); if (!val->FExpires()) return 0; trackkey(key, true /* fUpdate */); - auto itrExpire = m_setexpire->find(itr.key()); - serverAssert(itrExpire != m_setexpire->end()); - m_setexpire->erase(itrExpire); val->SetFExpires(false); + serverAssert(m_numexpires > 0); + m_numexpires--; return 1; } int redisDbPersistentData::removeSubkeyExpire(robj *key, robj *subkey) { auto de = find(szFromObj(key)); serverAssertWithInfo(NULL,key,de != nullptr); - std::unique_lock ul(g_expireLock); robj *val = de.val(); if (!val->FExpires()) return 0; - - auto itr = m_setexpire->find(de.key()); - serverAssert(itr != m_setexpire->end()); - serverAssert(itr->key() == de.key()); - if (!itr->FFat()) + + if (!val->expire.FFat()) return 0; int found = 0; - for (auto subitr : *itr) + for (auto subitr : val->expire) { if (subitr.subkey() == nullptr) continue; if (sdscmp((sds)subitr.subkey(), szFromObj(subkey)) == 0) { - itr->erase(subitr); + val->expire.erase(subitr); found = 1; break; } } - if (itr->pfatentry()->size() == 0) + if (val->expire.pfatentry()->size() == 0) this->removeExpire(key, de); return found; } -void redisDbPersistentData::resortExpire(expireEntry &e) -{ - std::unique_lock ul(g_expireLock); - auto itr = m_setexpire->find(e.key()); - expireEntry eT = std::move(e); - m_setexpire->erase(itr); - m_setexpire->insert(eT); -} - /* Set an expire to the specified key. If the expire is set in the context * of an user calling a command 'c' is the client, otherwise 'c' is set * to NULL. 
The 'when' parameter is the absolute unix time in milliseconds @@ -1940,10 +1912,7 @@ void setExpire(client *c, redisDb *db, robj *key, expireEntry &&e) if (kde.val()->FExpires()) removeExpire(db, key); - e.setKeyUnsafe(kde.key()); - db->setExpire(std::move(e)); - kde.val()->SetFExpires(true); - + db->setExpire(kde.key(), std::move(e)); int writable_slave = listLength(g_pserver->masters) && g_pserver->repl_slave_ro == 0 && !g_pserver->fActiveReplica; if (c && writable_slave && !(c->flags & CLIENT_MASTER)) @@ -1954,14 +1923,15 @@ void setExpire(client *c, redisDb *db, robj *key, expireEntry &&e) * is associated with this key (i.e. the key is non volatile) */ expireEntry *redisDbPersistentDataSnapshot::getExpire(const char *key) { /* No expire? return ASAP */ - std::unique_lock ul(g_expireLock); if (expireSize() == 0) return nullptr; - auto itrExpire = m_setexpire->find(key); - if (itrExpire == m_setexpire->end()) + auto itr = find_cached_threadsafe(key); + if (itr == end()) + return nullptr; + if (!itr.val()->FExpires()) return nullptr; - return itrExpire.operator->(); + return &itr.val()->expire; } const expireEntry *redisDbPersistentDataSnapshot::getExpire(const char *key) const @@ -2062,15 +2032,13 @@ int keyIsExpired(const redisDbPersistentDataSnapshot *db, robj *key) { /* Don't expire anything while loading. It will be done later. 
*/ if (g_pserver->loading) return 0; - std::unique_lock ul(g_expireLock); const expireEntry *pexpire = db->getExpire(key); mstime_t now; + long long when; if (pexpire == nullptr) return 0; /* No expire for this key */ - long long when = pexpire->FGetPrimaryExpire(); - - if (when == INVALID_EXPIRE) + if (!pexpire->FGetPrimaryExpire(&when)) return 0; /* If we are in the context of a Lua script, we pretend that time is @@ -2632,7 +2600,6 @@ void redisDbPersistentData::initialize() m_pdbSnapshot = nullptr; m_pdict = dictCreate(&dbDictType,this); m_pdictTombstone = dictCreate(&dbTombstoneDictType,this); - m_setexpire = new(MALLOC_LOCAL) expireset(); m_fAllChanged = 0; m_fTrackingChanges = 0; } @@ -2668,7 +2635,6 @@ void moduleClusterLoadCallback(const char * rgchKey, size_t cchKey, void *data) void redisDb::initialize(int id) { redisDbPersistentData::initialize(); - this->expireitr = setexpire()->end(); this->blocking_keys = dictCreate(&keylistDictType,NULL); this->ready_keys = dictCreate(&objectKeyPointerValueDictType,NULL); this->watched_keys = dictCreate(&keylistDictType,NULL); @@ -2714,6 +2680,8 @@ bool redisDbPersistentData::insert(char *key, robj *o, bool fAssumeNew, dict_ite serverAssert(dictFind(m_pdictTombstone, key) != nullptr); } #endif + if (o->FExpires()) + ++m_numexpires; trackkey(key, false /* fUpdate */); } else @@ -2735,11 +2703,6 @@ void redisDbPersistentData::prepOverwriteForSnapshot(char *key) auto itr = m_pdbSnapshot->find_cached_threadsafe(key); if (itr.key() != nullptr) { - if (itr.val()->FExpires()) { - // Note: I'm sure we could handle this, but its too risky at the moment. 
- // There are known bugs doing this with expires - return; - } sds keyNew = sdsdupshared(itr.key()); if (dictAdd(m_pdictTombstone, keyNew, (void*)dictHashKey(m_pdict, key)) != DICT_OK) sdsfree(keyNew); @@ -2761,7 +2724,7 @@ size_t redisDb::clear(bool fAsync, void(callback)(void*)) } else { redisDbPersistentData::clear(callback); } - expireitr = setexpire()->end(); + expires_cursor = 0; return removed; } @@ -2774,59 +2737,64 @@ void redisDbPersistentData::clear(void(callback)(void*)) m_cnewKeysPending = 0; m_fAllChanged++; } - { - std::unique_lock ul(g_expireLock); - delete m_setexpire; - m_setexpire = new (MALLOC_LOCAL) expireset(); - } if (m_spstorage != nullptr) m_spstorage->clear(callback); dictEmpty(m_pdictTombstone,callback); + + // To avoid issues with async rehash we completly free the old dict and create a fresh one + dictRelease(m_pdict); + dictRelease(m_pdictTombstone); + m_pdict = dictCreate(&dbDictType, this); + m_pdictTombstone = dictCreate(&dbTombstoneDictType, this); + m_pdbSnapshot = nullptr; + m_numexpires = 0; } void redisDbPersistentData::setExpire(robj *key, robj *subkey, long long when) { /* Reuse the sds from the main dict in the expire dict */ - std::unique_lock ul(g_expireLock); dictEntry *kde = dictFind(m_pdict,ptrFromObj(key)); serverAssertWithInfo(NULL,key,kde != NULL); trackkey(key, true /* fUpdate */); - if (((robj*)dictGetVal(kde))->getrefcount(std::memory_order_relaxed) == OBJ_SHARED_REFCOUNT) + robj *o = (robj*)dictGetVal(kde); + if (o->getrefcount(std::memory_order_relaxed) == OBJ_SHARED_REFCOUNT) { // shared objects cannot have the expire bit set, create a real object - dictSetVal(m_pdict, kde, dupStringObject((robj*)dictGetVal(kde))); + dictSetVal(m_pdict, kde, dupStringObject(o)); + o = (robj*)dictGetVal(kde); } const char *szSubKey = (subkey != nullptr) ? 
szFromObj(subkey) : nullptr; - if (((robj*)dictGetVal(kde))->FExpires()) { - auto itr = m_setexpire->find((sds)dictGetKey(kde)); - serverAssert(itr != m_setexpire->end()); - expireEntry eNew(std::move(*itr)); - eNew.update(szSubKey, when); - m_setexpire->erase(itr); - m_setexpire->insert(eNew); + if (o->FExpires()) { + o->expire.update(szSubKey, when); } else { - expireEntry e((sds)dictGetKey(kde), szSubKey, when); - ((robj*)dictGetVal(kde))->SetFExpires(true); - m_setexpire->insert(e); + expireEntry e(szSubKey, when); + o->expire = std::move(e); + o->SetFExpires(true); + ++m_numexpires; } } -void redisDbPersistentData::setExpire(expireEntry &&e) +void redisDbPersistentData::setExpire(const char *key, expireEntry &&e) { - std::unique_lock ul(g_expireLock); - trackkey(e.key(), true /* fUpdate */); - m_setexpire->insert(e); + trackkey(key, true /* fUpdate */); + auto itr = find(key); + if (!itr->FExpires()) + m_numexpires++; + itr->expire = std::move(e); + itr->SetFExpires(true); } bool redisDb::FKeyExpires(const char *key) { - std::unique_lock ul(g_expireLock); - return setexpireUnsafe()->find(key) != setexpire()->end(); + auto itr = find(key); + if (itr == end()) + return false; + return itr->FExpires(); } void redisDbPersistentData::updateValue(dict_iter itr, robj *val) @@ -2850,7 +2818,6 @@ void redisDbPersistentData::ensure(const char *sdsKey, dictEntry **pde) serverAssert(m_refCount == 0); if (m_pdbSnapshot == nullptr && g_pserver->m_pstorageFactory == nullptr) return; - std::unique_lock ul(g_expireLock); // First see if the key can be obtained from a snapshot if (*pde == nullptr && m_pdbSnapshot != nullptr) @@ -2872,7 +2839,11 @@ void redisDbPersistentData::ensure(const char *sdsKey, dictEntry **pde) else { sds strT = serializeStoredObject(itr.val()); - robj *objNew = deserializeStoredObject(this, sdsKey, strT, sdslen(strT)); + robj *objNew = deserializeStoredObject(strT, sdslen(strT)); + if (itr->FExpires()) { + objNew->expire = itr->expire; + 
objNew->SetFExpires(true); + } sdsfree(strT); dictAdd(m_pdict, keyNew, objNew); serverAssert(objNew->getrefcount(std::memory_order_relaxed) == 1); @@ -2902,26 +2873,19 @@ void redisDbPersistentData::ensure(const char *sdsKey, dictEntry **pde) std::unique_ptr spexpire; m_spstorage->retrieve((sds)sdsKey, [&](const char *, size_t, const void *data, size_t cb){ size_t offset = 0; - spexpire = deserializeExpire(sdsNewKey, (const char*)data, cb, &offset); - o = deserializeStoredObject(this, sdsNewKey, reinterpret_cast(data) + offset, cb - offset); + spexpire = deserializeExpire((const char*)data, cb, &offset); + o = deserializeStoredObject(reinterpret_cast(data) + offset, cb - offset); serverAssert(o != nullptr); }); if (o != nullptr) { dictAdd(m_pdict, sdsNewKey, o); - o->SetFExpires(spexpire != nullptr); - std::unique_lock ul(g_expireLock); - if (spexpire != nullptr) - { - auto itr = m_setexpire->find(sdsKey); - if (itr != m_setexpire->end()) - m_setexpire->erase(itr); - m_setexpire->insert(std::move(*spexpire)); - serverAssert(m_setexpire->find(sdsKey) != m_setexpire->end()); + o->SetFExpires(spexpire != nullptr); + if (spexpire != nullptr) { + o->expire = std::move(*spexpire); } - serverAssert(o->FExpires() == (m_setexpire->find(sdsKey) != m_setexpire->end())); g_pserver->stat_storage_provider_read_hits++; } else { sdsfree(sdsNewKey); @@ -2931,18 +2895,11 @@ void redisDbPersistentData::ensure(const char *sdsKey, dictEntry **pde) *pde = dictFind(m_pdict, sdsKey); } } - - if (*pde != nullptr && dictGetVal(*pde) != nullptr) - { - robj *o = (robj*)dictGetVal(*pde); - std::unique_lock ul(g_expireLock); - serverAssert(o->FExpires() == (m_setexpire->find(sdsKey) != m_setexpire->end())); - } } void redisDbPersistentData::storeKey(sds key, robj *o, bool fOverwrite) { - sds temp = serializeStoredObjectAndExpire(this, key, o); + sds temp = serializeStoredObjectAndExpire(o); m_spstorage->insert(key, temp, sdslen(temp), fOverwrite); sdsfree(temp); } @@ -2966,7 +2923,7 @@ void 
redisDbPersistentData::storeDatabase() if (itr == nullptr) return; robj *o = itr.val(); - sds temp = serializeStoredObjectAndExpire(db, (const char*) itr.key(), o); + sds temp = serializeStoredObjectAndExpire(o); storage->insert((sds)key, temp, sdslen(temp), fUpdate); sdsfree(temp); } @@ -3042,7 +2999,7 @@ void redisDbPersistentData::processChangesAsync(std::atomic &pendingJobs) while ((de = dictNext(di)) != nullptr) { robj *o = (robj*)dictGetVal(de); - sds temp = serializeStoredObjectAndExpire(this, (const char*) dictGetKey(de), o); + sds temp = serializeStoredObjectAndExpire(o); veckeys.push_back((sds)dictGetKey(de)); veccbkeys.push_back(sdslen((sds)dictGetKey(de))); vecvals.push_back(temp); @@ -3106,9 +3063,7 @@ redisDbPersistentData::~redisDbPersistentData() if (m_dictChanged) dictRelease(m_dictChanged); if (m_dictChangedStorageFlush) - dictRelease(m_dictChangedStorageFlush); - - delete m_setexpire; + dictRelease(m_dictChangedStorageFlush); } dict_iter redisDbPersistentData::random() @@ -3262,7 +3217,7 @@ sds serializeExpire(const expireEntry *pexpire) return str; } -std::unique_ptr deserializeExpire(sds key, const char *str, size_t cch, size_t *poffset) +std::unique_ptr deserializeExpire(const char *str, size_t cch, size_t *poffset) { unsigned celem; if (cch < sizeof(unsigned)) @@ -3294,25 +3249,21 @@ std::unique_ptr deserializeExpire(sds key, const char *str, size_t offset += sizeof(long long); if (spexpire == nullptr) - spexpire = std::make_unique(key, subkey, when); + spexpire = std::make_unique(subkey, when); else spexpire->update(subkey, when); if (subkey) sdsfree(subkey); } - - *poffset = offset; + if (poffset != nullptr) + *poffset = offset; return spexpire; } -sds serializeStoredObjectAndExpire(redisDbPersistentData *db, const char *key, robj_roptr o) +sds serializeStoredObjectAndExpire(robj_roptr o) { - std::unique_lock ul(g_expireLock); - auto itrExpire = db->setexpire()->find(key); - const expireEntry *pexpire = nullptr; - if (itrExpire != 
db->setexpire()->end()) - pexpire = &(*itrExpire); + const expireEntry *pexpire = o->FExpires() ? &o->expire : nullptr; sds str = serializeExpire(pexpire); str = serializeStoredObject(o, str); @@ -3395,8 +3346,8 @@ void redisDbPersistentData::prefetchKeysAsync(client *c, parsed_command &command robj *o = nullptr; m_spstorage->retrieve((sds)szFromObj(objKey), [&](const char *, size_t, const void *data, size_t cb){ size_t offset = 0; - spexpire = deserializeExpire(sharedKey, (const char*)data, cb, &offset); - o = deserializeStoredObject(this, sharedKey, reinterpret_cast(data) + offset, cb - offset); + spexpire = deserializeExpire((const char*)data, cb, &offset); + o = deserializeStoredObject(reinterpret_cast(data) + offset, cb - offset); serverAssert(o != nullptr); }); @@ -3431,18 +3382,9 @@ void redisDbPersistentData::prefetchKeysAsync(client *c, parsed_command &command } } dictAdd(m_pdict, sharedKey, o); - o->SetFExpires(spexpire != nullptr); - - std::unique_lock ul(g_expireLock); if (spexpire != nullptr) - { - auto itr = m_setexpire->find(sharedKey); - if (itr != m_setexpire->end()) - m_setexpire->erase(itr); - m_setexpire->insert(std::move(*spexpire)); - serverAssert(m_setexpire->find(sharedKey) != m_setexpire->end()); - } - serverAssert(o->FExpires() == (m_setexpire->find(sharedKey) != m_setexpire->end())); + o->expire = std::move(*spexpire); + o->SetFExpires(spexpire != nullptr); } } else diff --git a/src/debug.cpp b/src/debug.cpp index 688ca3a0e..817728ee8 100644 --- a/src/debug.cpp +++ b/src/debug.cpp @@ -146,11 +146,10 @@ void mixStringObjectDigest(unsigned char *digest, robj_roptr o) { * Note that this function does not reset the initial 'digest' passed, it * will continue mixing this object digest to anything that was already * present. 
*/ -void xorObjectDigest(redisDb *db, robj_roptr keyobj, unsigned char *digest, robj_roptr o) { +void xorObjectDigest(unsigned char *digest, robj_roptr o) { uint32_t aux = htonl(o->type); mixDigest(digest,&aux,sizeof(aux)); - std::unique_lock ul(g_expireLock); - expireEntry *pexpire = db->getExpire(keyobj); + const expireEntry *pexpire = o->FExpires() ? &o->expire : nullptr; long long expiretime = INVALID_EXPIRE; char buf[128]; @@ -309,7 +308,7 @@ void computeDatasetDigest(unsigned char *final) { mixDigest(final,&aux,sizeof(aux)); /* Iterate this DB writing every entry */ - db->iterate_threadsafe([final, db](const char *key, robj_roptr o)->bool { + db->iterate_threadsafe([final](const char *key, robj_roptr o)->bool { unsigned char digest[20]; robj *keyobj; @@ -318,7 +317,7 @@ void computeDatasetDigest(unsigned char *final) { mixDigest(digest,key,sdslen(key)); - xorObjectDigest(db,keyobj,digest,o); + xorObjectDigest(digest,o); /* We can finally xor the key-val digest to the final digest */ xorDigest(final,digest,20); @@ -716,7 +715,7 @@ NULL * work on logically expired keys */ auto itr = c->db->find(c->argv[j]); robj* o = (robj*)(itr == NULL ? 
NULL : itr.val()); - if (o) xorObjectDigest(c->db,c->argv[j],digest,o); + if (o) xorObjectDigest(digest,o); sds d = sdsempty(); for (int i = 0; i < 20; i++) d = sdscatprintf(d, "%02x",digest[i]); @@ -843,10 +842,6 @@ NULL g_pserver->db[dbid]->getStats(buf,sizeof(buf)); stats = sdscat(stats,buf); - stats = sdscatprintf(stats,"[Expires set]\n"); - g_pserver->db[dbid]->getExpireStats(buf, sizeof(buf)); - stats = sdscat(stats, buf); - addReplyVerbatim(c,stats,sdslen(stats),"txt"); sdsfree(stats); } else if (!strcasecmp(szFromObj(c->argv[1]),"htstats-key") && c->argc == 3) { @@ -937,6 +932,21 @@ NULL mallctl_string(c, c->argv+2, c->argc-2); return; #endif + } else if(!strcasecmp(szFromObj(c->argv[1]),"flush-storage") && c->argc == 2) { + if (g_pserver->m_pstorageFactory != nullptr) { + for (int i = 0; i < cserver.dbnum; i++) { + g_pserver->db[i]->getStorageCache()->flush(); + } + addReply(c,shared.ok); + } else { + addReplyError(c, "Can't flush storage if no storage provider is set"); + } + } else if (!strcasecmp(szFromObj(c->argv[1]),"get-storage-usage") && c->argc == 2) { + if (g_pserver->m_pstorageFactory != nullptr) { + addReplyLongLong(c, g_pserver->m_pstorageFactory->totalDiskspaceUsed()); + } else { + addReplyLongLong(c, 0); + } } else { addReplySubcommandSyntaxError(c); return; diff --git a/src/defrag.cpp b/src/defrag.cpp index d48f4d804..b92888b89 100644 --- a/src/defrag.cpp +++ b/src/defrag.cpp @@ -47,7 +47,6 @@ extern "C" int je_get_defrag_hint(void* ptr); /* forward declarations*/ void defragDictBucketCallback(void *privdata, dictEntry **bucketref); dictEntry* replaceSatelliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, sds newkey, uint64_t hash, long *defragged); -bool replaceSatelliteOSetKeyPtr(expireset &set, sds oldkey, sds newkey); /* Defrag helper for generic allocations. 
* @@ -425,20 +424,6 @@ dictEntry* replaceSatelliteDictKeyPtrAndOrDefragDictEntry(dict *d, sds oldkey, s return NULL; } -bool replaceSatelliteOSetKeyPtr(expireset &set, sds oldkey, sds newkey) { - auto itr = set.find(oldkey); - if (itr != set.end()) - { - expireEntry eNew(std::move(*itr)); - eNew.setKeyUnsafe(newkey); - set.erase(itr); - set.insert(eNew); - serverAssert(set.find(newkey) != set.end()); - return true; - } - return false; -} - long activeDefragQuickListNode(quicklist *ql, quicklistNode **node_ref) { quicklistNode *newnode, *node = *node_ref; long defragged = 0; @@ -851,7 +836,6 @@ long defragModule(redisDb *db, dictEntry *kde) { * all the various pointers it has. Returns a stat of how many pointers were * moved. */ long defragKey(redisDb *db, dictEntry *de) { - std::unique_lock ul(g_expireLock); sds keysds = (sds)dictGetKey(de); robj *newob, *ob; unsigned char *newzl; @@ -862,15 +846,8 @@ long defragKey(redisDb *db, dictEntry *de) { /* Try to defrag the key name. */ newsds = activeDefragSds(keysds); - if (newsds) - { + if (newsds) { defragged++, de->key = newsds; - if (!db->setexpire()->empty()) { - bool fReplaced = replaceSatelliteOSetKeyPtr(*const_cast(db->setexpire()), keysds, newsds); - serverAssert(fReplaced == ob->FExpires()); - } else { - serverAssert(!ob->FExpires()); - } } if ((newob = activeDefragStringOb(ob, &defragged))) { diff --git a/src/evict.cpp b/src/evict.cpp index 1523b2814..432b807d6 100644 --- a/src/evict.cpp +++ b/src/evict.cpp @@ -100,6 +100,36 @@ unsigned long long estimateObjectIdleTime(robj_roptr o) { } } +unsigned long long getIdle(robj *obj, const expireEntry *e) { + unsigned long long idle; + /* Calculate the idle time according to the policy. This is called + * idle just because the code initially handled LRU, but is in fact + * just a score where an higher score means better candidate. */ + if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LRU) { + idle = (obj != nullptr) ? 
estimateObjectIdleTime(obj) : 0; + } else if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU) { + /* When we use an LRU policy, we sort the keys by idle time + * so that we expire keys starting from greater idle time. + * However when the policy is an LFU one, we have a frequency + * estimation, and we want to evict keys with lower frequency + * first. So inside the pool we put objects using the inverted + * frequency subtracting the actual frequency to the maximum + * frequency of 255. */ + idle = 255-LFUDecrAndReturn(obj); + } else if (g_pserver->maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { + /* In this case the sooner the expire the better. */ + if (e != nullptr) + idle = ULLONG_MAX - e->when(); + else + idle = 0; + } else if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { + idle = ULLONG_MAX; + } else { + serverPanic("Unknown eviction policy in storage eviction"); + } + return idle; +} + /* LRU approximation algorithm * * Redis uses an approximation of the LRU algorithm that runs in constant @@ -137,28 +167,7 @@ void evictionPoolAlloc(void) { void processEvictionCandidate(int dbid, sds key, robj *o, const expireEntry *e, struct evictionPoolEntry *pool) { - unsigned long long idle; - - /* Calculate the idle time according to the policy. This is called - * idle just because the code initially handled LRU, but is in fact - * just a score where an higher score means better candidate. */ - if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LRU) { - idle = (o != nullptr) ? estimateObjectIdleTime(o) : 0; - } else if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_LFU) { - /* When we use an LRU policy, we sort the keys by idle time - * so that we expire keys starting from greater idle time. - * However when the policy is an LFU one, we have a frequency - * estimation, and we want to evict keys with lower frequency - * first. So inside the pool we put objects using the inverted - * frequency subtracting the actual frequency to the maximum - * frequency of 255. 
*/ - idle = 255-LFUDecrAndReturn(o); - } else if (g_pserver->maxmemory_policy == MAXMEMORY_VOLATILE_TTL) { - /* In this case the sooner the expire the better. */ - idle = ULLONG_MAX - e->when(); - } else { - serverPanic("Unknown eviction policy in evictionPoolPopulate()"); - } + unsigned long long idle = getIdle(o,e); /* Insert the element inside the pool. * First, find the first empty bucket or the first populated @@ -222,52 +231,23 @@ void processEvictionCandidate(int dbid, sds key, robj *o, const expireEntry *e, * idle time are on the left, and keys with the higher idle time on the * right. */ -struct visitFunctor +int evictionPoolPopulate(int dbid, redisDb *db, bool fVolatile, struct evictionPoolEntry *pool) { - int dbid; - dict *dbdict; - struct evictionPoolEntry *pool; - int count = 0; - int tries = 0; - - bool operator()(const expireEntry &e) - { - dictEntry *de = dictFind(dbdict, e.key()); - if (de != nullptr) + int returnCount = 0; + dictEntry **samples = (dictEntry**)alloca(g_pserver->maxmemory_samples * sizeof(dictEntry*)); + int count = dictGetSomeKeys(db->dictUnsafeKeyOnly(),samples,g_pserver->maxmemory_samples); + for (int j = 0; j < count; j++) { + robj *o = (robj*)dictGetVal(samples[j]); + // If the object is in second tier storage we don't need to evict it (since it already is) + if (o != nullptr) { - processEvictionCandidate(dbid, (sds)dictGetKey(de), (robj*)dictGetVal(de), &e, pool); - ++count; - } - ++tries; - return tries < g_pserver->maxmemory_samples; - } -}; -int evictionPoolPopulate(int dbid, redisDb *db, expireset *setexpire, struct evictionPoolEntry *pool) -{ - if (setexpire != nullptr) - { - std::unique_lock ul(g_expireLock); - visitFunctor visitor { dbid, db->dictUnsafeKeyOnly(), pool, 0 }; - setexpire->random_visit(visitor); - return visitor.count; - } - else - { - int returnCount = 0; - dictEntry **samples = (dictEntry**)alloca(g_pserver->maxmemory_samples * sizeof(dictEntry*)); - int count = 
dictGetSomeKeys(db->dictUnsafeKeyOnly(),samples,g_pserver->maxmemory_samples); - for (int j = 0; j < count; j++) { - robj *o = (robj*)dictGetVal(samples[j]); - // If the object is in second tier storage we don't need to evict it (since it alrady is) - if (o != nullptr) - { - processEvictionCandidate(dbid, (sds)dictGetKey(samples[j]), o, nullptr, pool); + if (!fVolatile || o->FExpires()) { + processEvictionCandidate(dbid, (sds)dictGetKey(samples[j]), o, &o->expire, pool); ++returnCount; } } - return returnCount; } - return 0; + return returnCount; } /* ---------------------------------------------------------------------------- @@ -629,6 +609,31 @@ static unsigned long evictionTimeLimitUs() { return ULONG_MAX; /* No limit to eviction time */ } +void evict(redisDb *db, robj *keyobj) { + mstime_t eviction_latency; + propagateExpire(db,keyobj,g_pserver->lazyfree_lazy_eviction); + /* We compute the amount of memory freed by db*Delete() alone. + * It is possible that actually the memory needed to propagate + * the DEL in AOF and replication link is greater than the one + * we are freeing removing the key, but we can't account for + * that otherwise we would never exit the loop. + * + * AOF and Output buffer memory will be freed eventually so + * we only care about memory used by the key space. 
*/ + latencyStartMonitor(eviction_latency); + if (g_pserver->lazyfree_lazy_eviction) + dbAsyncDelete(db,keyobj); + else + dbSyncDelete(db,keyobj); + latencyEndMonitor(eviction_latency); + latencyAddSampleIfNeeded("eviction-del",eviction_latency); + + signalModifiedKey(NULL,db,keyobj); + notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", + keyobj, db->id); + decrRefCount(keyobj); +} + static void updateSysAvailableMemory() { if (g_pserver->force_eviction_percent) { g_pserver->cron_malloc_stats.sys_available = getMemAvailable(); @@ -666,7 +671,7 @@ int performEvictions(bool fPreSnapshot) { int keys_freed = 0; size_t mem_reported, mem_tofree; long long mem_freed; /* May be negative */ - mstime_t latency, eviction_latency; + mstime_t latency; long long delta; int slaves = listLength(g_pserver->slaves); const bool fEvictToStorage = !cserver.delete_on_evict && g_pserver->db[0]->FStorageProvider(); @@ -691,6 +696,43 @@ int performEvictions(bool fPreSnapshot) { monotime evictionTimer; elapsedStart(&evictionTimer); + if (g_pserver->maxstorage && g_pserver->m_pstorageFactory != nullptr) { + while (g_pserver->m_pstorageFactory->totalDiskspaceUsed() >= g_pserver->maxstorage && elapsedUs(evictionTimer) < eviction_time_limit_us) { + redisDb *db; + std::vector evictionPool; + robj *bestkey = nullptr; + redisDb *bestdb = nullptr; + unsigned long long bestidle = 0; + for (int i = 0; i < cserver.dbnum; i++) { + db = g_pserver->db[i]; + evictionPool = db->getStorageCache()->getEvictionCandidates(g_pserver->maxmemory_samples); + for (std::string key : evictionPool) { + robj *keyobj = createStringObject(key.c_str(), key.size()); + robj *obj = db->find(szFromObj(keyobj)); + if (obj != nullptr) { + expireEntry *e = db->getExpire(keyobj); + unsigned long long idle = getIdle(obj, e); + + if (bestkey == nullptr || bestidle < idle) { + if (bestkey != nullptr) + decrRefCount(bestkey); + incrRefCount(keyobj); + bestkey = keyobj; + bestidle = idle; + bestdb = db; + } + } + decrRefCount(keyobj); + } 
+ } + if (bestkey) { + evict(bestdb, bestkey); + } else { + break; //could not find a key to evict so stop now + } + } + } + if (g_pserver->maxstorage && g_pserver->m_pstorageFactory != nullptr && g_pserver->m_pstorageFactory->totalDiskspaceUsed() >= g_pserver->maxstorage) goto cant_free_storage; @@ -718,14 +760,14 @@ int performEvictions(bool fPreSnapshot) { if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { if ((keys = db->size()) != 0) { - total_keys += evictionPoolPopulate(i, db, nullptr, pool); + total_keys += evictionPoolPopulate(i, db, false, pool); } } else { keys = db->expireSize(); if (keys != 0) - total_keys += evictionPoolPopulate(i, db, db->setexpireUnsafe(), pool); + total_keys += evictionPoolPopulate(i, db, true, pool); } } if (!total_keys) break; /* No keys to evict. */ @@ -786,7 +828,7 @@ int performEvictions(bool fPreSnapshot) { { if (db->expireSize()) { - bestkey = (sds)db->random_expire().key(); + db->random_expire(&bestkey); bestdbid = j; break; } @@ -805,7 +847,7 @@ int performEvictions(bool fPreSnapshot) { if (db->removeCachedValue(bestkey, &deT)) { mem_freed += splazy->addEntry(db->dictUnsafeKeyOnly(), deT); ckeysFailed = 0; - g_pserver->stat_evictedkeys++; + g_pserver->stat_evictedkeys++; } else { delta = 0; @@ -817,30 +859,11 @@ int performEvictions(bool fPreSnapshot) { else { robj *keyobj = createStringObject(bestkey,sdslen(bestkey)); - propagateExpire(db,keyobj,g_pserver->lazyfree_lazy_eviction); - /* We compute the amount of memory freed by db*Delete() alone. - * It is possible that actually the memory needed to propagate - * the DEL in AOF and replication link is greater than the one - * we are freeing removing the key, but we can't account for - * that otherwise we would never exit the loop. - * - * AOF and Output buffer memory will be freed eventually so - * we only care about memory used by the key space. 
*/ delta = (long long) zmalloc_used_memory(); - latencyStartMonitor(eviction_latency); - if (g_pserver->lazyfree_lazy_eviction) - dbAsyncDelete(db,keyobj); - else - dbSyncDelete(db,keyobj); - latencyEndMonitor(eviction_latency); - latencyAddSampleIfNeeded("eviction-del",eviction_latency); + evict(db, keyobj); delta -= (long long) zmalloc_used_memory(); mem_freed += delta; g_pserver->stat_evictedkeys++; - signalModifiedKey(NULL,db,keyobj); - notifyKeyspaceEvent(NOTIFY_EVICTED, "evicted", - keyobj, db->id); - decrRefCount(keyobj); } keys_freed++; diff --git a/src/expire.cpp b/src/expire.cpp index b727183ad..8d711cedf 100644 --- a/src/expire.cpp +++ b/src/expire.cpp @@ -33,8 +33,6 @@ #include "server.h" #include "cron.h" -fastlock g_expireLock {"Expire"}; - /* Helper function for the activeExpireCycle() function. * This function will try to expire the key that is stored in the hash table * entry 'de' of the 'expires' hash table of a Redis database. @@ -74,21 +72,20 @@ void activeExpireCycleExpireFullKey(redisDb *db, const char *key) { *----------------------------------------------------------------------------*/ -int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t &tried) { +int activeExpireCycleExpire(redisDb *db, const char *key, expireEntry &e, long long now, size_t &tried) { if (!e.FFat()) { - activeExpireCycleExpireFullKey(db, e.key()); + activeExpireCycleExpireFullKey(db, key); ++tried; return 1; } expireEntryFat *pfat = e.pfatentry(); - robj *val = db->find(e.key()); + robj *val = db->find(key); int deleted = 0; redisObjectStack objKey; - initStaticStringObject(objKey, (char*)e.key()); - bool fTtlChanged = false; + initStaticStringObject(objKey, (char*)key); while (!pfat->FEmpty()) { @@ -99,7 +96,7 @@ int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t & // Is it the full key expiration? 
if (pfat->nextExpireEntry().spsubkey == nullptr) { - activeExpireCycleExpireFullKey(db, e.key()); + activeExpireCycleExpireFullKey(db, key); return ++deleted; } @@ -109,7 +106,7 @@ int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t & if (setTypeRemove(val,pfat->nextExpireEntry().spsubkey.get())) { deleted++; if (setTypeSize(val) == 0) { - activeExpireCycleExpireFullKey(db, e.key()); + activeExpireCycleExpireFullKey(db, key); return deleted; } } @@ -119,7 +116,7 @@ int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t & if (hashTypeDelete(val,(sds)pfat->nextExpireEntry().spsubkey.get())) { deleted++; if (hashTypeLength(val) == 0) { - activeExpireCycleExpireFullKey(db, e.key()); + activeExpireCycleExpireFullKey(db, key); return deleted; } } @@ -129,7 +126,7 @@ int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t & if (zsetDel(val,(sds)pfat->nextExpireEntry().spsubkey.get())) { deleted++; if (zsetLength(val) == 0) { - activeExpireCycleExpireFullKey(db, e.key()); + activeExpireCycleExpireFullKey(db, key); return deleted; } } @@ -137,15 +134,15 @@ int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t & case OBJ_CRON: { - sds keyCopy = sdsdup(e.key()); + sds keyCopy = sdsdup(key); incrRefCount(val); aePostFunction(g_pserver->rgthreadvar[IDX_EVENT_LOOP_MAIN].el, [keyCopy, val]{ executeCronJobExpireHook(keyCopy, val); sdsfree(keyCopy); decrRefCount(val); }, true /*fLock*/, true /*fForceQueue*/); + break; } - return deleted; case OBJ_LIST: default: @@ -157,7 +154,6 @@ int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t & propagateSubkeyExpire(db, val->type, &objKey, &objSubkey); pfat->popfrontExpireEntry(); - fTtlChanged = true; if ((tried % ACTIVE_EXPIRE_CYCLE_SUBKEY_LOOKUPS_PER_LOOP) == 0) { break; } @@ -167,11 +163,6 @@ int activeExpireCycleExpire(redisDb *db, expireEntry &e, long long now, size_t & { removeExpire(db, &objKey); } - else if 
(!pfat->FEmpty() && fTtlChanged) - { - // We need to resort the expire entry since it may no longer be in the correct position - db->resortExpire(e); - } if (deleted) { @@ -317,8 +308,26 @@ void pexpireMemberAtCommand(client *c) * If type is ACTIVE_EXPIRE_CYCLE_SLOW, that normal expire cycle is * executed, where the time limit is a percentage of the REDIS_HZ period * as specified by the ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC define. */ +#define ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP 20 /* Keys for each DB loop. */ +#define ACTIVE_EXPIRE_CYCLE_FAST_DURATION 1000 /* Microseconds. */ +#define ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC 25 /* Max % of CPU to use. */ +#define ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE 10 /* % of stale keys after which + we do extra efforts. */ +/*static*/ void redisDbPersistentData::activeExpireCycleCore(int type) { + /* Adjust the running parameters according to the configured expire + * effort. The default effort is 1, and the maximum configurable effort + * is 10. */ + unsigned long + effort = g_pserver->active_expire_effort-1, /* Rescale from 0 to 9. */ + config_keys_per_loop = ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP + + ACTIVE_EXPIRE_CYCLE_KEYS_PER_LOOP/4*effort, + config_cycle_fast_duration = ACTIVE_EXPIRE_CYCLE_FAST_DURATION + + ACTIVE_EXPIRE_CYCLE_FAST_DURATION/4*effort, + config_cycle_slow_time_perc = ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC + + 2*effort, + config_cycle_acceptable_stale = ACTIVE_EXPIRE_CYCLE_ACCEPTABLE_STALE- + effort; -void activeExpireCycleCore(int type) { /* This function has some global state in order to continue the work * incrementally across calls. */ static unsigned int current_db = 0; /* Next DB to test. */ @@ -336,10 +345,16 @@ void activeExpireCycleCore(int type) { if (type == ACTIVE_EXPIRE_CYCLE_FAST) { /* Don't start a fast cycle if the previous cycle did not exit - * for time limit. Also don't repeat a fast cycle for the same period + * for time limit, unless the percentage of estimated stale keys is + * too high. 
Also never repeat a fast cycle for the same period * as the fast cycle total duration itself. */ - if (!timelimit_exit) return; - if (start < last_fast_cycle + ACTIVE_EXPIRE_CYCLE_FAST_DURATION*2) return; + if (!timelimit_exit && + g_pserver->stat_expired_stale_perc < config_cycle_acceptable_stale) + return; + + if (start < last_fast_cycle + (long long)config_cycle_fast_duration*2) + return; + last_fast_cycle = start; } @@ -353,16 +368,16 @@ void activeExpireCycleCore(int type) { if (dbs_per_call > cserver.dbnum || timelimit_exit) dbs_per_call = cserver.dbnum; - /* We can use at max ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC percentage of CPU time - * per iteration. Since this function gets called with a frequency of - * g_pserver->hz times per second, the following is the max amount of + /* We can use at max 'config_cycle_slow_time_perc' percentage of CPU + * time per iteration. Since this function gets called with a frequency of + * server.hz times per second, the following is the max amount of * microseconds we can spend in this function. */ - timelimit = 1000000*ACTIVE_EXPIRE_CYCLE_SLOW_TIME_PERC/g_pserver->hz/100; + timelimit = config_cycle_slow_time_perc*1000000/g_pserver->hz/100; timelimit_exit = 0; if (timelimit <= 0) timelimit = 1; if (type == ACTIVE_EXPIRE_CYCLE_FAST) - timelimit = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; /* in microseconds. */ + timelimit = config_cycle_fast_duration; /* in microseconds. */ /* Accumulate some global stats as we expire keys, to have some idea * about the number of keys that are already logically expired, but still @@ -371,6 +386,9 @@ void activeExpireCycleCore(int type) { long total_expired = 0; for (j = 0; j < dbs_per_call && timelimit_exit == 0; j++) { + /* Expired and checked in a single loop. 
*/ + unsigned long expired, sampled; + redisDb *db = g_pserver->db[(current_db % cserver.dbnum)]; /* Increment the DB now so we are sure if we run out of time @@ -378,48 +396,156 @@ void activeExpireCycleCore(int type) { * distribute the time evenly across DBs. */ current_db++; - long long now; - iteration++; - now = mstime(); - - /* If there is nothing to expire try next DB ASAP. */ - if (db->setexpireUnsafe()->empty()) - { - db->avg_ttl = 0; - db->last_expire_set = now; - continue; - } - - std::unique_lock ul(g_expireLock); - size_t expired = 0; - size_t tried = 0; - long long check = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; // assume a check is roughly 1us. It isn't but good enough - db->expireitr = db->setexpireUnsafe()->enumerate(db->expireitr, now, [&](expireEntry &e) __attribute__((always_inline)) { - if (e.when() < now) - { - expired += activeExpireCycleExpire(db, e, now, tried); - } + if (g_pserver->m_pstorageFactory == nullptr) { + /* Continue to expire if at the end of the cycle there are still + * a big percentage of keys to expire, compared to the number of keys + * we scanned. The percentage, stored in config_cycle_acceptable_stale + * is not fixed, but depends on the Redis configured "expire effort". */ + do { + unsigned long num, slots; + long long now, ttl_sum; + int ttl_samples; + iteration++; + + /* If there is nothing to expire try next DB ASAP. */ + if (db->expireSize() == 0) { + db->avg_ttl = 0; + break; + } + num = dictSize(db->m_pdict); + slots = dictSlots(db->m_pdict); + now = mstime(); + + /* When there are less than 1% filled slots, sampling the key + * space is expensive, so stop here waiting for better times... + * The dictionary will be resized asap. */ + if (slots > DICT_HT_INITIAL_SIZE && + (num*100/slots < 1)) break; + + /* The main collection cycle. Sample random keys among keys + * with an expire set, checking for expired ones. 
*/ + expired = 0; + sampled = 0; + ttl_sum = 0; + ttl_samples = 0; + + if (num > config_keys_per_loop) + num = config_keys_per_loop; + + /* Here we access the low level representation of the hash table + * for speed concerns: this makes this code coupled with dict.c, + * but it hardly changed in ten years. + * + * Note that certain places of the hash table may be empty, + * so we want also a stop condition about the number of + * buckets that we scanned. However scanning for free buckets + * is very fast: we are in the cache line scanning a sequential + * array of NULL pointers, so we can scan a lot more buckets + * than keys in the same time. */ + long max_buckets = num*20; + long checked_buckets = 0; + + while (sampled < num && checked_buckets < max_buckets) { + for (int table = 0; table < 2; table++) { + if (table == 1 && !dictIsRehashing(db->m_pdict)) break; + + unsigned long idx = db->expires_cursor; + idx &= db->m_pdict->ht[table].sizemask; + dictEntry *de = db->m_pdict->ht[table].table[idx]; + long long ttl; + + /* Scan the current bucket of the current table. */ + checked_buckets++; + while(de) { + /* Get the next entry now since this entry may get + * deleted. */ + dictEntry *e = de; + robj *o = (robj*)dictGetVal(de); + de = de->next; + if (!o->FExpires()) + continue; + + expireEntry *exp = &o->expire; + + serverAssert(exp->when() > 0); + ttl = exp->when()-now; + size_t tried = 0; + if (exp->when() <= now) { + if (activeExpireCycleExpire(db,(const char*)dictGetKey(e),*exp,now,tried)) expired++; + serverAssert(ttl <= 0); + } else { + serverAssert(ttl > 0); + } + if (ttl > 0) { + /* We want the average TTL of keys yet + * not expired. */ + ttl_sum += ttl; + ttl_samples++; + } + sampled++; + } + } + db->expires_cursor++; + } + total_expired += expired; + total_sampled += sampled; + + /* Update the average TTL stats for this database. */ + if (ttl_samples) { + long long avg_ttl = ttl_sum/ttl_samples; + + /* Do a simple running average with a few samples. 
+ * We just use the current estimate with a weight of 2% + * and the previous estimate with a weight of 98%. */ + if (db->avg_ttl == 0) db->avg_ttl = avg_ttl; + db->avg_ttl = (db->avg_ttl/50)*49 + (avg_ttl/50); + } - if ((tried % ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP) == 0) - { /* We can't block forever here even if there are many keys to * expire. So after a given amount of milliseconds return to the * caller waiting for the other active expire cycle. */ - elapsed = ustime()-start; - if (elapsed > timelimit) { - timelimit_exit = 1; - g_pserver->stat_expired_time_cap_reached_count++; - return false; + if ((iteration & 0xf) == 0) { /* check once every 16 iterations. */ + elapsed = ustime()-start; + if (elapsed > timelimit) { + timelimit_exit = 1; + g_pserver->stat_expired_time_cap_reached_count++; + break; + } } - check = ACTIVE_EXPIRE_CYCLE_FAST_DURATION; - } - return true; - }, &check); + /* We don't repeat the cycle for the current database if there are + * an acceptable amount of stale keys (logically expired but yet + * not reclaimed). 
*/ + } while (sampled == 0 || + (expired*100/sampled) > config_cycle_acceptable_stale); + } else { + long prev_expired; + long long now = mstime(); + size_t tried = 0; + std::vector keys; + do { + prev_expired = total_expired; + keys = db->getStorageCache()->getExpirationCandidates(ACTIVE_EXPIRE_CYCLE_LOOKUPS_PER_LOOP); + for (std::string key : keys) { + robj* keyobj = createStringObject(key.c_str(), key.size()); + db->find(szFromObj(keyobj)); + expireEntry *e = db->getExpire(keyobj); + if (e != nullptr && e->when() < now) + total_expired += activeExpireCycleExpire(db, szFromObj(keyobj), *e, now, tried); + decrRefCount(keyobj); + } + total_sampled += keys.size(); + elapsed = ustime()-start; + } while (keys.size() > 0 && (elapsed < timelimit) && (total_expired - prev_expired) > 0); - total_expired += expired; + if (ustime()-start > timelimit) { + timelimit_exit = 1; + g_pserver->stat_expired_time_cap_reached_count++; + } + } } elapsed = ustime()-start; + g_pserver->stat_expire_cycle_time_used += elapsed; latencyAddSampleIfNeeded("expire-cycle",elapsed/1000); /* Update our estimate of keys existing but yet to be expired. 
@@ -435,7 +561,7 @@ void activeExpireCycleCore(int type) { void activeExpireCycle(int type) { - runAndPropogateToReplicas(activeExpireCycleCore, type); + runAndPropogateToReplicas(redisDbPersistentData::activeExpireCycleCore, type); } /*----------------------------------------------------------------------------- @@ -481,7 +607,6 @@ void expireSlaveKeys(void) { if (slaveKeysWithExpire == NULL || dictSize(slaveKeysWithExpire) == 0) return; - std::unique_lock ul(g_expireLock); int cycles = 0, noexpire = 0; mstime_t start = mstime(); while(1) { @@ -496,19 +621,14 @@ void expireSlaveKeys(void) { while(dbids && dbid < cserver.dbnum) { if ((dbids & 1) != 0) { redisDb *db = g_pserver->db[dbid]; - - // the expire is hashed based on the key pointer, so we need the point in the main db auto itrDB = db->find(keyname); - auto itrExpire = db->setexpire()->end(); - if (itrDB != nullptr) - itrExpire = db->setexpireUnsafe()->find(itrDB.key()); int expired = 0; - if (itrExpire != db->setexpire()->end()) + if (itrDB != db->end() && itrDB->FExpires()) { - if (itrExpire->when() < start) { + if (itrDB->expire.when() < start) { size_t tried = 0; - expired = activeExpireCycleExpire(g_pserver->db[dbid],*itrExpire,start,tried); + expired = activeExpireCycleExpire(g_pserver->db[dbid],itrDB.key(),itrDB->expire,start,tried); } } @@ -516,7 +636,7 @@ void expireSlaveKeys(void) { * corresponding bit in the new bitmap we set as value. * At the end of the loop if the bitmap is zero, it means we * no longer need to keep track of this key. */ - if (itrExpire != db->setexpire()->end() && !expired) { + if (itrDB != db->end() && itrDB->FExpires() && !expired) { noexpire++; new_dbids |= (uint64_t)1 << dbid; } @@ -694,7 +814,6 @@ void ttlGenericCommand(client *c, int output_ms) { /* The key exists. Return -1 if it has no expire, or the actual * TTL value otherwise. 
*/ - std::unique_lock ul(g_expireLock); expireEntry *pexpire = c->db->getExpire(c->argv[1]); if (c->argc == 2) { @@ -784,18 +903,11 @@ expireEntryFat::~expireEntryFat() } expireEntryFat::expireEntryFat(const expireEntryFat &e) - : m_keyPrimary(e.m_keyPrimary), m_vecexpireEntries(e.m_vecexpireEntries) + : m_vecexpireEntries(e.m_vecexpireEntries) { // Note: dictExpires is not copied } -expireEntryFat::expireEntryFat(expireEntryFat &&e) - : m_keyPrimary(std::move(e.m_keyPrimary)), m_vecexpireEntries(std::move(e.m_vecexpireEntries)) -{ - m_dictIndex = e.m_dictIndex; - e.m_dictIndex = nullptr; -} - void expireEntryFat::createIndex() { serverAssert(m_dictIndex == nullptr); diff --git a/src/expire.h b/src/expire.h index d028a9671..5cdfbc702 100644 --- a/src/expire.h +++ b/src/expire.h @@ -22,9 +22,11 @@ class expireEntryFat {} subexpireEntry(const subexpireEntry &other) - : spsubkey((const char*)sdsdupshared(other.spsubkey.get()), sdsfree) + : spsubkey(nullptr, sdsfree) { when = other.when; + if (other.spsubkey != nullptr) + spsubkey = std::unique_ptr((const char*)sdsdupshared(other.spsubkey.get()), sdsfree); } subexpireEntry(subexpireEntry &&) = default; @@ -41,27 +43,30 @@ class expireEntryFat }; private: - sdsimmutablestring m_keyPrimary; std::vector m_vecexpireEntries; // Note a NULL for the sds portion means the expire is for the primary key dict *m_dictIndex = nullptr; + long long m_whenPrimary = LLONG_MAX; void createIndex(); public: - expireEntryFat(const sdsimmutablestring &keyPrimary) - : m_keyPrimary(keyPrimary) - {} + expireEntryFat() = default; + expireEntryFat(const expireEntryFat &); ~expireEntryFat(); - expireEntryFat(const expireEntryFat &e); - expireEntryFat(expireEntryFat &&e); - long long when() const noexcept { return m_vecexpireEntries.front().when; } - const char *key() const noexcept { return static_cast(m_keyPrimary); } bool operator<(long long when) const noexcept { return this->when() < when; } void expireSubKey(const char *szSubkey, long long 
when); + bool FGetPrimaryExpire(long long *pwhen) const { + if (m_whenPrimary != LLONG_MAX) { + *pwhen = m_whenPrimary; + return true; + } + return false; + } + bool FEmpty() const noexcept { return m_vecexpireEntries.empty(); } const subexpireEntry &nextExpireEntry() const noexcept { return m_vecexpireEntries.front(); } void popfrontExpireEntry(); @@ -70,19 +75,11 @@ class expireEntryFat }; class expireEntry { - struct - { - sdsimmutablestring m_key; - expireEntryFat *m_pfatentry = nullptr; - } u; - long long m_when; // bit wise and with FFatMask means this is a fat entry and we should use the pointer - - /* Mask to check if an entry is Fat, most significant bit of m_when being set means it is Fat otherwise it is not */ - long long FFatMask() const noexcept { - return (1LL) << (sizeof(long long)*CHAR_BIT - 1); - } - - expireEntry() = default; + struct { + uint64_t m_whenAndPtrUnion : 63, + fFat : 1; + } s; + static_assert(sizeof(expireEntryFat*) <= sizeof(int64_t), "The pointer must fit in the union"); public: class iter { @@ -118,93 +115,108 @@ class expireEntry { const iter &operator*() const { return *this; } }; - expireEntry(sds key, const char *subkey, long long when) + expireEntry() + { + s.fFat = 0; + s.m_whenAndPtrUnion = 0; + } + + expireEntry(const char *subkey, long long when) { if (subkey != nullptr) { - m_when = FFatMask() | INVALID_EXPIRE; - u.m_pfatentry = new (MALLOC_LOCAL) expireEntryFat(sdsimmutablestring(sdsdupshared(key))); - u.m_pfatentry->expireSubKey(subkey, when); + auto pfatentry = new (MALLOC_LOCAL) expireEntryFat(); + pfatentry->expireSubKey(subkey, when); + s.m_whenAndPtrUnion = reinterpret_cast(pfatentry); + s.fFat = true; } else { - u.m_key = sdsimmutablestring(sdsdupshared(key)); - m_when = when; + s.m_whenAndPtrUnion = when; + s.fFat = false; } } - expireEntry(const expireEntry &e) + expireEntry(expireEntryFat *pfatentry) { - *this = e; + assert(pfatentry != nullptr); + s.m_whenAndPtrUnion = reinterpret_cast(pfatentry); + s.fFat = 
true; } + + expireEntry(const expireEntry &e) { + if (e.FFat()) { + s.m_whenAndPtrUnion = reinterpret_cast(new expireEntryFat(*e.pfatentry())); + s.fFat = true; + } else { + s = e.s; + } + } + expireEntry(expireEntry &&e) { - u.m_key = std::move(e.u.m_key); - u.m_pfatentry = std::move(e.u.m_pfatentry); - m_when = e.m_when; - e.m_when = 0; - e.u.m_pfatentry = nullptr; + s = e.s; } - expireEntry(expireEntryFat *pfatentry) + expireEntry &operator=(expireEntry &&e) { - u.m_pfatentry = pfatentry; - m_when = FFatMask() | INVALID_EXPIRE; - for (auto itr : *this) - { - if (itr.subkey() == nullptr) - { - m_when = FFatMask() | itr.when(); - break; - } + if (FFat()) + delete pfatentry(); + s = e.s; + e.s.m_whenAndPtrUnion = 0; + e.s.fFat = false; + return *this; + } + + expireEntry &operator=(expireEntry &e) { + if (FFat()) + delete pfatentry(); + if (e.FFat()) { + s.m_whenAndPtrUnion = reinterpret_cast(new expireEntryFat(*e.pfatentry())); + s.fFat = true; + } else { + s = e.s; } + return *this; } // Duplicate the expire, note this is intended to be passed directly to setExpire expireEntry duplicate() const { expireEntry dst; - dst.m_when = m_when; if (FFat()) { - dst.u.m_pfatentry = new expireEntryFat(*u.m_pfatentry); + auto pfatentry = new expireEntryFat(*expireEntry::pfatentry()); + dst.s.m_whenAndPtrUnion = reinterpret_cast(pfatentry); + dst.s.fFat = true; } else { - dst.u.m_key = u.m_key; + dst.s.m_whenAndPtrUnion = s.m_whenAndPtrUnion; + dst.s.fFat = false; } return dst; } - ~expireEntry() - { + void reset() { if (FFat()) - delete u.m_pfatentry; - } - - expireEntry &operator=(const expireEntry &e) - { - u.m_key = e.u.m_key; - m_when = e.m_when; - if (e.FFat()) - u.m_pfatentry = new (MALLOC_LOCAL) expireEntryFat(*e.u.m_pfatentry); - return *this; + delete pfatentry(); + s.fFat = false; + s.m_whenAndPtrUnion = 0; } - void setKeyUnsafe(sds key) + ~expireEntry() { if (FFat()) - u.m_pfatentry->m_keyPrimary = sdsimmutablestring(sdsdupshared(key)); - else - u.m_key = 
sdsimmutablestring(sdsdupshared(key)); + delete pfatentry(); } - inline bool FFat() const noexcept { return m_when & FFatMask(); } - expireEntryFat *pfatentry() { assert(FFat()); return u.m_pfatentry; } - const expireEntryFat *pfatentry() const { assert(FFat()); return u.m_pfatentry; } - - - bool operator==(const sdsview &key) const noexcept - { - return key == this->key(); + inline bool FFat() const noexcept { return s.fFat; } + expireEntryFat *pfatentry() { + assert(FFat()); + return reinterpret_cast(s.m_whenAndPtrUnion); + } + const expireEntryFat *pfatentry() const { + return const_cast(this)->pfatentry(); } + bool operator<(const expireEntry &e) const noexcept { return when() < e.when(); @@ -214,17 +226,11 @@ class expireEntry { return this->when() < when; } - const char *key() const noexcept - { - if (FFat()) - return u.m_pfatentry->key(); - return static_cast(u.m_key); - } long long when() const noexcept { if (FFat()) - return u.m_pfatentry->when(); - return FGetPrimaryExpire(); + return pfatentry()->when(); + return s.m_whenAndPtrUnion; } void update(const char *subkey, long long when) @@ -233,30 +239,27 @@ class expireEntry { { if (subkey == nullptr) { - m_when = when; + s.m_whenAndPtrUnion = when; return; } else { // we have to upgrade to a fat entry - long long whenT = m_when; - sdsimmutablestring keyPrimary = u.m_key; - m_when |= FFatMask(); - u.m_pfatentry = new (MALLOC_LOCAL) expireEntryFat(keyPrimary); - u.m_pfatentry->expireSubKey(nullptr, whenT); + auto pfatentry = new (MALLOC_LOCAL) expireEntryFat(); + pfatentry->expireSubKey(nullptr, s.m_whenAndPtrUnion); + s.m_whenAndPtrUnion = reinterpret_cast(pfatentry); + s.fFat = true; // at this point we're fat so fall through } } - if (subkey == nullptr) - m_when = when | FFatMask(); - u.m_pfatentry->expireSubKey(subkey, when); + pfatentry()->expireSubKey(subkey, when); } iter begin() const { return iter(this, 0); } iter end() const { if (FFat()) - return iter(this, u.m_pfatentry->size()); + return 
iter(this, pfatentry()->size()); return iter(this, 1); } @@ -268,26 +271,39 @@ class expireEntry { pfatentry()->m_vecexpireEntries.begin() + itr.m_idx); } - size_t size() const - { + size_t size() const { if (FFat()) - return u.m_pfatentry->size(); + return pfatentry()->size(); return 1; } - long long FGetPrimaryExpire() const noexcept + bool FGetPrimaryExpire(long long *pwhen) const noexcept { - return m_when & (~FFatMask()); + if (FFat()) { + return pfatentry()->FGetPrimaryExpire(pwhen); + } else { + *pwhen = s.m_whenAndPtrUnion; + return true; + } } - bool FGetPrimaryExpire(long long *pwhen) const noexcept - { - *pwhen = FGetPrimaryExpire(); - return *pwhen != INVALID_EXPIRE; + void *release_as_void() { + uint64_t whenT = s.m_whenAndPtrUnion; + whenT |= static_cast(s.fFat) << 63; + s.m_whenAndPtrUnion = 0; + s.fFat = 0; + return reinterpret_cast(whenT); + } + + static expireEntry *from_void(void **src) { + uintptr_t llV = reinterpret_cast(src); + return reinterpret_cast(llV); + } + static const expireEntry *from_void(void *const*src) { + uintptr_t llV = reinterpret_cast(src); + return reinterpret_cast(llV); } - explicit operator sdsview() const noexcept { return key(); } explicit operator long long() const noexcept { return when(); } }; -typedef semiorderedset expireset; -extern fastlock g_expireLock; \ No newline at end of file +static_assert(sizeof(expireEntry) == sizeof(long long), "This must fit in a long long so it can be put in a dictEntry"); diff --git a/src/lazyfree.cpp b/src/lazyfree.cpp index fe76b2f4a..90c2e26cb 100644 --- a/src/lazyfree.cpp +++ b/src/lazyfree.cpp @@ -20,11 +20,9 @@ void lazyfreeFreeObject(void *args[]) { * when the database was logically deleted. 
*/ void lazyfreeFreeDatabase(void *args[]) { dict *ht1 = (dict *) args[0]; - expireset *setexpire = (expireset *) args[1]; size_t numkeys = dictSize(ht1); dictRelease(ht1); - delete setexpire; atomicDecr(lazyfree_objects,numkeys); atomicIncr(lazyfreed_objects,numkeys); } @@ -217,17 +215,15 @@ void freeObjAsync(robj *key, robj *obj) { * create a new empty set of hash tables and scheduling the old ones for * lazy freeing. */ void redisDbPersistentData::emptyDbAsync() { - std::unique_lock ul(g_expireLock); dict *oldht1 = m_pdict; - auto *set = m_setexpire; - m_setexpire = new (MALLOC_LOCAL) expireset(); m_pdict = dictCreate(&dbDictType,this); if (m_spstorage != nullptr) m_spstorage->clearAsync(); if (m_fTrackingChanges) m_fAllChanged = true; atomicIncr(lazyfree_objects,dictSize(oldht1)); - bioCreateLazyFreeJob(lazyfreeFreeDatabase,2,oldht1,set); + m_numexpires = 0; + bioCreateLazyFreeJob(lazyfreeFreeDatabase,2,oldht1,nullptr); } /* Release the radix tree mapping Redis Cluster keys to slots asynchronously. */ diff --git a/src/module.cpp b/src/module.cpp index eceb80c75..990268245 100644 --- a/src/module.cpp +++ b/src/module.cpp @@ -690,8 +690,9 @@ void moduleHandlePropagationAfterCommandCallback(RedisModuleCtx *ctx) { } /* Free the context after the user function was called. */ -void moduleFreeContext(RedisModuleCtx *ctx) { - moduleHandlePropagationAfterCommandCallback(ctx); +void moduleFreeContext(RedisModuleCtx *ctx, bool propogate) { + if (propogate) + moduleHandlePropagationAfterCommandCallback(ctx); autoMemoryCollect(ctx); poolAllocRelease(ctx); if (ctx->postponed_arrays) { @@ -2442,11 +2443,10 @@ int RM_UnlinkKey(RedisModuleKey *key) { * If no TTL is associated with the key or if the key is empty, * REDISMODULE_NO_EXPIRE is returned. 
*/ mstime_t RM_GetExpire(RedisModuleKey *key) { - std::unique_lock ul(g_expireLock); - expireEntry *pexpire = key->db->getExpire(key->key); + auto itr = key->db->find(key->key); mstime_t expire = INVALID_EXPIRE; - if (pexpire != nullptr) - pexpire->FGetPrimaryExpire(&expire); + if (itr->FExpires()) + itr->expire.FGetPrimaryExpire(&expire); if (expire == INVALID_EXPIRE || key->value == NULL) return REDISMODULE_NO_EXPIRE; diff --git a/src/modules/Makefile b/src/modules/Makefile index 3db19e79a..64014afc8 100644 --- a/src/modules/Makefile +++ b/src/modules/Makefile @@ -13,7 +13,10 @@ endif .SUFFIXES: .c .so .xo .o -all: helloworld.so hellotype.so helloblock.so hellocluster.so hellotimer.so hellodict.so hellohook.so helloacl.so +all: helloworld.so hellotype.so helloblock.so hellocluster.so hellotimer.so hellodict.so hellohook.so helloacl.so build-keydb_modstatsd + +build-keydb_modstatsd: + $(MAKE) -C keydb_modstatsd .c.xo: $(CC) -I. $(CFLAGS) $(SHOBJ_CFLAGS) -fPIC -c $< -o $@ @@ -60,3 +63,4 @@ helloacl.so: helloacl.xo clean: rm -rf *.xo *.so + $(MAKE) -C keydb_modstatsd clean diff --git a/src/modules/keydb_modstatsd/Makefile b/src/modules/keydb_modstatsd/Makefile new file mode 100644 index 000000000..f0ccea39f --- /dev/null +++ b/src/modules/keydb_modstatsd/Makefile @@ -0,0 +1,13 @@ +MODULE_FLAGS := -fPIC -O2 -Wall -Werror + +OBJECT_FILES := modmain.o +MODSNAP_CXX_FLAGS := -std=gnu++14 + +%.o: %.cpp + $(CXX) -o $@ -c $< $(MODULE_FLAGS) -I../../../deps/cpp-statsd-client/include $(MODSNAP_CXX_FLAGS) -g + +modstatsd.so: $(OBJECT_FILES) + $(CXX) -shared $(OBJECT_FILES) -o modstatsd.so + +clean: + rm -f $(OBJECT_FILES) modstatsd.so diff --git a/src/modules/keydb_modstatsd/modmain.cpp b/src/modules/keydb_modstatsd/modmain.cpp new file mode 100644 index 000000000..5feafc284 --- /dev/null +++ b/src/modules/keydb_modstatsd/modmain.cpp @@ -0,0 +1,722 @@ +#include "redismodule.h" +#include +#include +#include +#include +#include +#ifdef __linux__ +#include +#include +#endif 
+#include +#include +#include +#include +#include +#include + +using namespace Statsd; + +class StatsdClientWrapper +{ +private: + StatsdClient *m_stats; + StatsdClient *m_stats_noprefix; + +public: + StatsdClientWrapper(const std::string& host, + const uint16_t port, + const std::string& prefix, + const uint64_t batchsize, + const uint64_t sendInterval) { + m_stats = new StatsdClient(host, port, prefix, batchsize, sendInterval); + m_stats_noprefix = new StatsdClient(host, port, "keydb", batchsize, sendInterval); + } + + ~StatsdClientWrapper() { + delete m_stats; + delete m_stats_noprefix; + } + + void increment(const std::string& key, const bool prefixOnly = true) { + m_stats->increment(key); + if (!prefixOnly) m_stats_noprefix->increment(key); + } + + void decrement(const std::string& key, const bool prefixOnly = true) { + m_stats->decrement(key); + if (!prefixOnly) m_stats_noprefix->decrement(key); + } + + void count(const std::string& key, const int delta, const bool prefixOnly = true) { + m_stats->count(key, delta); + if (!prefixOnly) m_stats_noprefix->count(key, delta); + } + + template + void gauge(const std::string& key, const T value, const bool prefixOnly = true) { + m_stats->gauge(key, value); + if (!prefixOnly) m_stats_noprefix->gauge(key, value); + } + + void timing(const std::string& key, const unsigned int ms, const bool prefixOnly = true) { + m_stats->timing(key, ms); + if (!prefixOnly) m_stats_noprefix->timing(key, ms); + } +}; + +/* constants */ +static time_t c_infoUpdateSeconds = 10; +// the current Redis Cluster setup we configure replication factor as 2, each non-empty master node should have 2 replicas, given that there are 3 zones in each regions +static const int EXPECTED_NUMBER_OF_REPLICAS = 2; + +StatsdClientWrapper *g_stats = nullptr; +std::string m_strPrefix { "keydb" }; + +const std::regex g_replica_or_db_info_regex { "^(slave|db)(\\d+)" }; +const char *g_string_counter_separator = "__"; +const uint64_t g_stats_buffer_size_bytes = 
1600; +std::string nodeName; +int unameResult; + +enum class StatsD_Type { + STATSD_GAUGE_LONGLONG, + STATSD_GAUGE_FLOAT, + STATSD_GAUGE_BYTES, + STATSD_DELTA, + STATSD_COUNTER_STRING +}; + +struct StatsRecord { + StatsD_Type type; + bool prefixOnly = true; + const char *szAlternate = nullptr; + + /* Dynamic Values */ + long long prevVal = 0; +}; + +std::unordered_map g_mapInfoFields = { + // info + { "used_memory", { StatsD_Type::STATSD_GAUGE_BYTES, false /* prefixOnly */}}, + { "used_memory_rss", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "maxmemory", { StatsD_Type::STATSD_GAUGE_BYTES, false /* prefixOnly */}}, + { "used_memory_dataset_perc", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "avg_lock_contention", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "repl_backlog_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "connected_slaves", { StatsD_Type::STATSD_GAUGE_LONGLONG, true, "connected_replicas" }}, + { "errorstat_ERR", { StatsD_Type::STATSD_DELTA }}, + { "connected_clients", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "cluster_connections", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "instantaneous_ops_per_sec", { StatsD_Type::STATSD_GAUGE_LONGLONG, false /* prefixOnly */}}, + { "instantaneous_input_kbps", { StatsD_Type::STATSD_GAUGE_FLOAT, false /* prefixOnly */}}, + { "instantaneous_output_kbps", { StatsD_Type::STATSD_GAUGE_FLOAT, false /* prefixOnly */}}, + { "server_threads", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "mvcc_depth", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "sync_full", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "sync_partial_ok", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "sync_partial_err", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "rdb_bgsave_in_progress", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "rdb_last_bgsave_time_sec", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "used_memory_overhead", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "master_sync_in_progress", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { 
"uptime_in_seconds", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "hz", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "configured_hz", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "maxclients", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "client_recent_max_input_buffer", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "client_recent_max_output_buffer", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "blocked_clients", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "tracking_clients", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "clients_in_timeout_table", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "used_memory_peak", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "used_memory_startup", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "used_memory_dataset", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "allocator_allocated", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "allocator_active", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "allocator_resident", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "total_system_memory", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "used_memory_lua", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "used_memory_scripts", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "number_of_cached_scripts", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "allocator_frag_ratio", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "allocator_frag_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "allocator_rss_ratio", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "allocator_rss_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "rss_overhead_ratio", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "rss_overhead_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "mem_fragmentation_ratio", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "mem_fragmentation_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "mem_not_counted_for_evict", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "mem_replication_backlog", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "mem_clients_slaves", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "mem_clients_normal", { 
StatsD_Type::STATSD_GAUGE_BYTES }}, + { "mem_aof_buffer", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "active_defrag_running", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "lazyfree_pending_objects", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "lazyfreed_objects", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "loading", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "current_cow_peak", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "current_cow_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "current_cow_size_age", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "current_fork_perc", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "current_save_keys_processed", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "current_save_keys_total", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "rdb_changes_since_last_save", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "rdb_last_save_time", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "rdb_last_cow_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "aof_enabled", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "aof_rewrite_in_progress", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "aof_rewrite_scheduled", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "aof_last_cow_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "module_fork_in_progress", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "module_fork_last_cow_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "aof_current_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "aof_base_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "aof_pending_rewrite", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "aof_buffer_length", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "aof_rewrite_buffer_length", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "aof_pending_bio_fsync", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "aof_delayed_fsync", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "total_connections_received", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "total_commands_processed", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { 
"total_net_input_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "total_net_output_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "rejected_connections", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "expired_keys", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "expired_stale_perc", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "expired_time_cap_reached_count", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "expire_cycle_cpu_milliseconds", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "evicted_keys", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "keyspace_hits", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "keyspace_misses", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "pubsub_channels", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "pubsub_patterns", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "latest_fork_usec", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "total_forks", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "migrate_cached_sockets", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "slave_expires_tracked_keys", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "active_defrag_hits", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "active_defrag_misses", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "active_defrag_key_hits", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "active_defrag_key_misses", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "tracking_total_keys", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "tracking_total_items", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "tracking_total_prefixes", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "unexpected_error_replies", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "total_error_replies", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "dump_payload_sanitizations", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "total_reads_processed", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "total_writes_processed", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "instantaneous_lock_contention", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { 
"avg_lock_contention", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "storage_provider_read_hits", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "storage_provider_read_misses", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "repl_backlog_active", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "repl_backlog_size", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "repl_backlog_first_byte_offset", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "repl_backlog_histlen", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "used_cpu_sys", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "used_cpu_user", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "used_cpu_sys_children", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "used_cpu_user_children", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "used_cpu_user_children", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "long_lock_waits", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "used_cpu_sys_main_thread", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "used_cpu_user_main_thread", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "master_sync_total_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "master_sync_read_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "master_sync_last_io_seconds_ago", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "master_link_down_since_seconds", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "maxmemory_policy", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "role", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "master_global_link_status", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "master_link_status", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "master_failover_state", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "rdb_last_bgsave_status", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "rdb_saves", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "instantaneous_input_repl_kbps", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "instantaneous_output_repl_kbps", { StatsD_Type::STATSD_GAUGE_FLOAT }}, + { "master_host", { StatsD_Type::STATSD_COUNTER_STRING }}, + { 
"master_repl_offset", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "second_repl_offset", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "slave_repl_offset", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "redis_version", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "redis_git_sha1", { StatsD_Type::STATSD_COUNTER_STRING }}, + // cluster info + { "cluster_state", { StatsD_Type::STATSD_COUNTER_STRING }}, + { "cluster_slots_assigned", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "cluster_slots_ok", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "cluster_slots_pfail", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "cluster_slots_fail", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "cluster_known_nodes", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "cluster_size", { StatsD_Type::STATSD_GAUGE_LONGLONG }}, + { "storage_flash_available_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, + { "storage_flash_total_bytes", { StatsD_Type::STATSD_GAUGE_BYTES }}, +}; + +/* Globals */ +static uint64_t g_cclients = 0; + +long long ustime(void) { + struct timeval tv; + long long ust; + + gettimeofday(&tv, NULL); + ust = ((long long)tv.tv_sec)*1000000; + ust += tv.tv_usec; + return ust; +} + +void event_client_change_handler(struct RedisModuleCtx *ctx, RedisModuleEvent eid, uint64_t subevent, void *data) { + if (eid.id != REDISMODULE_EVENT_CLIENT_CHANGE) + return; + + uint64_t clientsStart = g_cclients; + switch (subevent) { + case REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED: + ++g_cclients; + g_stats->increment("clients_added"); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric increment for \"clients_added\""); + break; + + case REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED: + --g_cclients; + g_stats->increment("clients_disconnected"); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric increment for \"clients_disconnected\""); + break; + } + + if (g_cclients != clientsStart) { + g_stats->gauge("clients", g_cclients); + RedisModule_Log(ctx, 
REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"clients\": %" PRIu64, g_cclients); + } +} + +void handleStatItem(struct RedisModuleCtx *ctx, std::string name, StatsRecord &record, const char *pchValue) { + switch (record.type) { + case StatsD_Type::STATSD_GAUGE_LONGLONG: { + long long val = strtoll(pchValue, nullptr, 10); + g_stats->gauge(name, val, record.prefixOnly); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"%s\": %lld", name.c_str(), val); + break; + } + + case StatsD_Type::STATSD_GAUGE_FLOAT: { + double val = strtod(pchValue, nullptr); + g_stats->gauge(name, val, record.prefixOnly); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"%s\": %f", name.c_str(), val); + break; + } + + case StatsD_Type::STATSD_GAUGE_BYTES: { + long long val = strtoll(pchValue, nullptr, 10); + g_stats->gauge(name + "_MB", val / 1024/1024, record.prefixOnly); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"%s\": %llu", (name + "_MB").c_str(), val / 1024/1024); + break; + } + + case StatsD_Type::STATSD_DELTA: { + long long val = strtoll(pchValue, nullptr, 10); + g_stats->count(name, val - record.prevVal, record.prefixOnly); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric count for \"%s\": %lld", name.c_str() , val - record.prevVal); + record.prevVal = val; + break; + } + + case StatsD_Type::STATSD_COUNTER_STRING: { + // parse val string + const char *pNewLine = strchr(pchValue, '\r'); + if (pNewLine == nullptr) { + pNewLine = strchr(pchValue, '\n'); + } + if (pNewLine == nullptr) { + g_stats->increment("STATSD_COUNTER_STRING_failed", 1); + return; + } + std::string val(pchValue, pNewLine - pchValue); + std::replace(val.begin(), val.end(), '.', '-'); + // metrics emit + std::string metricsName = name + g_string_counter_separator + val; + g_stats->increment(metricsName, 1); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"%s\"", metricsName.c_str()); + break; + } + + default: + 
RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_WARNING, "Unknown stats record type for the key \"%s\": %u", name.c_str(), (unsigned)record.type); + break; + } +} + +void handleErrorStatItem(struct RedisModuleCtx *ctx, std::string name, std::string rest) { + size_t idx = rest.find('='); + if (idx != std::string::npos) { + std::string statValue = rest.substr(idx + 1); + long long val = strtoll(statValue.c_str(), nullptr, 10); + g_stats->gauge(name, val); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"%s\": %lld", name.c_str(), val); + } else { + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_WARNING, "Unexpected errorstat line format returned by \"INFO\" command: \"%s\"", (name + rest).c_str()); + } +} + +void handleReplicaOrDbInfoItem(struct RedisModuleCtx *ctx, std::string name, std::string rest) { + //use a stringstream to extract each metric of the form = + std::stringstream metrics(rest); + while (metrics.good()) { + std::string metric; + std::getline(metrics, metric, ','); + size_t idx = metric.find('='); + if (idx != std::string::npos) { + std::string stat = metric.substr(0, idx); + std::string statName = name + '-' + stat; + //idx + 1 to ignore the = sign + std::string statValue = metric.substr(idx + 1); + // string values + if (stat == "ip" || stat == "state") { + std::replace(statValue.begin(), statValue.end(), '.', '-'); + statName += g_string_counter_separator + statValue; + g_stats->increment(statName, 1); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"%s\"", statName.c_str()); + } else { + long long val = strtoll(statValue.c_str(), nullptr, 10); + g_stats->gauge(statName, val); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"%s\": %lld", statName.c_str(), val); + } + } + } +} + +void handle_info_response(struct RedisModuleCtx *ctx, const char *szReply, size_t len, const char *command) { + + #define SAFETY_CHECK_POINTER(_p) ((_p) < (szReply + len)) + + // Parse the INFO reply string line by line + 
const char *pchLineStart = szReply; + + while (SAFETY_CHECK_POINTER(pchLineStart) && *pchLineStart != '\0') { + // Loop Each Line + const char *pchColon = pchLineStart; + while (SAFETY_CHECK_POINTER(pchColon) && *pchColon != ':' && *pchColon != '\r') { + ++pchColon; + } + if (!SAFETY_CHECK_POINTER(pchColon)) { + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_WARNING, "Unexpected line termination when parsing response from %s command: \"%s\"", command, pchLineStart); + break; // BUG + } + const char *pchLineEnd = pchColon; + while (SAFETY_CHECK_POINTER(pchLineEnd) && *pchLineEnd != '\n') + ++pchLineEnd; + + std::string strCheck(pchLineStart, pchColon - pchLineStart); + if (strCheck.find("errorstat_") != std::string::npos) { + std::string remainder(pchColon + 1, pchLineEnd - (pchColon + 1)); + handleErrorStatItem(ctx, strCheck, remainder); + } else if (std::regex_match(strCheck, g_replica_or_db_info_regex)) { + std::string remainder(pchColon + 1, pchLineEnd - (pchColon + 1)); + handleReplicaOrDbInfoItem(ctx, strCheck, remainder); + } else { + auto itr = g_mapInfoFields.find(strCheck); + if (itr != g_mapInfoFields.end()) { + // This is an info field we care about + if (itr->second.szAlternate != nullptr) + strCheck = itr->second.szAlternate; + handleStatItem(ctx, strCheck, itr->second, pchColon+1); + } + } + + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "INFO response line: \"%s\"", std::string(pchLineStart, pchLineEnd - pchLineStart).c_str()); + pchLineStart = pchLineEnd + 1; // start of next line, if we're over the loop will catch it + } + + #undef SAFETY_CHECK_POINTER +} + +void handle_cluster_nodes_response(struct RedisModuleCtx *ctx, const char *szReply, size_t len) { + #define SAFETY_CHECK_POINTER(_p) ((_p) < (szReply + len)) + const char *pchLineStart = szReply; + long long primaries = 0; + long long replicas = 0; + while (SAFETY_CHECK_POINTER(pchLineStart) && *pchLineStart != '\0') { + // Loop Each Line + const char *pchLineEnd = pchLineStart; + while 
(SAFETY_CHECK_POINTER(pchLineEnd) && (*pchLineEnd != '\r') && (*pchLineEnd != '\n')) { + ++pchLineEnd; + } + std::string line(pchLineStart, pchLineEnd - pchLineStart); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Cluster Nodes Line: \"%s\"", line.c_str()); + if (std::string::npos != line.find("master")) { + ++primaries; + } else if (std::string::npos != line.find("slave")) { + ++replicas; + } else { + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_WARNING, "Unexpected NODE format returned by \"CLUSTER NODES\" command: \"%s\"", line.c_str()); + } + // emit myself stat + if (line.find("myself") != std::string::npos) { + size_t firstSpaceIdx = line.find(' '); + // emit cluster node id + if (firstSpaceIdx != std::string::npos) { + std::string nodeIdStat = "cluster_node_id"; + nodeIdStat += g_string_counter_separator + line.substr(0, firstSpaceIdx); + g_stats->increment(nodeIdStat); + } + // emit node ip + size_t firstColonIdx = line.find(':'); + if (firstColonIdx != std::string::npos) { + std::string nodeIpStat = "cluster_node_ip"; + std::string nodeIP = line.substr(firstSpaceIdx+1, firstColonIdx-firstSpaceIdx-1); + std::replace(nodeIP.begin(), nodeIP.end(), '.', '-'); + nodeIpStat += g_string_counter_separator + nodeIP; + g_stats->increment(nodeIpStat); + } + } + pchLineStart = pchLineEnd; + while (SAFETY_CHECK_POINTER(pchLineStart) && ((*pchLineStart == '\r') || (*pchLineStart == '\n'))) { + ++pchLineStart; + } + } + g_stats->gauge("primaries", primaries); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"primaries\": %llu", primaries); + g_stats->gauge("replicas", replicas); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"replicas\": %llu", replicas); + #undef SAFETY_CHECK_POINTER +} + +void handle_client_list_response(struct RedisModuleCtx *ctx, const char *szReply, size_t len) { + size_t totalClientOutputBuffer = 0; + size_t totalReplicaClientOutputBuffer = 0; + #define SAFETY_CHECK_POINTER(_p) ((_p) < (szReply + len)) + 
const char *pchLineStart = szReply; + while (SAFETY_CHECK_POINTER(pchLineStart) && *pchLineStart != '\0') { + // Loop Each Line + const char *pchLineEnd = pchLineStart; + while (SAFETY_CHECK_POINTER(pchLineEnd) && (*pchLineEnd != '\r') && (*pchLineEnd != '\n')) { + ++pchLineEnd; + } + std::string line(pchLineStart, pchLineEnd - pchLineStart); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Client List Line: \"%s\"", line.c_str()); + + // recover output buffer size for client + bool lineFailed = false; + bool replica = line.find("flags=S") != std::string::npos; + size_t idx = line.find("omem"); + if (!(lineFailed = (idx == std::string::npos))) { + std::string rest = line.substr(idx); + size_t startIdx = rest.find("="); + if (!(lineFailed = (startIdx == std::string::npos))) { + size_t endIdx = rest.find(" "); + if (!(lineFailed = (endIdx == std::string::npos))) { + // use startIdx + 1 and endIdx - 1 to exclude the '=' and ' ' characters + std::string valueString = rest.substr(startIdx + 1, (endIdx - 1) - (startIdx + 1)); + size_t value = strtoll(valueString.c_str(), nullptr, 10); + totalClientOutputBuffer += value; + if (replica) { + totalReplicaClientOutputBuffer += value; + } + } + } + } + + if (lineFailed) { + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_WARNING, "Unexpected CLIENT format returned by \"CLIENT LIST\" command: \"%s\"", line.c_str()); + } + + pchLineStart = pchLineEnd; + while (SAFETY_CHECK_POINTER(pchLineStart) && ((*pchLineStart == '\r') || (*pchLineStart == '\n'))) { + ++pchLineStart; + } + } + #undef SAFETY_CHECK_POINTER + g_stats->gauge("total_client_output_buffer", totalClientOutputBuffer); + g_stats->gauge("total_replica_client_output_buffer", totalReplicaClientOutputBuffer); +} + +void emit_system_free_memory() { + std::ifstream meminfo("/proc/meminfo"); + std::string line; + while (std::getline(meminfo, line)) { + if (line.find("MemAvailable:") != std::string::npos) { + unsigned long memAvailableInKB; + std::sscanf(line.c_str(), 
"MemAvailable: %lu kB", &memAvailableInKB); + g_stats->gauge("systemAvailableMemory_MB", memAvailableInKB / 1024); + return; + } + } +} + +void emit_metrics_for_insufficient_replicas(struct RedisModuleCtx *ctx, long long keys) { + // non-empty + if (keys <= 0) { + return; + } + RedisModuleCallReply *reply = RedisModule_Call(ctx, "ROLE", ""); + if (RedisModule_CallReplyType(reply) != REDISMODULE_REPLY_ARRAY) { + RedisModule_FreeCallReply(reply); + return; + } + RedisModuleCallReply *roleReply = RedisModule_CallReplyArrayElement(reply, 0); + if (RedisModule_CallReplyType(roleReply) != REDISMODULE_REPLY_STRING) { + RedisModule_FreeCallReply(reply); + return; + } + size_t len; + const char *role = RedisModule_CallReplyStringPtr(roleReply, &len); + // check if the current node is a primary + if (strncmp(role, "master", len) == 0) { + RedisModuleCallReply *replicasReply = RedisModule_CallReplyArrayElement(reply, 2); + // check if there are less than 2 connected replicas + if (RedisModule_CallReplyLength(replicasReply) < EXPECTED_NUMBER_OF_REPLICAS) { + g_stats->increment("lessThanExpectedReplicas_error", 1); + } + } + RedisModule_FreeCallReply(reply); +} + +void event_cron_handler(struct RedisModuleCtx *ctx, RedisModuleEvent eid, uint64_t subevent, void *data) { + static time_t lastTime = 0; + time_t curTime = time(nullptr); + + if ((curTime - lastTime) > c_infoUpdateSeconds) { + size_t startTime = ustime(); + +#ifdef __linux__ + /* Log CPU Usage */ + static long long s_mscpuLast = 0; + struct rusage self_ru; + getrusage(RUSAGE_SELF, &self_ru); + + long long mscpuCur = (self_ru.ru_utime.tv_sec * 1000) + (self_ru.ru_utime.tv_usec / 1000) + + (self_ru.ru_stime.tv_sec * 1000) + (self_ru.ru_stime.tv_usec / 1000); + + + g_stats->gauge("cpu_load_perc", ((double)(mscpuCur - s_mscpuLast) / ((curTime - lastTime)*1000))*100, false /* prefixOnly */); + s_mscpuLast = mscpuCur; +#endif + + /* Log clients */ + g_stats->gauge("clients", g_cclients); + + /* node name */ + if 
(unameResult == 0) { + g_stats->increment("node_name" + std::string(g_string_counter_separator) + nodeName); + } + + /* Log INFO Fields */ + size_t commandStartTime = ustime(); + RedisModuleCallReply *reply = RedisModule_Call(ctx, "INFO", ""); + size_t len = 0; + const char *szReply = RedisModule_CallReplyStringPtr(reply, &len); + g_stats->timing("info_time_taken_us", ustime() - commandStartTime); + commandStartTime = ustime(); + handle_info_response(ctx, szReply, len, "INFO"); + g_stats->timing("handle_info_time_taken_us", ustime() - commandStartTime); + RedisModule_FreeCallReply(reply); + + /* Log CLUSTER INFO Fields */ + commandStartTime = ustime(); + reply = RedisModule_Call(ctx, "CLUSTER", "c", "INFO"); + szReply = RedisModule_CallReplyStringPtr(reply, &len); + g_stats->timing("cluster_info_time_taken_us", ustime() - commandStartTime); + commandStartTime = ustime(); + handle_info_response(ctx, szReply, len, "CLUSTER INFO"); + g_stats->timing("handle_cluster_info_time_taken_us", ustime() - commandStartTime); + RedisModule_FreeCallReply(reply); + + /* Log Cluster Topology */ + commandStartTime = ustime(); + reply = RedisModule_Call(ctx, "CLUSTER", "c", "NODES"); + szReply = RedisModule_CallReplyStringPtr(reply, &len); + g_stats->timing("cluster_nodes_time_taken_us", ustime() - commandStartTime); + commandStartTime = ustime(); + handle_cluster_nodes_response(ctx, szReply, len); + g_stats->timing("handle_cluster_nodes_time_taken_us", ustime() - commandStartTime); + RedisModule_FreeCallReply(reply); + + /* Log Client Info */ + // commandStartTime = ustime(); + // reply = RedisModule_Call(ctx, "CLIENT", "c", "LIST"); + // szReply = RedisModule_CallReplyStringPtr(reply, &len); + // g_stats->timing("client_info_time_taken_us", ustime() - commandStartTime); + // commandStartTime = ustime(); + // handle_client_list_response(ctx, szReply, len); + // g_stats->timing("handle_client_info_time_taken_us", ustime() - commandStartTime); + // RedisModule_FreeCallReply(reply); + 
+ commandStartTime = ustime(); + emit_system_free_memory(); + g_stats->timing("emit_free_system_memory_time_taken_us", ustime() - commandStartTime); + + /* Log Keys */ + commandStartTime = ustime(); + reply = RedisModule_Call(ctx, "dbsize", ""); + long long keys = RedisModule_CallReplyInteger(reply); + RedisModule_FreeCallReply(reply); + g_stats->gauge("keys", keys, false /* prefixOnly */); + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_DEBUG, "Emitting metric \"keys\": %llu", keys); + g_stats->timing("emit_keys_metric_time_taken_us", ustime() - commandStartTime); + + emit_metrics_for_insufficient_replicas(ctx, keys); + + g_stats->timing("metrics_time_taken_us", ustime() - startTime); + + lastTime = curTime; + } +} + +extern "C" int RedisModule_OnLoad(RedisModuleCtx *ctx, RedisModuleString **argv, int argc) { + if (RedisModule_Init(ctx,"statsd",1,REDISMODULE_APIVER_1) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + RedisModule_AutoMemory(ctx); + /* Use pod name if available*/ + const char *podName = getenv("POD_NAME"); + utsname sysName; + unameResult = uname(&sysName); + if (unameResult == 0) { + nodeName = std::string(sysName.nodename); + std::replace(nodeName.begin(), nodeName.end(), '.', '-'); + } + if (podName != nullptr) { + m_strPrefix = podName; + std::replace(m_strPrefix.begin(), m_strPrefix.end(), '.', '-'); + } + else if (unameResult == 0) { + m_strPrefix = nodeName; + unameResult = 1; + } + + for (int iarg = 0; iarg < argc; ++iarg) { + size_t len = 0; + const char *rgchArg = RedisModule_StringPtrLen(argv[iarg], &len); + if (len == 6 && memcmp(rgchArg, "prefix", 6) == 0) { + if ((iarg+1) >= argc) { + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_WARNING, "Expected a value after 'prefix'"); + return REDISMODULE_ERR; + } + ++iarg; + size_t lenPrefix = 0; + const char *rgchPrefix = RedisModule_StringPtrLen(argv[iarg], &lenPrefix); + m_strPrefix = std::string(rgchPrefix, lenPrefix); + } else { + RedisModule_Log(ctx, REDISMODULE_LOGLEVEL_WARNING, "Unrecognized 
configuration flag"); + return REDISMODULE_ERR; + } + } + + g_stats = new StatsdClientWrapper("localhost", 8125, m_strPrefix, g_stats_buffer_size_bytes, c_infoUpdateSeconds * 1000); + + if (RedisModule_SubscribeToServerEvent(ctx, RedisModuleEvent_ClientChange, event_client_change_handler) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + if (RedisModule_SubscribeToServerEvent(ctx, RedisModuleEvent_CronLoop, event_cron_handler) == REDISMODULE_ERR) + return REDISMODULE_ERR; + + return REDISMODULE_OK; +} + +extern "C" int RedisModule_OnUnload(RedisModuleCtx *ctx) { + delete g_stats; + return REDISMODULE_OK; +} diff --git a/src/modules/keydb_modstatsd/redismodule.h b/src/modules/keydb_modstatsd/redismodule.h new file mode 100644 index 000000000..4313aee01 --- /dev/null +++ b/src/modules/keydb_modstatsd/redismodule.h @@ -0,0 +1,1146 @@ +#ifndef REDISMODULE_H +#define REDISMODULE_H + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* ---------------- Defines common between core and modules --------------- */ + +/* Error status return values. */ +#define REDISMODULE_OK 0 +#define REDISMODULE_ERR 1 + +/* API versions. */ +#define REDISMODULE_APIVER_1 1 + +/* Version of the RedisModuleTypeMethods structure. Once the RedisModuleTypeMethods + * structure is changed, this version number needs to be changed synchronistically. */ +#define REDISMODULE_TYPE_METHOD_VERSION 3 + +/* API flags and constants */ +#define REDISMODULE_READ (1<<0) +#define REDISMODULE_WRITE (1<<1) + +/* RedisModule_OpenKey extra flags for the 'mode' argument. + * Avoid touching the LRU/LFU of the key when opened. */ +#define REDISMODULE_OPEN_KEY_NOTOUCH (1<<16) + +#define REDISMODULE_LIST_HEAD 0 +#define REDISMODULE_LIST_TAIL 1 + +/* Key types. 
*/ +#define REDISMODULE_KEYTYPE_EMPTY 0 +#define REDISMODULE_KEYTYPE_STRING 1 +#define REDISMODULE_KEYTYPE_LIST 2 +#define REDISMODULE_KEYTYPE_HASH 3 +#define REDISMODULE_KEYTYPE_SET 4 +#define REDISMODULE_KEYTYPE_ZSET 5 +#define REDISMODULE_KEYTYPE_MODULE 6 +#define REDISMODULE_KEYTYPE_STREAM 7 + +/* Reply types. */ +#define REDISMODULE_REPLY_UNKNOWN -1 +#define REDISMODULE_REPLY_STRING 0 +#define REDISMODULE_REPLY_ERROR 1 +#define REDISMODULE_REPLY_INTEGER 2 +#define REDISMODULE_REPLY_ARRAY 3 +#define REDISMODULE_REPLY_NULL 4 + +/* Postponed array length. */ +#define REDISMODULE_POSTPONED_ARRAY_LEN -1 + +/* Expire */ +#define REDISMODULE_NO_EXPIRE -1 + +/* Sorted set API flags. */ +#define REDISMODULE_ZADD_XX (1<<0) +#define REDISMODULE_ZADD_NX (1<<1) +#define REDISMODULE_ZADD_ADDED (1<<2) +#define REDISMODULE_ZADD_UPDATED (1<<3) +#define REDISMODULE_ZADD_NOP (1<<4) +#define REDISMODULE_ZADD_GT (1<<5) +#define REDISMODULE_ZADD_LT (1<<6) + +/* Hash API flags. */ +#define REDISMODULE_HASH_NONE 0 +#define REDISMODULE_HASH_NX (1<<0) +#define REDISMODULE_HASH_XX (1<<1) +#define REDISMODULE_HASH_CFIELDS (1<<2) +#define REDISMODULE_HASH_EXISTS (1<<3) +#define REDISMODULE_HASH_COUNT_ALL (1<<4) + +/* StreamID type. */ +typedef struct RedisModuleStreamID { + uint64_t ms; + uint64_t seq; +} RedisModuleStreamID; + +/* StreamAdd() flags. */ +#define REDISMODULE_STREAM_ADD_AUTOID (1<<0) +/* StreamIteratorStart() flags. */ +#define REDISMODULE_STREAM_ITERATOR_EXCLUSIVE (1<<0) +#define REDISMODULE_STREAM_ITERATOR_REVERSE (1<<1) +/* StreamIteratorTrim*() flags. */ +#define REDISMODULE_STREAM_TRIM_APPROX (1<<0) + +/* Context Flags: Info about the current context returned by + * RM_GetContextFlags(). 
*/ + +/* The command is running in the context of a Lua script */ +#define REDISMODULE_CTX_FLAGS_LUA (1<<0) +/* The command is running inside a Redis transaction */ +#define REDISMODULE_CTX_FLAGS_MULTI (1<<1) +/* The instance is a master */ +#define REDISMODULE_CTX_FLAGS_MASTER (1<<2) +/* The instance is a replica */ +#define REDISMODULE_CTX_FLAGS_SLAVE (1<<3) +/* The instance is read-only (usually meaning it's a replica as well) */ +#define REDISMODULE_CTX_FLAGS_READONLY (1<<4) +/* The instance is running in cluster mode */ +#define REDISMODULE_CTX_FLAGS_CLUSTER (1<<5) +/* The instance has AOF enabled */ +#define REDISMODULE_CTX_FLAGS_AOF (1<<6) +/* The instance has RDB enabled */ +#define REDISMODULE_CTX_FLAGS_RDB (1<<7) +/* The instance has Maxmemory set */ +#define REDISMODULE_CTX_FLAGS_MAXMEMORY (1<<8) +/* Maxmemory is set and has an eviction policy that may delete keys */ +#define REDISMODULE_CTX_FLAGS_EVICT (1<<9) +/* Redis is out of memory according to the maxmemory flag. */ +#define REDISMODULE_CTX_FLAGS_OOM (1<<10) +/* Less than 25% of memory available according to maxmemory. */ +#define REDISMODULE_CTX_FLAGS_OOM_WARNING (1<<11) +/* The command was sent over the replication link. */ +#define REDISMODULE_CTX_FLAGS_REPLICATED (1<<12) +/* Redis is currently loading either from AOF or RDB. */ +#define REDISMODULE_CTX_FLAGS_LOADING (1<<13) +/* The replica has no link with its master, note that + * there is the inverse flag as well: + * + * REDISMODULE_CTX_FLAGS_REPLICA_IS_ONLINE + * + * The two flags are exclusive, one or the other can be set. */ +#define REDISMODULE_CTX_FLAGS_REPLICA_IS_STALE (1<<14) +/* The replica is trying to connect with the master. + * (REPL_STATE_CONNECT and REPL_STATE_CONNECTING states) */ +#define REDISMODULE_CTX_FLAGS_REPLICA_IS_CONNECTING (1<<15) +/* THe replica is receiving an RDB file from its master. */ +#define REDISMODULE_CTX_FLAGS_REPLICA_IS_TRANSFERRING (1<<16) +/* The replica is online, receiving updates from its master. 
*/ +#define REDISMODULE_CTX_FLAGS_REPLICA_IS_ONLINE (1<<17) +/* There is currently some background process active. */ +#define REDISMODULE_CTX_FLAGS_ACTIVE_CHILD (1<<18) +/* The next EXEC will fail due to dirty CAS (touched keys). */ +#define REDISMODULE_CTX_FLAGS_MULTI_DIRTY (1<<19) +/* Redis is currently running inside background child process. */ +#define REDISMODULE_CTX_FLAGS_IS_CHILD (1<<20) +/* The current client does not allow blocking, either called from + * within multi, lua, or from another module using RM_Call */ +#define REDISMODULE_CTX_FLAGS_DENY_BLOCKING (1<<21) + +/* Next context flag, must be updated when adding new flags above! +This flag should not be used directly by the module. + * Use RedisModule_GetContextFlagsAll instead. */ +#define _REDISMODULE_CTX_FLAGS_NEXT (1<<22) + +/* Keyspace changes notification classes. Every class is associated with a + * character for configuration purposes. + * NOTE: These have to be in sync with NOTIFY_* in server.h */ +#define REDISMODULE_NOTIFY_KEYSPACE (1<<0) /* K */ +#define REDISMODULE_NOTIFY_KEYEVENT (1<<1) /* E */ +#define REDISMODULE_NOTIFY_GENERIC (1<<2) /* g */ +#define REDISMODULE_NOTIFY_STRING (1<<3) /* $ */ +#define REDISMODULE_NOTIFY_LIST (1<<4) /* l */ +#define REDISMODULE_NOTIFY_SET (1<<5) /* s */ +#define REDISMODULE_NOTIFY_HASH (1<<6) /* h */ +#define REDISMODULE_NOTIFY_ZSET (1<<7) /* z */ +#define REDISMODULE_NOTIFY_EXPIRED (1<<8) /* x */ +#define REDISMODULE_NOTIFY_EVICTED (1<<9) /* e */ +#define REDISMODULE_NOTIFY_STREAM (1<<10) /* t */ +#define REDISMODULE_NOTIFY_KEY_MISS (1<<11) /* m (Note: This one is excluded from REDISMODULE_NOTIFY_ALL on purpose) */ +#define REDISMODULE_NOTIFY_LOADED (1<<12) /* module only key space notification, indicate a key loaded from rdb */ +#define REDISMODULE_NOTIFY_MODULE (1<<13) /* d, module key space notification */ + +/* Next notification flag, must be updated when adding new flags above! +This flag should not be used directly by the module. 
+ * Use RedisModule_GetKeyspaceNotificationFlagsAll instead. */ +#define _REDISMODULE_NOTIFY_NEXT (1<<14) + +#define REDISMODULE_NOTIFY_ALL (REDISMODULE_NOTIFY_GENERIC | REDISMODULE_NOTIFY_STRING | REDISMODULE_NOTIFY_LIST | REDISMODULE_NOTIFY_SET | REDISMODULE_NOTIFY_HASH | REDISMODULE_NOTIFY_ZSET | REDISMODULE_NOTIFY_EXPIRED | REDISMODULE_NOTIFY_EVICTED | REDISMODULE_NOTIFY_STREAM | REDISMODULE_NOTIFY_MODULE) /* A */ + +/* A special pointer that we can use between the core and the module to signal + * field deletion, and that is impossible to be a valid pointer. */ +#define REDISMODULE_HASH_DELETE ((RedisModuleString*)(long)1) + +/* Error messages. */ +#define REDISMODULE_ERRORMSG_WRONGTYPE "WRONGTYPE Operation against a key holding the wrong kind of value" + +#define REDISMODULE_POSITIVE_INFINITE (1.0/0.0) +#define REDISMODULE_NEGATIVE_INFINITE (-1.0/0.0) + +/* Cluster API defines. */ +#define REDISMODULE_NODE_ID_LEN 40 +#define REDISMODULE_NODE_MYSELF (1<<0) +#define REDISMODULE_NODE_MASTER (1<<1) +#define REDISMODULE_NODE_SLAVE (1<<2) +#define REDISMODULE_NODE_PFAIL (1<<3) +#define REDISMODULE_NODE_FAIL (1<<4) +#define REDISMODULE_NODE_NOFAILOVER (1<<5) + +#define REDISMODULE_CLUSTER_FLAG_NONE 0 +#define REDISMODULE_CLUSTER_FLAG_NO_FAILOVER (1<<1) +#define REDISMODULE_CLUSTER_FLAG_NO_REDIRECTION (1<<2) + +#define REDISMODULE_NOT_USED(V) ((void) V) + +/* Logging level strings */ +#define REDISMODULE_LOGLEVEL_DEBUG "debug" +#define REDISMODULE_LOGLEVEL_VERBOSE "verbose" +#define REDISMODULE_LOGLEVEL_NOTICE "notice" +#define REDISMODULE_LOGLEVEL_WARNING "warning" + +/* Bit flags for aux_save_triggers and the aux_load and aux_save callbacks */ +#define REDISMODULE_AUX_BEFORE_RDB (1<<0) +#define REDISMODULE_AUX_AFTER_RDB (1<<1) + +/* This type represents a timer handle, and is returned when a timer is + * registered and used in order to invalidate a timer. 
It's just a 64 bit + * number, because this is how each timer is represented inside the radix tree + * of timers that are going to expire, sorted by expire time. */ +typedef uint64_t RedisModuleTimerID; + +/* CommandFilter Flags */ + +/* Do filter RedisModule_Call() commands initiated by module itself. */ +#define REDISMODULE_CMDFILTER_NOSELF (1<<0) + +/* Declare that the module can handle errors with RedisModule_SetModuleOptions. */ +#define REDISMODULE_OPTIONS_HANDLE_IO_ERRORS (1<<0) +/* When set, Redis will not call RedisModule_SignalModifiedKey(), implicitly in + * RedisModule_CloseKey, and the module needs to do that when manually when keys + * are modified from the user's sperspective, to invalidate WATCH. */ +#define REDISMODULE_OPTION_NO_IMPLICIT_SIGNAL_MODIFIED (1<<1) + +/* Server events definitions. + * Those flags should not be used directly by the module, instead + * the module should use RedisModuleEvent_* variables */ +#define REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED 0 +#define REDISMODULE_EVENT_PERSISTENCE 1 +#define REDISMODULE_EVENT_FLUSHDB 2 +#define REDISMODULE_EVENT_LOADING 3 +#define REDISMODULE_EVENT_CLIENT_CHANGE 4 +#define REDISMODULE_EVENT_SHUTDOWN 5 +#define REDISMODULE_EVENT_REPLICA_CHANGE 6 +#define REDISMODULE_EVENT_MASTER_LINK_CHANGE 7 +#define REDISMODULE_EVENT_CRON_LOOP 8 +#define REDISMODULE_EVENT_MODULE_CHANGE 9 +#define REDISMODULE_EVENT_LOADING_PROGRESS 10 +#define REDISMODULE_EVENT_SWAPDB 11 +#define REDISMODULE_EVENT_REPL_BACKUP 12 +#define REDISMODULE_EVENT_FORK_CHILD 13 +#define _REDISMODULE_EVENT_NEXT 14 /* Next event flag, should be updated if a new event added. */ + +typedef struct RedisModuleEvent { + uint64_t id; /* REDISMODULE_EVENT_... defines. */ + uint64_t dataver; /* Version of the structure we pass as 'data'. 
*/ +} RedisModuleEvent; + +struct RedisModuleCtx; +struct RedisModuleDefragCtx; +typedef void (*RedisModuleEventCallback)(struct RedisModuleCtx *ctx, RedisModuleEvent eid, uint64_t subevent, void *data); + +static const RedisModuleEvent + RedisModuleEvent_ReplicationRoleChanged = { + REDISMODULE_EVENT_REPLICATION_ROLE_CHANGED, + 1 + }, + RedisModuleEvent_Persistence = { + REDISMODULE_EVENT_PERSISTENCE, + 1 + }, + RedisModuleEvent_FlushDB = { + REDISMODULE_EVENT_FLUSHDB, + 1 + }, + RedisModuleEvent_Loading = { + REDISMODULE_EVENT_LOADING, + 1 + }, + RedisModuleEvent_ClientChange = { + REDISMODULE_EVENT_CLIENT_CHANGE, + 1 + }, + RedisModuleEvent_Shutdown = { + REDISMODULE_EVENT_SHUTDOWN, + 1 + }, + RedisModuleEvent_ReplicaChange = { + REDISMODULE_EVENT_REPLICA_CHANGE, + 1 + }, + RedisModuleEvent_CronLoop = { + REDISMODULE_EVENT_CRON_LOOP, + 1 + }, + RedisModuleEvent_MasterLinkChange = { + REDISMODULE_EVENT_MASTER_LINK_CHANGE, + 1 + }, + RedisModuleEvent_ModuleChange = { + REDISMODULE_EVENT_MODULE_CHANGE, + 1 + }, + RedisModuleEvent_LoadingProgress = { + REDISMODULE_EVENT_LOADING_PROGRESS, + 1 + }, + RedisModuleEvent_SwapDB = { + REDISMODULE_EVENT_SWAPDB, + 1 + }, + RedisModuleEvent_ReplBackup = { + REDISMODULE_EVENT_REPL_BACKUP, + 1 + }, + RedisModuleEvent_ForkChild = { + REDISMODULE_EVENT_FORK_CHILD, + 1 + }; + +/* Those are values that are used for the 'subevent' callback argument. 
*/ +#define REDISMODULE_SUBEVENT_PERSISTENCE_RDB_START 0 +#define REDISMODULE_SUBEVENT_PERSISTENCE_AOF_START 1 +#define REDISMODULE_SUBEVENT_PERSISTENCE_SYNC_RDB_START 2 +#define REDISMODULE_SUBEVENT_PERSISTENCE_ENDED 3 +#define REDISMODULE_SUBEVENT_PERSISTENCE_FAILED 4 +#define _REDISMODULE_SUBEVENT_PERSISTENCE_NEXT 5 + +#define REDISMODULE_SUBEVENT_LOADING_RDB_START 0 +#define REDISMODULE_SUBEVENT_LOADING_AOF_START 1 +#define REDISMODULE_SUBEVENT_LOADING_REPL_START 2 +#define REDISMODULE_SUBEVENT_LOADING_ENDED 3 +#define REDISMODULE_SUBEVENT_LOADING_FAILED 4 +#define _REDISMODULE_SUBEVENT_LOADING_NEXT 5 + +#define REDISMODULE_SUBEVENT_CLIENT_CHANGE_CONNECTED 0 +#define REDISMODULE_SUBEVENT_CLIENT_CHANGE_DISCONNECTED 1 +#define _REDISMODULE_SUBEVENT_CLIENT_CHANGE_NEXT 2 + +#define REDISMODULE_SUBEVENT_MASTER_LINK_UP 0 +#define REDISMODULE_SUBEVENT_MASTER_LINK_DOWN 1 +#define _REDISMODULE_SUBEVENT_MASTER_NEXT 2 + +#define REDISMODULE_SUBEVENT_REPLICA_CHANGE_ONLINE 0 +#define REDISMODULE_SUBEVENT_REPLICA_CHANGE_OFFLINE 1 +#define _REDISMODULE_SUBEVENT_REPLICA_CHANGE_NEXT 2 + +#define REDISMODULE_EVENT_REPLROLECHANGED_NOW_MASTER 0 +#define REDISMODULE_EVENT_REPLROLECHANGED_NOW_REPLICA 1 +#define _REDISMODULE_EVENT_REPLROLECHANGED_NEXT 2 + +#define REDISMODULE_SUBEVENT_FLUSHDB_START 0 +#define REDISMODULE_SUBEVENT_FLUSHDB_END 1 +#define _REDISMODULE_SUBEVENT_FLUSHDB_NEXT 2 + +#define REDISMODULE_SUBEVENT_MODULE_LOADED 0 +#define REDISMODULE_SUBEVENT_MODULE_UNLOADED 1 +#define _REDISMODULE_SUBEVENT_MODULE_NEXT 2 + +#define REDISMODULE_SUBEVENT_LOADING_PROGRESS_RDB 0 +#define REDISMODULE_SUBEVENT_LOADING_PROGRESS_AOF 1 +#define _REDISMODULE_SUBEVENT_LOADING_PROGRESS_NEXT 2 + +#define REDISMODULE_SUBEVENT_REPL_BACKUP_CREATE 0 +#define REDISMODULE_SUBEVENT_REPL_BACKUP_RESTORE 1 +#define REDISMODULE_SUBEVENT_REPL_BACKUP_DISCARD 2 +#define _REDISMODULE_SUBEVENT_REPL_BACKUP_NEXT 3 + +#define REDISMODULE_SUBEVENT_FORK_CHILD_BORN 0 +#define REDISMODULE_SUBEVENT_FORK_CHILD_DIED 
1 +#define _REDISMODULE_SUBEVENT_FORK_CHILD_NEXT 2 + +#define _REDISMODULE_SUBEVENT_SHUTDOWN_NEXT 0 +#define _REDISMODULE_SUBEVENT_CRON_LOOP_NEXT 0 +#define _REDISMODULE_SUBEVENT_SWAPDB_NEXT 0 + +/* RedisModuleClientInfo flags. */ +#define REDISMODULE_CLIENTINFO_FLAG_SSL (1<<0) +#define REDISMODULE_CLIENTINFO_FLAG_PUBSUB (1<<1) +#define REDISMODULE_CLIENTINFO_FLAG_BLOCKED (1<<2) +#define REDISMODULE_CLIENTINFO_FLAG_TRACKING (1<<3) +#define REDISMODULE_CLIENTINFO_FLAG_UNIXSOCKET (1<<4) +#define REDISMODULE_CLIENTINFO_FLAG_MULTI (1<<5) + +/* Here we take all the structures that the module pass to the core + * and the other way around. Notably the list here contains the structures + * used by the hooks API RedisModule_RegisterToServerEvent(). + * + * The structures always start with a 'version' field. This is useful + * when we want to pass a reference to the structure to the core APIs, + * for the APIs to fill the structure. In that case, the structure 'version' + * field is initialized before passing it to the core, so that the core is + * able to cast the pointer to the appropriate structure version. In this + * way we obtain ABI compatibility. + * + * Here we'll list all the structure versions in case they evolve over time, + * however using a define, we'll make sure to use the last version as the + * public name for the module to use. */ + +#define REDISMODULE_CLIENTINFO_VERSION 1 +typedef struct RedisModuleClientInfo { + uint64_t version; /* Version of this structure for ABI compat. */ + uint64_t flags; /* REDISMODULE_CLIENTINFO_FLAG_* */ + uint64_t id; /* Client ID. */ + char addr[46]; /* IPv4 or IPv6 address. */ + uint16_t port; /* TCP port. */ + uint16_t db; /* Selected DB. 
*/ +} RedisModuleClientInfoV1; + +#define RedisModuleClientInfo RedisModuleClientInfoV1 + +#define REDISMODULE_REPLICATIONINFO_VERSION 1 +typedef struct RedisModuleReplicationInfo { + uint64_t version; /* Not used since this structure is never passed + from the module to the core right now. Here + for future compatibility. */ + int master; /* true if master, false if replica */ + const char *masterhost; /* master instance hostname for NOW_REPLICA */ + int masterport; /* master instance port for NOW_REPLICA */ + char *replid1; /* Main replication ID */ + char *replid2; /* Secondary replication ID */ + uint64_t repl1_offset; /* Main replication offset */ + uint64_t repl2_offset; /* Offset of replid2 validity */ +} RedisModuleReplicationInfoV1; + +#define RedisModuleReplicationInfo RedisModuleReplicationInfoV1 + +#define REDISMODULE_FLUSHINFO_VERSION 1 +typedef struct RedisModuleFlushInfo { + uint64_t version; /* Not used since this structure is never passed + from the module to the core right now. Here + for future compatibility. */ + int32_t sync; /* Synchronous or threaded flush? */ + int32_t dbnum; /* Flushed database number, -1 for ALL. */ +} RedisModuleFlushInfoV1; + +#define RedisModuleFlushInfo RedisModuleFlushInfoV1 + +#define REDISMODULE_MODULE_CHANGE_VERSION 1 +typedef struct RedisModuleModuleChange { + uint64_t version; /* Not used since this structure is never passed + from the module to the core right now. Here + for future compatibility. */ + const char* module_name;/* Name of module loaded or unloaded. */ + int32_t module_version; /* Module version. */ +} RedisModuleModuleChangeV1; + +#define RedisModuleModuleChange RedisModuleModuleChangeV1 + +#define REDISMODULE_CRON_LOOP_VERSION 1 +typedef struct RedisModuleCronLoopInfo { + uint64_t version; /* Not used since this structure is never passed + from the module to the core right now. Here + for future compatibility. */ + int32_t hz; /* Approximate number of events per second.
*/ +} RedisModuleCronLoopV1; + +#define RedisModuleCronLoop RedisModuleCronLoopV1 + +#define REDISMODULE_LOADING_PROGRESS_VERSION 1 +typedef struct RedisModuleLoadingProgressInfo { + uint64_t version; /* Not used since this structure is never passed + from the module to the core right now. Here + for future compatibility. */ + int32_t hz; /* Approximate number of events per second. */ + int32_t progress; /* Approximate progress between 0 and 1024, or -1 + * if unknown. */ +} RedisModuleLoadingProgressV1; + +#define RedisModuleLoadingProgress RedisModuleLoadingProgressV1 + +#define REDISMODULE_SWAPDBINFO_VERSION 1 +typedef struct RedisModuleSwapDbInfo { + uint64_t version; /* Not used since this structure is never passed + from the module to the core right now. Here + for future compatibility. */ + int32_t dbnum_first; /* Swap Db first dbnum */ + int32_t dbnum_second; /* Swap Db second dbnum */ +} RedisModuleSwapDbInfoV1; + +#define RedisModuleSwapDbInfo RedisModuleSwapDbInfoV1 + +/* ------------------------- End of common defines ------------------------ */ + +#ifndef REDISMODULE_CORE + +typedef long long mstime_t; + +/* Macro definitions specific to individual compilers */ +#ifndef REDISMODULE_ATTR_UNUSED +# ifdef __GNUC__ +# define REDISMODULE_ATTR_UNUSED __attribute__((unused)) +# else +# define REDISMODULE_ATTR_UNUSED +# endif +#endif + +#ifndef REDISMODULE_ATTR_PRINTF +# ifdef __GNUC__ +# define REDISMODULE_ATTR_PRINTF(idx,cnt) __attribute__((format(printf,idx,cnt))) +# else +# define REDISMODULE_ATTR_PRINTF(idx,cnt) +# endif +#endif + +#ifndef REDISMODULE_ATTR_COMMON +# if defined(__GNUC__) && !defined(__clang__) +# define REDISMODULE_ATTR_COMMON __attribute__((__common__)) +# else +# define REDISMODULE_ATTR_COMMON +# endif +#endif + +/* Incomplete structures for compiler checks but opaque access. 
*/ +typedef struct RedisModuleCtx RedisModuleCtx; +typedef struct RedisModuleKey RedisModuleKey; +typedef struct RedisModuleString RedisModuleString; +typedef struct RedisModuleCallReply RedisModuleCallReply; +typedef struct RedisModuleIO RedisModuleIO; +typedef struct RedisModuleType RedisModuleType; +typedef struct RedisModuleDigest RedisModuleDigest; +typedef struct RedisModuleBlockedClient RedisModuleBlockedClient; +typedef struct RedisModuleClusterInfo RedisModuleClusterInfo; +typedef struct RedisModuleDict RedisModuleDict; +typedef struct RedisModuleDictIter RedisModuleDictIter; +typedef struct RedisModuleCommandFilterCtx RedisModuleCommandFilterCtx; +typedef struct RedisModuleCommandFilter RedisModuleCommandFilter; +typedef struct RedisModuleInfoCtx RedisModuleInfoCtx; +typedef struct RedisModuleServerInfoData RedisModuleServerInfoData; +typedef struct RedisModuleScanCursor RedisModuleScanCursor; +typedef struct RedisModuleDefragCtx RedisModuleDefragCtx; +typedef struct RedisModuleUser RedisModuleUser; + +typedef int (*RedisModuleCmdFunc)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc); +typedef void (*RedisModuleDisconnectFunc)(RedisModuleCtx *ctx, RedisModuleBlockedClient *bc); +typedef int (*RedisModuleNotificationFunc)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key); +typedef void *(*RedisModuleTypeLoadFunc)(RedisModuleIO *rdb, int encver); +typedef void (*RedisModuleTypeSaveFunc)(RedisModuleIO *rdb, void *value); +typedef int (*RedisModuleTypeAuxLoadFunc)(RedisModuleIO *rdb, int encver, int when); +typedef void (*RedisModuleTypeAuxSaveFunc)(RedisModuleIO *rdb, int when); +typedef void (*RedisModuleTypeRewriteFunc)(RedisModuleIO *aof, RedisModuleString *key, void *value); +typedef size_t (*RedisModuleTypeMemUsageFunc)(const void *value); +typedef void (*RedisModuleTypeDigestFunc)(RedisModuleDigest *digest, void *value); +typedef void (*RedisModuleTypeFreeFunc)(void *value); +typedef size_t 
(*RedisModuleTypeFreeEffortFunc)(RedisModuleString *key, const void *value); +typedef void (*RedisModuleTypeUnlinkFunc)(RedisModuleString *key, const void *value); +typedef void *(*RedisModuleTypeCopyFunc)(RedisModuleString *fromkey, RedisModuleString *tokey, const void *value); +typedef int (*RedisModuleTypeDefragFunc)(RedisModuleDefragCtx *ctx, RedisModuleString *key, void **value); +typedef void (*RedisModuleClusterMessageReceiver)(RedisModuleCtx *ctx, const char *sender_id, uint8_t type, const unsigned char *payload, uint32_t len); +typedef void (*RedisModuleTimerProc)(RedisModuleCtx *ctx, void *data); +typedef void (*RedisModuleCommandFilterFunc) (RedisModuleCommandFilterCtx *filter); +typedef void (*RedisModuleForkDoneHandler) (int exitcode, int bysignal, void *user_data); +typedef void (*RedisModuleInfoFunc)(RedisModuleInfoCtx *ctx, int for_crash_report); +typedef void (*RedisModuleScanCB)(RedisModuleCtx *ctx, RedisModuleString *keyname, RedisModuleKey *key, void *privdata); +typedef void (*RedisModuleScanKeyCB)(RedisModuleKey *key, RedisModuleString *field, RedisModuleString *value, void *privdata); +typedef void (*RedisModuleUserChangedFunc) (uint64_t client_id, void *privdata); +typedef int (*RedisModuleDefragFunc)(RedisModuleDefragCtx *ctx); + +typedef struct RedisModuleTypeMethods { + uint64_t version; + RedisModuleTypeLoadFunc rdb_load; + RedisModuleTypeSaveFunc rdb_save; + RedisModuleTypeRewriteFunc aof_rewrite; + RedisModuleTypeMemUsageFunc mem_usage; + RedisModuleTypeDigestFunc digest; + RedisModuleTypeFreeFunc free; + RedisModuleTypeAuxLoadFunc aux_load; + RedisModuleTypeAuxSaveFunc aux_save; + int aux_save_triggers; + RedisModuleTypeFreeEffortFunc free_effort; + RedisModuleTypeUnlinkFunc unlink; + RedisModuleTypeCopyFunc copy; + RedisModuleTypeDefragFunc defrag; +} RedisModuleTypeMethods; + +#define REDISMODULE_GET_API(name) \ + RedisModule_GetApi("RedisModule_" #name, ((void **)&RedisModule_ ## name)) + +/* Default API declaration prefix (not 
'extern' for backwards compatibility) */ +#ifndef REDISMODULE_API +#define REDISMODULE_API +#endif + +/* Default API declaration suffix (compiler attributes) */ +#ifndef REDISMODULE_ATTR +#define REDISMODULE_ATTR REDISMODULE_ATTR_COMMON +#endif + +REDISMODULE_API void * (*RedisModule_Alloc)(size_t bytes) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_Realloc)(void *ptr, size_t bytes) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_Free)(void *ptr) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_Calloc)(size_t nmemb, size_t size) REDISMODULE_ATTR; +REDISMODULE_API char * (*RedisModule_Strdup)(const char *str) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetApi)(const char *, void *) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_CreateCommand)(RedisModuleCtx *ctx, const char *name, RedisModuleCmdFunc cmdfunc, const char *strflags, int firstkey, int lastkey, int keystep) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SetModuleAttribs)(RedisModuleCtx *ctx, const char *name, int ver, int apiver) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_IsModuleNameBusy)(const char *name) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_WrongArity)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithLongLong)(RedisModuleCtx *ctx, long long ll) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetSelectedDb)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SelectDb)(RedisModuleCtx *ctx, int newid) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_OpenKey)(RedisModuleCtx *ctx, RedisModuleString *keyname, int mode) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_CloseKey)(RedisModuleKey *kp) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_KeyType)(RedisModuleKey *kp) REDISMODULE_ATTR; +REDISMODULE_API size_t (*RedisModule_ValueLength)(RedisModuleKey *kp) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ListPush)(RedisModuleKey *kp, int where, 
RedisModuleString *ele) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_ListPop)(RedisModuleKey *key, int where) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleCallReply * (*RedisModule_Call)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR; +REDISMODULE_API const char * (*RedisModule_CallReplyProto)(RedisModuleCallReply *reply, size_t *len) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_FreeCallReply)(RedisModuleCallReply *reply) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_CallReplyType)(RedisModuleCallReply *reply) REDISMODULE_ATTR; +REDISMODULE_API long long (*RedisModule_CallReplyInteger)(RedisModuleCallReply *reply) REDISMODULE_ATTR; +REDISMODULE_API size_t (*RedisModule_CallReplyLength)(RedisModuleCallReply *reply) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleCallReply * (*RedisModule_CallReplyArrayElement)(RedisModuleCallReply *reply, size_t idx) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateString)(RedisModuleCtx *ctx, const char *ptr, size_t len) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongLong)(RedisModuleCtx *ctx, long long ll) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromDouble)(RedisModuleCtx *ctx, double d) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromLongDouble)(RedisModuleCtx *ctx, long double ld, int humanfriendly) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromString)(RedisModuleCtx *ctx, const RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromStreamID)(RedisModuleCtx *ctx, const RedisModuleStreamID *id) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringPrintf)(RedisModuleCtx *ctx, const char *fmt, ...) 
REDISMODULE_ATTR_PRINTF(2,3) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_FreeString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API const char * (*RedisModule_StringPtrLen)(const RedisModuleString *str, size_t *len) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithError)(RedisModuleCtx *ctx, const char *err) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithSimpleString)(RedisModuleCtx *ctx, const char *msg) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithArray)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithNullArray)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithEmptyArray)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_ReplySetArrayLength)(RedisModuleCtx *ctx, long len) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithStringBuffer)(RedisModuleCtx *ctx, const char *buf, size_t len) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithCString)(RedisModuleCtx *ctx, const char *buf) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithEmptyString)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithVerbatimString)(RedisModuleCtx *ctx, const char *buf, size_t len) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithNull)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithDouble)(RedisModuleCtx *ctx, double d) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithLongDouble)(RedisModuleCtx *ctx, long double d) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplyWithCallReply)(RedisModuleCtx *ctx, RedisModuleCallReply *reply) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringToLongLong)(const RedisModuleString *str, long long *ll) 
REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringToDouble)(const RedisModuleString *str, double *d) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringToLongDouble)(const RedisModuleString *str, long double *d) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringToStreamID)(const RedisModuleString *str, RedisModuleStreamID *id) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_AutoMemory)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_Replicate)(RedisModuleCtx *ctx, const char *cmdname, const char *fmt, ...) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ReplicateVerbatim)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API const char * (*RedisModule_CallReplyStringPtr)(RedisModuleCallReply *reply, size_t *len) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_CreateStringFromCallReply)(RedisModuleCallReply *reply) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DeleteKey)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_UnlinkKey)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringSet)(RedisModuleKey *key, RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API char * (*RedisModule_StringDMA)(RedisModuleKey *key, size_t *len, int mode) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringTruncate)(RedisModuleKey *key, size_t newlen) REDISMODULE_ATTR; +REDISMODULE_API mstime_t (*RedisModule_GetExpire)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SetExpire)(RedisModuleKey *key, mstime_t expire) REDISMODULE_ATTR; +REDISMODULE_API mstime_t (*RedisModule_GetAbsExpire)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SetAbsExpire)(RedisModuleKey *key, mstime_t expire) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_ResetDataset)(int restart_aof, int async) REDISMODULE_ATTR; +REDISMODULE_API unsigned long long (*RedisModule_DbSize)(RedisModuleCtx *ctx) 
REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_RandomKey)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetAdd)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetIncrby)(RedisModuleKey *key, double score, RedisModuleString *ele, int *flagsptr, double *newscore) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetScore)(RedisModuleKey *key, RedisModuleString *ele, double *score) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetRem)(RedisModuleKey *key, RedisModuleString *ele, int *deleted) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_ZsetRangeStop)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetFirstInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetLastInScoreRange)(RedisModuleKey *key, double min, double max, int minex, int maxex) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetFirstInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetLastInLexRange)(RedisModuleKey *key, RedisModuleString *min, RedisModuleString *max) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_ZsetRangeCurrentElement)(RedisModuleKey *key, double *score) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetRangeNext)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetRangePrev)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ZsetRangeEndReached)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_HashSet)(RedisModuleKey *key, int flags, ...) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_HashGet)(RedisModuleKey *key, int flags, ...) 
REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamAdd)(RedisModuleKey *key, int flags, RedisModuleStreamID *id, RedisModuleString **argv, int64_t numfields) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamDelete)(RedisModuleKey *key, RedisModuleStreamID *id) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorStart)(RedisModuleKey *key, int flags, RedisModuleStreamID *startid, RedisModuleStreamID *endid) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorStop)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorNextID)(RedisModuleKey *key, RedisModuleStreamID *id, long *numfields) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorNextField)(RedisModuleKey *key, RedisModuleString **field_ptr, RedisModuleString **value_ptr) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StreamIteratorDelete)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API long long (*RedisModule_StreamTrimByLength)(RedisModuleKey *key, int flags, long long length) REDISMODULE_ATTR; +REDISMODULE_API long long (*RedisModule_StreamTrimByID)(RedisModuleKey *key, int flags, RedisModuleStreamID *id) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_IsKeysPositionRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_KeyAtPos)(RedisModuleCtx *ctx, int pos) REDISMODULE_ATTR; +REDISMODULE_API unsigned long long (*RedisModule_GetClientId)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_GetClientUserNameById)(RedisModuleCtx *ctx, uint64_t id) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetClientInfoById)(void *ci, uint64_t id) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_PublishMessage)(RedisModuleCtx *ctx, RedisModuleString *channel, RedisModuleString *message) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetContextFlags)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int 
(*RedisModule_AvoidReplicaTraffic)() REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_PoolAlloc)(RedisModuleCtx *ctx, size_t bytes) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleType * (*RedisModule_CreateDataType)(RedisModuleCtx *ctx, const char *name, int encver, RedisModuleTypeMethods *typemethods) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ModuleTypeSetValue)(RedisModuleKey *key, RedisModuleType *mt, void *value) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ModuleTypeReplaceValue)(RedisModuleKey *key, RedisModuleType *mt, void *new_value, void **old_value) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleType * (*RedisModule_ModuleTypeGetType)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_ModuleTypeGetValue)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_IsIOError)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SetModuleOptions)(RedisModuleCtx *ctx, int options) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SignalModifiedKey)(RedisModuleCtx *ctx, RedisModuleString *keyname) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SaveUnsigned)(RedisModuleIO *io, uint64_t value) REDISMODULE_ATTR; +REDISMODULE_API uint64_t (*RedisModule_LoadUnsigned)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SaveSigned)(RedisModuleIO *io, int64_t value) REDISMODULE_ATTR; +REDISMODULE_API int64_t (*RedisModule_LoadSigned)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_EmitAOF)(RedisModuleIO *io, const char *cmdname, const char *fmt, ...) 
REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SaveString)(RedisModuleIO *io, RedisModuleString *s) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SaveStringBuffer)(RedisModuleIO *io, const char *str, size_t len) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_LoadString)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API char * (*RedisModule_LoadStringBuffer)(RedisModuleIO *io, size_t *lenptr) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SaveDouble)(RedisModuleIO *io, double value) REDISMODULE_ATTR; +REDISMODULE_API double (*RedisModule_LoadDouble)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SaveFloat)(RedisModuleIO *io, float value) REDISMODULE_ATTR; +REDISMODULE_API float (*RedisModule_LoadFloat)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SaveLongDouble)(RedisModuleIO *io, long double value) REDISMODULE_ATTR; +REDISMODULE_API long double (*RedisModule_LoadLongDouble)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_LoadDataTypeFromString)(const RedisModuleString *str, const RedisModuleType *mt) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_SaveDataTypeToString)(RedisModuleCtx *ctx, void *data, const RedisModuleType *mt) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_Log)(RedisModuleCtx *ctx, const char *level, const char *fmt, ...) REDISMODULE_ATTR REDISMODULE_ATTR_PRINTF(3,4); +REDISMODULE_API void (*RedisModule_LogIOError)(RedisModuleIO *io, const char *levelstr, const char *fmt, ...) 
REDISMODULE_ATTR REDISMODULE_ATTR_PRINTF(3,4); +REDISMODULE_API void (*RedisModule__Assert)(const char *estr, const char *file, int line) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_LatencyAddSample)(const char *event, mstime_t latency) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringAppendBuffer)(RedisModuleCtx *ctx, RedisModuleString *str, const char *buf, size_t len) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_RetainString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_HoldString)(RedisModuleCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StringCompare)(RedisModuleString *a, RedisModuleString *b) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleCtx * (*RedisModule_GetContextFromIO)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API const RedisModuleString * (*RedisModule_GetKeyNameFromIO)(RedisModuleIO *io) REDISMODULE_ATTR; +REDISMODULE_API const RedisModuleString * (*RedisModule_GetKeyNameFromModuleKey)(RedisModuleKey *key) REDISMODULE_ATTR; +REDISMODULE_API long long (*RedisModule_Milliseconds)(void) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_DigestAddStringBuffer)(RedisModuleDigest *md, unsigned char *ele, size_t len) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_DigestAddLongLong)(RedisModuleDigest *md, long long ele) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_DigestEndSequence)(RedisModuleDigest *md) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleDict * (*RedisModule_CreateDict)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_FreeDict)(RedisModuleCtx *ctx, RedisModuleDict *d) REDISMODULE_ATTR; +REDISMODULE_API uint64_t (*RedisModule_DictSize)(RedisModuleDict *d) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictSetC)(RedisModuleDict *d, void *key, size_t keylen, void *ptr) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictReplaceC)(RedisModuleDict *d, 
void *key, size_t keylen, void *ptr) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictSet)(RedisModuleDict *d, RedisModuleString *key, void *ptr) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictReplace)(RedisModuleDict *d, RedisModuleString *key, void *ptr) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_DictGetC)(RedisModuleDict *d, void *key, size_t keylen, int *nokey) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_DictGet)(RedisModuleDict *d, RedisModuleString *key, int *nokey) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictDelC)(RedisModuleDict *d, void *key, size_t keylen, void *oldval) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictDel)(RedisModuleDict *d, RedisModuleString *key, void *oldval) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleDictIter * (*RedisModule_DictIteratorStartC)(RedisModuleDict *d, const char *op, void *key, size_t keylen) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleDictIter * (*RedisModule_DictIteratorStart)(RedisModuleDict *d, const char *op, RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_DictIteratorStop)(RedisModuleDictIter *di) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictIteratorReseekC)(RedisModuleDictIter *di, const char *op, void *key, size_t keylen) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictIteratorReseek)(RedisModuleDictIter *di, const char *op, RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_DictNextC)(RedisModuleDictIter *di, size_t *keylen, void **dataptr) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_DictPrevC)(RedisModuleDictIter *di, size_t *keylen, void **dataptr) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_DictNext)(RedisModuleCtx *ctx, RedisModuleDictIter *di, void **dataptr) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_DictPrev)(RedisModuleCtx *ctx, RedisModuleDictIter *di, void **dataptr) REDISMODULE_ATTR; 
+REDISMODULE_API int (*RedisModule_DictCompareC)(RedisModuleDictIter *di, const char *op, void *key, size_t keylen) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DictCompare)(RedisModuleDictIter *di, const char *op, RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_RegisterInfoFunc)(RedisModuleCtx *ctx, RedisModuleInfoFunc cb) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoAddSection)(RedisModuleInfoCtx *ctx, char *name) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoBeginDictField)(RedisModuleInfoCtx *ctx, char *name) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoEndDictField)(RedisModuleInfoCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoAddFieldString)(RedisModuleInfoCtx *ctx, char *field, RedisModuleString *value) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoAddFieldCString)(RedisModuleInfoCtx *ctx, char *field, char *value) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoAddFieldDouble)(RedisModuleInfoCtx *ctx, char *field, double value) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoAddFieldLongLong)(RedisModuleInfoCtx *ctx, char *field, long long value) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_InfoAddFieldULongLong)(RedisModuleInfoCtx *ctx, char *field, unsigned long long value) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleServerInfoData * (*RedisModule_GetServerInfo)(RedisModuleCtx *ctx, const char *section) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_FreeServerInfo)(RedisModuleCtx *ctx, RedisModuleServerInfoData *data) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_ServerInfoGetField)(RedisModuleCtx *ctx, RedisModuleServerInfoData *data, const char* field) REDISMODULE_ATTR; +REDISMODULE_API const char * (*RedisModule_ServerInfoGetFieldC)(RedisModuleServerInfoData *data, const char* field) REDISMODULE_ATTR; +REDISMODULE_API long long (*RedisModule_ServerInfoGetFieldSigned)(RedisModuleServerInfoData 
*data, const char* field, int *out_err) REDISMODULE_ATTR; +REDISMODULE_API unsigned long long (*RedisModule_ServerInfoGetFieldUnsigned)(RedisModuleServerInfoData *data, const char* field, int *out_err) REDISMODULE_ATTR; +REDISMODULE_API double (*RedisModule_ServerInfoGetFieldDouble)(RedisModuleServerInfoData *data, const char* field, int *out_err) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SubscribeToServerEvent)(RedisModuleCtx *ctx, RedisModuleEvent event, RedisModuleEventCallback callback) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SetLRU)(RedisModuleKey *key, mstime_t lru_idle) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetLRU)(RedisModuleKey *key, mstime_t *lru_idle) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SetLFU)(RedisModuleKey *key, long long lfu_freq) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetLFU)(RedisModuleKey *key, long long *lfu_freq) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_BlockClientOnKeys)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms, RedisModuleString **keys, int numkeys, void *privdata) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SignalKeyAsReady)(RedisModuleCtx *ctx, RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_GetBlockedClientReadyKey)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleScanCursor * (*RedisModule_ScanCursorCreate)() REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_ScanCursorRestart)(RedisModuleScanCursor *cursor) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_ScanCursorDestroy)(RedisModuleScanCursor *cursor) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_Scan)(RedisModuleCtx *ctx, RedisModuleScanCursor *cursor, RedisModuleScanCB fn, void *privdata) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ScanKey)(RedisModuleKey *key, 
RedisModuleScanCursor *cursor, RedisModuleScanKeyCB fn, void *privdata) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetContextFlagsAll)() REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetKeyspaceNotificationFlagsAll)() REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_IsSubEventSupported)(RedisModuleEvent event, uint64_t subevent) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetServerVersion)() REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetTypeMethodVersion)() REDISMODULE_ATTR; + +/* Experimental APIs */ +#ifdef REDISMODULE_EXPERIMENTAL_API +#define REDISMODULE_EXPERIMENTAL_API_VERSION 3 +REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_BlockClient)(RedisModuleCtx *ctx, RedisModuleCmdFunc reply_callback, RedisModuleCmdFunc timeout_callback, void (*free_privdata)(RedisModuleCtx*,void*), long long timeout_ms) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_UnblockClient)(RedisModuleBlockedClient *bc, void *privdata) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_IsBlockedReplyRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_IsBlockedTimeoutRequest)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_GetBlockedClientPrivateData)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleBlockedClient * (*RedisModule_GetBlockedClientHandle)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_AbortBlock)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeStart)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_BlockedClientMeasureTimeEnd)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleCtx * (*RedisModule_GetThreadSafeContext)(RedisModuleBlockedClient *bc) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleCtx * (*RedisModule_GetDetachedThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void 
(*RedisModule_FreeThreadSafeContext)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_ThreadSafeContextLock)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ThreadSafeContextTryLock)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_ThreadSafeContextUnlock)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SubscribeToKeyspaceEvents)(RedisModuleCtx *ctx, int types, RedisModuleNotificationFunc cb) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_NotifyKeyspaceEvent)(RedisModuleCtx *ctx, int type, const char *event, RedisModuleString *key) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetNotifyKeyspaceEvents)() REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_BlockedClientDisconnected)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_RegisterClusterMessageReceiver)(RedisModuleCtx *ctx, uint8_t type, RedisModuleClusterMessageReceiver callback) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SendClusterMessage)(RedisModuleCtx *ctx, char *target_id, uint8_t type, unsigned char *msg, uint32_t len) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetClusterNodeInfo)(RedisModuleCtx *ctx, const char *id, char *ip, char *master_id, int *port, int *flags) REDISMODULE_ATTR; +REDISMODULE_API char ** (*RedisModule_GetClusterNodesList)(RedisModuleCtx *ctx, size_t *numnodes) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_FreeClusterNodesList)(char **ids) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleTimerID (*RedisModule_CreateTimer)(RedisModuleCtx *ctx, mstime_t period, RedisModuleTimerProc callback, void *data) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_StopTimer)(RedisModuleCtx *ctx, RedisModuleTimerID id, void **data) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_GetTimerInfo)(RedisModuleCtx *ctx, RedisModuleTimerID id, uint64_t *remaining, void **data) REDISMODULE_ATTR; +REDISMODULE_API const char * 
(*RedisModule_GetMyClusterID)(void) REDISMODULE_ATTR; +REDISMODULE_API size_t (*RedisModule_GetClusterSize)(void) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_GetRandomBytes)(unsigned char *dst, size_t len) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_GetRandomHexChars)(char *dst, size_t len) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SetDisconnectCallback)(RedisModuleBlockedClient *bc, RedisModuleDisconnectFunc callback) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SetClusterFlags)(RedisModuleCtx *ctx, uint64_t flags) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_ExportSharedAPI)(RedisModuleCtx *ctx, const char *apiname, void *func) REDISMODULE_ATTR; +REDISMODULE_API void * (*RedisModule_GetSharedAPI)(RedisModuleCtx *ctx, const char *apiname) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleCommandFilter * (*RedisModule_RegisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilterFunc cb, int flags) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_UnregisterCommandFilter)(RedisModuleCtx *ctx, RedisModuleCommandFilter *filter) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_CommandFilterArgsCount)(RedisModuleCommandFilterCtx *fctx) REDISMODULE_ATTR; +REDISMODULE_API const RedisModuleString * (*RedisModule_CommandFilterArgGet)(RedisModuleCommandFilterCtx *fctx, int pos) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_CommandFilterArgInsert)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_CommandFilterArgReplace)(RedisModuleCommandFilterCtx *fctx, int pos, RedisModuleString *arg) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_CommandFilterArgDelete)(RedisModuleCommandFilterCtx *fctx, int pos) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_Fork)(RedisModuleForkDoneHandler cb, void *user_data) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_SendChildHeartbeat)(double progress) REDISMODULE_ATTR; +REDISMODULE_API int 
(*RedisModule_ExitFromChild)(int retcode) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_KillForkChild)(int child_pid) REDISMODULE_ATTR; +REDISMODULE_API float (*RedisModule_GetUsedMemoryRatio)() REDISMODULE_ATTR; +REDISMODULE_API size_t (*RedisModule_MallocSize)(void* ptr) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleUser * (*RedisModule_CreateModuleUser)(const char *name) REDISMODULE_ATTR; +REDISMODULE_API void (*RedisModule_FreeModuleUser)(RedisModuleUser *user) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_SetModuleUserACL)(RedisModuleUser *user, const char* acl) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_AuthenticateClientWithACLUser)(RedisModuleCtx *ctx, const char *name, size_t len, RedisModuleUserChangedFunc callback, void *privdata, uint64_t *client_id) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_AuthenticateClientWithUser)(RedisModuleCtx *ctx, RedisModuleUser *user, RedisModuleUserChangedFunc callback, void *privdata, uint64_t *client_id) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DeauthenticateAndCloseClient)(RedisModuleCtx *ctx, uint64_t client_id) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString * (*RedisModule_GetClientCertificate)(RedisModuleCtx *ctx, uint64_t id) REDISMODULE_ATTR; +REDISMODULE_API int *(*RedisModule_GetCommandKeys)(RedisModuleCtx *ctx, RedisModuleString **argv, int argc, int *num_keys) REDISMODULE_ATTR; +REDISMODULE_API const char *(*RedisModule_GetCurrentCommandName)(RedisModuleCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_RegisterDefragFunc)(RedisModuleCtx *ctx, RedisModuleDefragFunc func) REDISMODULE_ATTR; +REDISMODULE_API void *(*RedisModule_DefragAlloc)(RedisModuleDefragCtx *ctx, void *ptr) REDISMODULE_ATTR; +REDISMODULE_API RedisModuleString *(*RedisModule_DefragRedisModuleString)(RedisModuleDefragCtx *ctx, RedisModuleString *str) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DefragShouldStop)(RedisModuleDefragCtx *ctx) REDISMODULE_ATTR; +REDISMODULE_API 
int (*RedisModule_DefragCursorSet)(RedisModuleDefragCtx *ctx, unsigned long cursor) REDISMODULE_ATTR; +REDISMODULE_API int (*RedisModule_DefragCursorGet)(RedisModuleDefragCtx *ctx, unsigned long *cursor) REDISMODULE_ATTR; +#endif + +#define RedisModule_IsAOFClient(id) ((id) == UINT64_MAX) + +/* This is included inline inside each Redis module. */ +static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) REDISMODULE_ATTR_UNUSED; +static int RedisModule_Init(RedisModuleCtx *ctx, const char *name, int ver, int apiver) { + void *getapifuncptr = ((void**)ctx)[0]; + RedisModule_GetApi = (int (*)(const char *, void *)) (unsigned long)getapifuncptr; + REDISMODULE_GET_API(Alloc); + REDISMODULE_GET_API(Calloc); + REDISMODULE_GET_API(Free); + REDISMODULE_GET_API(Realloc); + REDISMODULE_GET_API(Strdup); + REDISMODULE_GET_API(CreateCommand); + REDISMODULE_GET_API(SetModuleAttribs); + REDISMODULE_GET_API(IsModuleNameBusy); + REDISMODULE_GET_API(WrongArity); + REDISMODULE_GET_API(ReplyWithLongLong); + REDISMODULE_GET_API(ReplyWithError); + REDISMODULE_GET_API(ReplyWithSimpleString); + REDISMODULE_GET_API(ReplyWithArray); + REDISMODULE_GET_API(ReplyWithNullArray); + REDISMODULE_GET_API(ReplyWithEmptyArray); + REDISMODULE_GET_API(ReplySetArrayLength); + REDISMODULE_GET_API(ReplyWithStringBuffer); + REDISMODULE_GET_API(ReplyWithCString); + REDISMODULE_GET_API(ReplyWithString); + REDISMODULE_GET_API(ReplyWithEmptyString); + REDISMODULE_GET_API(ReplyWithVerbatimString); + REDISMODULE_GET_API(ReplyWithNull); + REDISMODULE_GET_API(ReplyWithCallReply); + REDISMODULE_GET_API(ReplyWithDouble); + REDISMODULE_GET_API(ReplyWithLongDouble); + REDISMODULE_GET_API(GetSelectedDb); + REDISMODULE_GET_API(SelectDb); + REDISMODULE_GET_API(OpenKey); + REDISMODULE_GET_API(CloseKey); + REDISMODULE_GET_API(KeyType); + REDISMODULE_GET_API(ValueLength); + REDISMODULE_GET_API(ListPush); + REDISMODULE_GET_API(ListPop); + REDISMODULE_GET_API(StringToLongLong); + 
REDISMODULE_GET_API(StringToDouble); + REDISMODULE_GET_API(StringToLongDouble); + REDISMODULE_GET_API(StringToStreamID); + REDISMODULE_GET_API(Call); + REDISMODULE_GET_API(CallReplyProto); + REDISMODULE_GET_API(FreeCallReply); + REDISMODULE_GET_API(CallReplyInteger); + REDISMODULE_GET_API(CallReplyType); + REDISMODULE_GET_API(CallReplyLength); + REDISMODULE_GET_API(CallReplyArrayElement); + REDISMODULE_GET_API(CallReplyStringPtr); + REDISMODULE_GET_API(CreateStringFromCallReply); + REDISMODULE_GET_API(CreateString); + REDISMODULE_GET_API(CreateStringFromLongLong); + REDISMODULE_GET_API(CreateStringFromDouble); + REDISMODULE_GET_API(CreateStringFromLongDouble); + REDISMODULE_GET_API(CreateStringFromString); + REDISMODULE_GET_API(CreateStringFromStreamID); + REDISMODULE_GET_API(CreateStringPrintf); + REDISMODULE_GET_API(FreeString); + REDISMODULE_GET_API(StringPtrLen); + REDISMODULE_GET_API(AutoMemory); + REDISMODULE_GET_API(Replicate); + REDISMODULE_GET_API(ReplicateVerbatim); + REDISMODULE_GET_API(DeleteKey); + REDISMODULE_GET_API(UnlinkKey); + REDISMODULE_GET_API(StringSet); + REDISMODULE_GET_API(StringDMA); + REDISMODULE_GET_API(StringTruncate); + REDISMODULE_GET_API(GetExpire); + REDISMODULE_GET_API(SetExpire); + REDISMODULE_GET_API(GetAbsExpire); + REDISMODULE_GET_API(SetAbsExpire); + REDISMODULE_GET_API(ResetDataset); + REDISMODULE_GET_API(DbSize); + REDISMODULE_GET_API(RandomKey); + REDISMODULE_GET_API(ZsetAdd); + REDISMODULE_GET_API(ZsetIncrby); + REDISMODULE_GET_API(ZsetScore); + REDISMODULE_GET_API(ZsetRem); + REDISMODULE_GET_API(ZsetRangeStop); + REDISMODULE_GET_API(ZsetFirstInScoreRange); + REDISMODULE_GET_API(ZsetLastInScoreRange); + REDISMODULE_GET_API(ZsetFirstInLexRange); + REDISMODULE_GET_API(ZsetLastInLexRange); + REDISMODULE_GET_API(ZsetRangeCurrentElement); + REDISMODULE_GET_API(ZsetRangeNext); + REDISMODULE_GET_API(ZsetRangePrev); + REDISMODULE_GET_API(ZsetRangeEndReached); + REDISMODULE_GET_API(HashSet); + REDISMODULE_GET_API(HashGet); + 
REDISMODULE_GET_API(StreamAdd); + REDISMODULE_GET_API(StreamDelete); + REDISMODULE_GET_API(StreamIteratorStart); + REDISMODULE_GET_API(StreamIteratorStop); + REDISMODULE_GET_API(StreamIteratorNextID); + REDISMODULE_GET_API(StreamIteratorNextField); + REDISMODULE_GET_API(StreamIteratorDelete); + REDISMODULE_GET_API(StreamTrimByLength); + REDISMODULE_GET_API(StreamTrimByID); + REDISMODULE_GET_API(IsKeysPositionRequest); + REDISMODULE_GET_API(KeyAtPos); + REDISMODULE_GET_API(GetClientId); + REDISMODULE_GET_API(GetClientUserNameById); + REDISMODULE_GET_API(GetContextFlags); + REDISMODULE_GET_API(AvoidReplicaTraffic); + REDISMODULE_GET_API(PoolAlloc); + REDISMODULE_GET_API(CreateDataType); + REDISMODULE_GET_API(ModuleTypeSetValue); + REDISMODULE_GET_API(ModuleTypeReplaceValue); + REDISMODULE_GET_API(ModuleTypeGetType); + REDISMODULE_GET_API(ModuleTypeGetValue); + REDISMODULE_GET_API(IsIOError); + REDISMODULE_GET_API(SetModuleOptions); + REDISMODULE_GET_API(SignalModifiedKey); + REDISMODULE_GET_API(SaveUnsigned); + REDISMODULE_GET_API(LoadUnsigned); + REDISMODULE_GET_API(SaveSigned); + REDISMODULE_GET_API(LoadSigned); + REDISMODULE_GET_API(SaveString); + REDISMODULE_GET_API(SaveStringBuffer); + REDISMODULE_GET_API(LoadString); + REDISMODULE_GET_API(LoadStringBuffer); + REDISMODULE_GET_API(SaveDouble); + REDISMODULE_GET_API(LoadDouble); + REDISMODULE_GET_API(SaveFloat); + REDISMODULE_GET_API(LoadFloat); + REDISMODULE_GET_API(SaveLongDouble); + REDISMODULE_GET_API(LoadLongDouble); + REDISMODULE_GET_API(SaveDataTypeToString); + REDISMODULE_GET_API(LoadDataTypeFromString); + REDISMODULE_GET_API(EmitAOF); + REDISMODULE_GET_API(Log); + REDISMODULE_GET_API(LogIOError); + REDISMODULE_GET_API(_Assert); + REDISMODULE_GET_API(LatencyAddSample); + REDISMODULE_GET_API(StringAppendBuffer); + REDISMODULE_GET_API(RetainString); + REDISMODULE_GET_API(HoldString); + REDISMODULE_GET_API(StringCompare); + REDISMODULE_GET_API(GetContextFromIO); + REDISMODULE_GET_API(GetKeyNameFromIO); + 
REDISMODULE_GET_API(GetKeyNameFromModuleKey); + REDISMODULE_GET_API(Milliseconds); + REDISMODULE_GET_API(DigestAddStringBuffer); + REDISMODULE_GET_API(DigestAddLongLong); + REDISMODULE_GET_API(DigestEndSequence); + REDISMODULE_GET_API(CreateDict); + REDISMODULE_GET_API(FreeDict); + REDISMODULE_GET_API(DictSize); + REDISMODULE_GET_API(DictSetC); + REDISMODULE_GET_API(DictReplaceC); + REDISMODULE_GET_API(DictSet); + REDISMODULE_GET_API(DictReplace); + REDISMODULE_GET_API(DictGetC); + REDISMODULE_GET_API(DictGet); + REDISMODULE_GET_API(DictDelC); + REDISMODULE_GET_API(DictDel); + REDISMODULE_GET_API(DictIteratorStartC); + REDISMODULE_GET_API(DictIteratorStart); + REDISMODULE_GET_API(DictIteratorStop); + REDISMODULE_GET_API(DictIteratorReseekC); + REDISMODULE_GET_API(DictIteratorReseek); + REDISMODULE_GET_API(DictNextC); + REDISMODULE_GET_API(DictPrevC); + REDISMODULE_GET_API(DictNext); + REDISMODULE_GET_API(DictPrev); + REDISMODULE_GET_API(DictCompare); + REDISMODULE_GET_API(DictCompareC); + REDISMODULE_GET_API(RegisterInfoFunc); + REDISMODULE_GET_API(InfoAddSection); + REDISMODULE_GET_API(InfoBeginDictField); + REDISMODULE_GET_API(InfoEndDictField); + REDISMODULE_GET_API(InfoAddFieldString); + REDISMODULE_GET_API(InfoAddFieldCString); + REDISMODULE_GET_API(InfoAddFieldDouble); + REDISMODULE_GET_API(InfoAddFieldLongLong); + REDISMODULE_GET_API(InfoAddFieldULongLong); + REDISMODULE_GET_API(GetServerInfo); + REDISMODULE_GET_API(FreeServerInfo); + REDISMODULE_GET_API(ServerInfoGetField); + REDISMODULE_GET_API(ServerInfoGetFieldC); + REDISMODULE_GET_API(ServerInfoGetFieldSigned); + REDISMODULE_GET_API(ServerInfoGetFieldUnsigned); + REDISMODULE_GET_API(ServerInfoGetFieldDouble); + REDISMODULE_GET_API(GetClientInfoById); + REDISMODULE_GET_API(PublishMessage); + REDISMODULE_GET_API(SubscribeToServerEvent); + REDISMODULE_GET_API(SetLRU); + REDISMODULE_GET_API(GetLRU); + REDISMODULE_GET_API(SetLFU); + REDISMODULE_GET_API(GetLFU); + REDISMODULE_GET_API(BlockClientOnKeys); + 
REDISMODULE_GET_API(SignalKeyAsReady); + REDISMODULE_GET_API(GetBlockedClientReadyKey); + REDISMODULE_GET_API(ScanCursorCreate); + REDISMODULE_GET_API(ScanCursorRestart); + REDISMODULE_GET_API(ScanCursorDestroy); + REDISMODULE_GET_API(Scan); + REDISMODULE_GET_API(ScanKey); + REDISMODULE_GET_API(GetContextFlagsAll); + REDISMODULE_GET_API(GetKeyspaceNotificationFlagsAll); + REDISMODULE_GET_API(IsSubEventSupported); + REDISMODULE_GET_API(GetServerVersion); + REDISMODULE_GET_API(GetTypeMethodVersion); + +#ifdef REDISMODULE_EXPERIMENTAL_API + REDISMODULE_GET_API(GetThreadSafeContext); + REDISMODULE_GET_API(GetDetachedThreadSafeContext); + REDISMODULE_GET_API(FreeThreadSafeContext); + REDISMODULE_GET_API(ThreadSafeContextLock); + REDISMODULE_GET_API(ThreadSafeContextTryLock); + REDISMODULE_GET_API(ThreadSafeContextUnlock); + REDISMODULE_GET_API(BlockClient); + REDISMODULE_GET_API(UnblockClient); + REDISMODULE_GET_API(IsBlockedReplyRequest); + REDISMODULE_GET_API(IsBlockedTimeoutRequest); + REDISMODULE_GET_API(GetBlockedClientPrivateData); + REDISMODULE_GET_API(GetBlockedClientHandle); + REDISMODULE_GET_API(AbortBlock); + REDISMODULE_GET_API(BlockedClientMeasureTimeStart); + REDISMODULE_GET_API(BlockedClientMeasureTimeEnd); + REDISMODULE_GET_API(SetDisconnectCallback); + REDISMODULE_GET_API(SubscribeToKeyspaceEvents); + REDISMODULE_GET_API(NotifyKeyspaceEvent); + REDISMODULE_GET_API(GetNotifyKeyspaceEvents); + REDISMODULE_GET_API(BlockedClientDisconnected); + REDISMODULE_GET_API(RegisterClusterMessageReceiver); + REDISMODULE_GET_API(SendClusterMessage); + REDISMODULE_GET_API(GetClusterNodeInfo); + REDISMODULE_GET_API(GetClusterNodesList); + REDISMODULE_GET_API(FreeClusterNodesList); + REDISMODULE_GET_API(CreateTimer); + REDISMODULE_GET_API(StopTimer); + REDISMODULE_GET_API(GetTimerInfo); + REDISMODULE_GET_API(GetMyClusterID); + REDISMODULE_GET_API(GetClusterSize); + REDISMODULE_GET_API(GetRandomBytes); + REDISMODULE_GET_API(GetRandomHexChars); + 
REDISMODULE_GET_API(SetClusterFlags); + REDISMODULE_GET_API(ExportSharedAPI); + REDISMODULE_GET_API(GetSharedAPI); + REDISMODULE_GET_API(RegisterCommandFilter); + REDISMODULE_GET_API(UnregisterCommandFilter); + REDISMODULE_GET_API(CommandFilterArgsCount); + REDISMODULE_GET_API(CommandFilterArgGet); + REDISMODULE_GET_API(CommandFilterArgInsert); + REDISMODULE_GET_API(CommandFilterArgReplace); + REDISMODULE_GET_API(CommandFilterArgDelete); + REDISMODULE_GET_API(Fork); + REDISMODULE_GET_API(SendChildHeartbeat); + REDISMODULE_GET_API(ExitFromChild); + REDISMODULE_GET_API(KillForkChild); + REDISMODULE_GET_API(GetUsedMemoryRatio); + REDISMODULE_GET_API(MallocSize); + REDISMODULE_GET_API(CreateModuleUser); + REDISMODULE_GET_API(FreeModuleUser); + REDISMODULE_GET_API(SetModuleUserACL); + REDISMODULE_GET_API(DeauthenticateAndCloseClient); + REDISMODULE_GET_API(AuthenticateClientWithACLUser); + REDISMODULE_GET_API(AuthenticateClientWithUser); + REDISMODULE_GET_API(GetClientCertificate); + REDISMODULE_GET_API(GetCommandKeys); + REDISMODULE_GET_API(GetCurrentCommandName); + REDISMODULE_GET_API(RegisterDefragFunc); + REDISMODULE_GET_API(DefragAlloc); + REDISMODULE_GET_API(DefragRedisModuleString); + REDISMODULE_GET_API(DefragShouldStop); + REDISMODULE_GET_API(DefragCursorSet); + REDISMODULE_GET_API(DefragCursorGet); +#endif + + if (RedisModule_IsModuleNameBusy && RedisModule_IsModuleNameBusy(name)) return REDISMODULE_ERR; + RedisModule_SetModuleAttribs(ctx,name,ver,apiver); + return REDISMODULE_OK; +} + +#define RedisModule_Assert(_e) ((_e)?(void)0 : (RedisModule__Assert(#_e,__FILE__,__LINE__),exit(1))) + +#define RMAPI_FUNC_SUPPORTED(func) (func != NULL) + +#else + +/* Things only defined for the modules core, not exported to modules + * including this file. 
*/ +#define RedisModuleString robj + +#endif /* REDISMODULE_CORE */ + +#ifdef __cplusplus +} +#endif + +#endif /* REDISMODULE_H */ diff --git a/src/object.cpp b/src/object.cpp index d3122dbbd..fe7b04dfc 100644 --- a/src/object.cpp +++ b/src/object.cpp @@ -46,6 +46,7 @@ robj *createObject(int type, void *ptr) { char *oB = (char*)zcalloc(sizeof(robj)+mvccExtraBytes, MALLOC_SHARED); robj *o = reinterpret_cast(oB + mvccExtraBytes); + new (o) redisObject; o->type = type; o->encoding = OBJ_ENCODING_RAW; o->m_ptr = ptr; @@ -418,6 +419,7 @@ void decrRefCount(robj_roptr o) { case OBJ_NESTEDHASH: freeNestedHashObject(o); break; default: serverPanic("Unknown object type"); break; } + o->~redisObject(); if (g_pserver->fActiveReplica) { zfree(reinterpret_cast(o.unsafe_robjcast())-1); } else { @@ -1141,10 +1143,7 @@ struct redisMemOverhead *getMemoryOverheadData(void) { mh->db[mh->num_dbs].overhead_ht_main = mem; mem_total+=mem; - std::unique_lock ul(g_expireLock); - mem = db->setexpire()->estimated_bytes_used(); - mh->db[mh->num_dbs].overhead_ht_expires = mem; - mem_total+=mem; + mh->db[mh->num_dbs].overhead_ht_expires = 0; mh->num_dbs++; } @@ -1628,7 +1627,7 @@ robj *deserializeStoredStringObject(const char *data, size_t cb) return newObject; } -robj *deserializeStoredObjectCore(const void *data, size_t cb) +robj *deserializeStoredObject(const void *data, size_t cb) { switch (((char*)data)[0]) { @@ -1665,14 +1664,6 @@ robj *deserializeStoredObjectCore(const void *data, size_t cb) serverPanic("Unknown object type loading from storage"); } -robj *deserializeStoredObject(const redisDbPersistentData *db, const char *key, const void *data, size_t cb) -{ - robj *o = deserializeStoredObjectCore(data, cb); - std::unique_lock ul(g_expireLock); - o->SetFExpires(db->setexpire()->exists(key)); - return o; -} - sds serializeStoredObject(robj_roptr o, sds sdsPrefix) { switch (o->type) diff --git a/src/rdb.cpp b/src/rdb.cpp index 3f84a6b23..997ef67f3 100644 --- a/src/rdb.cpp +++ 
b/src/rdb.cpp @@ -1237,18 +1237,14 @@ int rdbSaveInfoAuxFields(rio *rdb, int rdbflags, rdbSaveInfo *rsi) { return 1; } -int saveKey(rio *rdb, const redisDbPersistentDataSnapshot *db, int flags, size_t *processed, const char *keystr, robj_roptr o) +int saveKey(rio *rdb, int flags, size_t *processed, const char *keystr, robj_roptr o) { redisObjectStack key; initStaticStringObject(key,(char*)keystr); - std::unique_lock ul(g_expireLock, std::defer_lock); const expireEntry *pexpire = nullptr; - if (o->FExpires()) - { - ul.lock(); - pexpire = db->getExpire(&key); - serverAssert((o->FExpires() && pexpire != nullptr) || (!o->FExpires() && pexpire == nullptr)); + if (o->FExpires()) { + pexpire = &o->expire; } if (rdbSaveKeyValuePair(rdb,&key,o,pexpire) == -1) @@ -1355,7 +1351,7 @@ int rdbSaveRio(rio *rdb, const redisDbPersistentDataSnapshot **rgpdb, int *error if (o->FExpires()) ++ckeysExpired; - if (!saveKey(rdb, db, rdbflags, &processed, keystr, o)) + if (!saveKey(rdb, rdbflags, &processed, keystr, o)) return false; /* Update child info every 1 second (approximately). @@ -2546,7 +2542,7 @@ robj *rdbLoadObject(int rdbtype, rio *rdb, sds key, int *error, uint64_t mvcc_ts * encoding version in the lower 10 bits of the module ID. 
*/ void *ptr = mt->rdb_load(&io,moduleid&1023); if (io.ctx) { - moduleFreeContext(io.ctx); + moduleFreeContext(io.ctx, false /* propagate */); zfree(io.ctx); } diff --git a/src/readwritelock.h b/src/readwritelock.h index 79f0ac710..05385e4e7 100644 --- a/src/readwritelock.h +++ b/src/readwritelock.h @@ -8,38 +8,46 @@ class readWriteLock { int m_readCount = 0; int m_writeCount = 0; bool m_writeWaiting = false; - bool m_notify = true; + bool m_multi = true; public: readWriteLock(const char *name) : m_readLock(name), m_writeLock(name) {} void acquireRead() { - std::unique_lock rm(m_readLock); - while (m_writeCount > 0 || m_writeWaiting) - m_cv.wait(rm); + std::unique_lock rm(m_readLock, std::defer_lock); + if (m_multi) { + rm.lock(); + while (m_writeCount > 0 || m_writeWaiting) + m_cv.wait(rm); + } m_readCount++; } bool tryAcquireRead() { std::unique_lock rm(m_readLock, std::defer_lock); - if (!rm.try_lock()) - return false; - if (m_writeCount > 0 || m_writeWaiting) - return false; + if (m_multi) { + if (!rm.try_lock()) + return false; + if (m_writeCount > 0 || m_writeWaiting) + return false; + } m_readCount++; return true; } void acquireWrite(bool exclusive = true) { - std::unique_lock rm(m_readLock); - m_writeWaiting = true; - while (m_readCount > 0) - m_cv.wait(rm); - if (exclusive) { - /* Another thread might have the write lock while we have the read lock - but won't be able to release it until they can acquire the read lock - so release the read lock and try again instead of waiting to avoid deadlock */ - while(!m_writeLock.try_lock()) + std::unique_lock rm(m_readLock, std::defer_lock); + if (m_multi) { + rm.lock(); + m_writeWaiting = true; + while (m_readCount > 0) m_cv.wait(rm); + if (exclusive) { + /* Another thread might have the write lock while we have the read lock + but won't be able to release it until they can acquire the read lock + so release the read lock and try again instead of waiting to avoid deadlock */ + while(!m_writeLock.try_lock()) + 
m_cv.wait(rm); + } } m_writeCount++; m_writeWaiting = false; @@ -52,32 +60,38 @@ class readWriteLock { bool tryAcquireWrite(bool exclusive = true) { std::unique_lock rm(m_readLock, std::defer_lock); - if (!rm.try_lock()) - return false; - if (m_readCount > 0) - return false; - if (exclusive) - if (!m_writeLock.try_lock()) + if (m_multi) { + if (!rm.try_lock()) return false; + if (m_readCount > 0) + return false; + if (exclusive) + if (!m_writeLock.try_lock()) + return false; + } m_writeCount++; return true; } void releaseRead() { - std::unique_lock rm(m_readLock); - m_readCount--; - if (m_notify) + std::unique_lock rm(m_readLock, std::defer_lock); + if (m_multi) { + rm.lock(); m_cv.notify_all(); + } + m_readCount--; } void releaseWrite(bool exclusive = true) { - std::unique_lock rm(m_readLock); + std::unique_lock rm(m_readLock, std::defer_lock); serverAssert(m_writeCount > 0); - if (exclusive) - m_writeLock.unlock(); - m_writeCount--; - if (m_notify) + if (m_multi) { + rm.lock(); + if (exclusive) + m_writeLock.unlock(); m_cv.notify_all(); + } + m_writeCount--; } void downgradeWrite(bool exclusive = true) { @@ -85,8 +99,8 @@ class readWriteLock { acquireRead(); } - void setNotify(bool notify) { - m_notify = notify; + void setMulti(bool multi) { + m_multi = multi; } bool hasReader() { diff --git a/src/replication.cpp b/src/replication.cpp index ee3583692..5fc9d2ecd 100644 --- a/src/replication.cpp +++ b/src/replication.cpp @@ -188,6 +188,8 @@ int bg_unlink(const char *filename) { /* ---------------------------------- MASTER -------------------------------- */ bool createDiskBacklog() { + if (g_pserver->repl_backlog_disk != nullptr) + return true; // already exists // Lets create some disk backed pages and add them here std::string path = "./repl-backlog-temp" + std::to_string(gettid()); #if (defined __APPLE__ || defined __FreeBSD__) diff --git a/src/server.cpp b/src/server.cpp index b8dbbf5a8..1ef2344dc 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1518,6 
+1518,16 @@ dictType dbDictType = { dictGCAsyncFree /* async free destructor */ }; +dictType dbExpiresDictType = { + dictSdsHash, /* hash function */ + NULL, /* key dup */ + NULL, /* val dup */ + dictSdsKeyCompare, /* key compare */ + NULL, /* key destructor */ + NULL, /* val destructor */ + dictExpandAllowed /* allow to expand */ + }; + /* db->pdict, keys are sds strings, vals are Redis objects. */ dictType dbTombstoneDictType = { dictSdsHash, /* hash function */ @@ -1550,17 +1560,6 @@ dictType shaScriptObjectDictType = { NULL /* allow to expand */ }; -/* Db->expires */ -dictType dbExpiresDictType = { - dictSdsHash, /* hash function */ - NULL, /* key dup */ - NULL, /* val dup */ - dictSdsKeyCompare, /* key compare */ - NULL, /* key destructor */ - NULL, /* val destructor */ - dictExpandAllowed /* allow to expand */ -}; - /* Command table. sds string -> command struct pointer. */ dictType commandTableDictType = { dictSdsCaseHash, /* hash function */ @@ -1871,6 +1870,9 @@ VersionCompareResult compareVersion(SymVer *pver) if ((symVerThis.major == 0 && symVerThis.minor == 0 && symVerThis.build == 0) || (pver->major == 0 && pver->minor == 0 && pver->build == 0)) return VersionCompareResult::EqualVersion; + + if (pver->major <= 6 && pver->minor <= 3 && pver->build <= 3) + return VersionCompareResult::IncompatibleVersion; for (int iver = 0; iver < 3; ++iver) { @@ -2135,7 +2137,7 @@ void databasesCron(bool fMainThread) { ::dict *dict = g_pserver->db[rehash_db]->dictUnsafeKeyOnly(); /* Are we async rehashing? And if so is it time to re-calibrate? 
*/ /* The recalibration limit is a prime number to ensure balancing across threads */ - if (rehashes_per_ms > 0 && async_rehashes < 131 && !cserver.active_defrag_enabled && cserver.cthreads > 1 && dictSize(dict) > 2048 && dictIsRehashing(dict) && !g_pserver->loading) { + if (g_pserver->enable_async_rehash && rehashes_per_ms > 0 && async_rehashes < 131 && !cserver.active_defrag_enabled && cserver.cthreads > 1 && dictSize(dict) > 2048 && dictIsRehashing(dict) && !g_pserver->loading && aeLockContention() > 1) { serverTL->rehashCtl = dictRehashAsyncStart(dict, rehashes_per_ms * ((1000 / g_pserver->hz) / 10)); // Estimate 10% CPU time spent in lock contention if (serverTL->rehashCtl) ++async_rehashes; @@ -5661,7 +5663,8 @@ sds genRedisInfoString(const char *section) { "lru_clock:%u\r\n" "executable:%s\r\n" "config_file:%s\r\n" - "availability_zone:%s\r\n", + "availability_zone:%s\r\n" + "features:%s\r\n", KEYDB_SET_VERSION, redisGitSHA1(), strtol(redisGitDirty(),NULL,10) > 0, @@ -5688,7 +5691,8 @@ sds genRedisInfoString(const char *section) { lruclock, cserver.executable ? cserver.executable : "", cserver.configfile ? 
cserver.configfile : "", - g_pserver->sdsAvailabilityZone); + g_pserver->sdsAvailabilityZone, + "cluster_mget"); } /* Clients */ diff --git a/src/server.h b/src/server.h index 9ad1aab8d..7983c6dc7 100644 --- a/src/server.h +++ b/src/server.h @@ -966,6 +966,7 @@ struct redisObjectExtended { typedef struct redisObject { friend redisObject *createEmbeddedStringObject(const char *ptr, size_t len); + friend redisObject *createObject(int type, void *ptr); protected: redisObject() {} @@ -978,6 +979,7 @@ typedef struct redisObject { private: mutable std::atomic refcount {0}; public: + expireEntry expire; void *m_ptr; inline bool FExpires() const { return refcount.load(std::memory_order_relaxed) >> 31; } @@ -988,7 +990,7 @@ typedef struct redisObject { void addref() const { refcount.fetch_add(1, std::memory_order_relaxed); } unsigned release() const { return refcount.fetch_sub(1, std::memory_order_seq_cst) & ~(1U << 31); } } robj; -static_assert(sizeof(redisObject) <= 16, "object size is critical, don't increase"); +static_assert(sizeof(redisObject) <= 24, "object size is critical, don't increase"); class redisObjectStack : public redisObjectExtended, public redisObject { @@ -1144,16 +1146,20 @@ class redisDbPersistentData dict_iter random(); - const expireEntry &random_expire() + const expireEntry *random_expire(sds *key) { - return m_setexpire->random_value(); + auto itr = random(); + if (itr->FExpires()) { + *key = itr.key(); + return &itr->expire; + } + return nullptr; } dict_iter end() { return dict_iter(nullptr, nullptr); } dict_const_iter end() const { return dict_const_iter(nullptr); } void getStats(char *buf, size_t bufsize) { dictGetStats(buf, bufsize, m_pdict); } - void getExpireStats(char *buf, size_t bufsize) { m_setexpire->getstats(buf, bufsize); } bool insert(char *k, robj *o, bool fAssumeNew = false, dict_iter *existing = nullptr); void tryResize(); @@ -1161,16 +1167,15 @@ class redisDbPersistentData void updateValue(dict_iter itr, robj *val); bool 
syncDelete(robj *key); bool asyncDelete(robj *key); - size_t expireSize() const { return m_setexpire->size(); } + size_t expireSize() const { return m_numexpires; } int removeExpire(robj *key, dict_iter itr); int removeSubkeyExpire(robj *key, robj *subkey); - void resortExpire(expireEntry &e); void clear(void(callback)(void*)); void emptyDbAsync(); // Note: If you do not need the obj then use the objless iterator version. It's faster bool iterate(std::function fn); void setExpire(robj *key, robj *subkey, long long when); - void setExpire(expireEntry &&e); + void setExpire(const char *key, expireEntry &&e); void initialize(); void prepOverwriteForSnapshot(char *key); @@ -1194,9 +1199,6 @@ class redisDbPersistentData // objects stored elsewhere dict *dictUnsafeKeyOnly() { return m_pdict; } - expireset *setexpireUnsafe() { return m_setexpire; } - const expireset *setexpire() const { return m_setexpire; } - const redisDbPersistentDataSnapshot *createSnapshot(uint64_t mvccCheckpoint, bool fOptional); void endSnapshot(const redisDbPersistentDataSnapshot *psnapshot); void endSnapshotAsync(const redisDbPersistentDataSnapshot *psnapshot); @@ -1218,6 +1220,8 @@ class redisDbPersistentData dict_iter find_cached_threadsafe(const char *key) const; + static void activeExpireCycleCore(int type); + protected: uint64_t m_mvccCheckpoint = 0; @@ -1240,7 +1244,7 @@ class redisDbPersistentData std::shared_ptr m_spstorage = nullptr; // Expire - expireset *m_setexpire = nullptr; + size_t m_numexpires = 0; // These two pointers are the same, UNLESS the database has been cleared. 
// in which case m_pdbSnapshot is NULL and we continue as though we weren' @@ -1310,7 +1314,7 @@ struct redisDb : public redisDbPersistentDataSnapshot friend int removeExpire(redisDb *db, robj *key); friend void setExpire(struct client *c, redisDb *db, robj *key, robj *subkey, long long when); friend void setExpire(client *c, redisDb *db, robj *key, expireEntry &&e); - friend int evictionPoolPopulate(int dbid, redisDb *db, expireset *setexpire, struct evictionPoolEntry *pool); + friend int evictionPoolPopulate(int dbid, redisDb *db, bool fVolatile, struct evictionPoolEntry *pool); friend void activeDefragCycle(void); friend void activeExpireCycle(int); friend void expireSlaveKeys(void); @@ -1319,9 +1323,7 @@ struct redisDb : public redisDbPersistentDataSnapshot typedef ::dict_const_iter const_iter; typedef ::dict_iter iter; - redisDb() - : expireitr(nullptr) - {} + redisDb() = default; void initialize(int id); void storageProviderInitialize(); @@ -1343,7 +1345,6 @@ struct redisDb : public redisDbPersistentDataSnapshot using redisDbPersistentData::random_expire; using redisDbPersistentData::end; using redisDbPersistentData::getStats; - using redisDbPersistentData::getExpireStats; using redisDbPersistentData::insert; using redisDbPersistentData::tryResize; using redisDbPersistentData::incrementallyRehash; @@ -1361,15 +1362,12 @@ struct redisDb : public redisDbPersistentDataSnapshot using redisDbPersistentData::processChanges; using redisDbPersistentData::processChangesAsync; using redisDbPersistentData::commitChanges; - using redisDbPersistentData::setexpireUnsafe; - using redisDbPersistentData::setexpire; using redisDbPersistentData::endSnapshot; using redisDbPersistentData::restoreSnapshot; using redisDbPersistentData::removeAllCachedValues; using redisDbPersistentData::disableKeyCache; using redisDbPersistentData::keycacheIsEnabled; using redisDbPersistentData::dictUnsafeKeyOnly; - using redisDbPersistentData::resortExpire; using 
redisDbPersistentData::prefetchKeysAsync; using redisDbPersistentData::prepOverwriteForSnapshot; using redisDbPersistentData::FRehashing; @@ -1386,7 +1384,7 @@ struct redisDb : public redisDbPersistentDataSnapshot return psnapshot; } - expireset::setiter expireitr; + unsigned long expires_cursor = 0; dict *blocking_keys; /* Keys with clients waiting for data (BLPOP)*/ dict *ready_keys; /* Blocked keys that received a PUSH */ dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */ @@ -2247,7 +2245,6 @@ struct redisServerConst { int maxidletime; /* Client timeout in seconds */ int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */ int active_expire_enabled; /* Can be disabled for testing purposes. */ - int active_expire_effort; /* From 1 (default) to 10, active effort. */ int active_defrag_enabled; int jemalloc_bg_thread; /* Enable jemalloc background thread */ size_t active_defrag_ignore_bytes; /* minimum amount of fragmentation waste to start active defrag */ @@ -2309,6 +2306,7 @@ struct redisServer { rax *errors; /* Errors table */ int activerehashing; /* Incremental rehash in serverCron() */ int active_defrag_running; /* Active defragmentation running (holds current scan aggressiveness) */ + int enable_async_rehash = 1; /* Should we use the async rehash feature? */ int cronloops; /* Number of times the cron function run */ char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */ int sentinel_mode; /* True if this instance is a Sentinel. */ @@ -2357,6 +2355,7 @@ struct redisServer { unsigned int loading_process_events_interval_keys; int active_expire_enabled; /* Can be disabled for testing purposes. */ + int active_expire_effort; /* From 1 (default) to 10, active effort. 
*/ int replicaIsolationFactor = 1; @@ -2920,7 +2919,7 @@ int moduleGetCommandKeysViaAPI(struct redisCommand *cmd, robj **argv, int argc, moduleType *moduleTypeLookupModuleByID(uint64_t id); void moduleTypeNameByID(char *name, uint64_t moduleid); const char *moduleTypeModuleName(moduleType *mt); -void moduleFreeContext(struct RedisModuleCtx *ctx); +void moduleFreeContext(struct RedisModuleCtx *ctx, bool propogate = true); void unblockClientFromModule(client *c); void moduleHandleBlockedClients(int iel); void moduleBlockedClientTimedOut(client *c); @@ -3180,8 +3179,8 @@ int equalStringObjects(robj *a, robj *b); unsigned long long estimateObjectIdleTime(robj_roptr o); void trimStringObjectIfNeeded(robj *o); -robj *deserializeStoredObject(const redisDbPersistentData *db, const char *key, const void *data, size_t cb); -std::unique_ptr deserializeExpire(sds key, const char *str, size_t cch, size_t *poffset); +robj *deserializeStoredObject(const void *data, size_t cb); +std::unique_ptr deserializeExpire(const char *str, size_t cch, size_t *poffset); sds serializeStoredObject(robj_roptr o, sds sdsPrefix = nullptr); #define sdsEncodedObject(objptr) (objptr->encoding == OBJ_ENCODING_RAW || objptr->encoding == OBJ_ENCODING_EMBSTR) diff --git a/src/snapshot.cpp b/src/snapshot.cpp index dca1071d4..94956ebc2 100644 --- a/src/snapshot.cpp +++ b/src/snapshot.cpp @@ -74,6 +74,7 @@ const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint6 spdb->m_fTrackingChanges = 0; spdb->m_pdict = m_pdict; spdb->m_pdictTombstone = m_pdictTombstone; + spdb->m_numexpires = m_numexpires; // Add a fake iterator so the dicts don't rehash (they need to be read only) dictPauseRehashing(spdb->m_pdict); dictForceRehash(spdb->m_pdictTombstone); // prevent rehashing by finishing the rehash now @@ -83,12 +84,6 @@ const redisDbPersistentDataSnapshot *redisDbPersistentData::createSnapshot(uint6 spdb->m_pdbSnapshot = m_pdbSnapshot; spdb->m_refCount = 1; spdb->m_mvccCheckpoint = 
getMvccTstamp(); - if (m_setexpire != nullptr) - { - std::unique_lock ul(g_expireLock); - spdb->m_setexpire = new (MALLOC_LOCAL) expireset(*m_setexpire); - spdb->m_setexpire->pause_rehash(); // needs to be const - } if (dictIsRehashing(spdb->m_pdict) || dictIsRehashing(spdb->m_pdictTombstone)) { serverLog(LL_VERBOSE, "NOTICE: Suboptimal snapshot"); @@ -171,11 +166,6 @@ void redisDbPersistentData::restoreSnapshot(const redisDbPersistentDataSnapshot size_t expectedSize = psnapshot->size(); dictEmpty(m_pdict, nullptr); dictEmpty(m_pdictTombstone, nullptr); - { - std::unique_lock ul(g_expireLock); - delete m_setexpire; - m_setexpire = new (MALLOC_LOCAL) expireset(*psnapshot->m_setexpire); - } endSnapshot(psnapshot); serverAssert(size() == expectedSize); } @@ -597,8 +587,12 @@ bool redisDbPersistentDataSnapshot::iterate_threadsafe_core(std::function(data)+offset, cbData-offset); + std::unique_ptr spexpire = deserializeExpire((const char*)data, cbData, &offset); + o = deserializeStoredObject(reinterpret_cast(data)+offset, cbData-offset); + o->SetFExpires(spexpire != nullptr); + if (spexpire != nullptr) { + o->expire = std::move(*spexpire); + } } fContinue = fn(sdsKey, o); if (o != nullptr) diff --git a/src/storage/rocksdb.cpp b/src/storage/rocksdb.cpp index 76eaa133a..f433947ba 100644 --- a/src/storage/rocksdb.cpp +++ b/src/storage/rocksdb.cpp @@ -33,8 +33,8 @@ std::string prefixKey(const char *key, size_t cchKey) return FInternalKey(key, cchKey) ? 
std::string(key, cchKey) : getPrefix(keyHashSlot(key, cchKey)) + std::string(key, cchKey); } -RocksDBStorageProvider::RocksDBStorageProvider(RocksDBStorageFactory *pfactory, std::shared_ptr &spdb, std::shared_ptr &spcolfam, const rocksdb::Snapshot *psnapshot, size_t count) - : m_pfactory(pfactory), m_spdb(spdb), m_psnapshot(psnapshot), m_spcolfamily(spcolfam), m_count(count) +RocksDBStorageProvider::RocksDBStorageProvider(RocksDBStorageFactory *pfactory, std::shared_ptr &spdb, std::shared_ptr &spcolfam, std::shared_ptr &spexpirecolfam, const rocksdb::Snapshot *psnapshot, size_t count) + : m_pfactory(pfactory), m_spdb(spdb), m_psnapshot(psnapshot), m_spcolfamily(spcolfam), m_spexpirecolfamily(spexpirecolfam), m_count(count) { m_readOptionsTemplate = rocksdb::ReadOptions(); m_readOptionsTemplate.verify_checksums = false; @@ -211,11 +211,79 @@ bool RocksDBStorageProvider::enumerate_hashslot(callback fn, unsigned int hashsl return full_iter; } +void RocksDBStorageProvider::setExpire(const char *key, size_t cchKey, long long expire) +{ + rocksdb::Status status; + std::unique_lock l(m_lock); + std::string prefix((const char *)&expire,sizeof(long long)); + std::string strKey(key, cchKey); + if (m_spbatch != nullptr) + status = m_spbatch->Put(m_spexpirecolfamily.get(), rocksdb::Slice(prefix + strKey), rocksdb::Slice(strKey)); + else + status = m_spdb->Put(WriteOptions(), m_spexpirecolfamily.get(), rocksdb::Slice(prefix + strKey), rocksdb::Slice(strKey)); + if (!status.ok()) + throw status.ToString(); +} + +void RocksDBStorageProvider::removeExpire(const char *key, size_t cchKey, long long expire) +{ + rocksdb::Status status; + std::unique_lock l(m_lock); + std::string prefix((const char *)&expire,sizeof(long long)); + std::string strKey(key, cchKey); + std::string fullKey = prefix + strKey; + if (!FExpireExists(fullKey)) + return; + if (m_spbatch) + status = m_spbatch->Delete(m_spexpirecolfamily.get(), rocksdb::Slice(fullKey)); + else + status = 
m_spdb->Delete(WriteOptions(), m_spexpirecolfamily.get(), rocksdb::Slice(fullKey)); + if (!status.ok()) + throw status.ToString(); +} + +std::vector RocksDBStorageProvider::getExpirationCandidates(unsigned int count) +{ + std::vector result; + std::unique_ptr it = std::unique_ptr(m_spdb->NewIterator(ReadOptions(), m_spexpirecolfamily.get())); + for (it->SeekToFirst(); it->Valid() && result.size() < count; it->Next()) { + if (FInternalKey(it->key().data(), it->key().size())) + continue; + result.emplace_back(it->value().data(), it->value().size()); + } + return result; +} + +std::string randomHashSlot() { + return getPrefix(genrand64_int63() % (1 << 16)); +} + +std::vector RocksDBStorageProvider::getEvictionCandidates(unsigned int count) +{ + std::vector result; + if (g_pserver->maxmemory_policy & MAXMEMORY_FLAG_ALLKEYS) { + std::unique_ptr it = std::unique_ptr(m_spdb->NewIterator(ReadOptions(), m_spcolfamily.get())); + for (it->Seek(randomHashSlot()); it->Valid() && result.size() < count; it->Next()) { + if (FInternalKey(it->key().data(), it->key().size())) + continue; + result.emplace_back(it->key().data() + 2, it->key().size() - 2); + } + } else { + std::unique_ptr it = std::unique_ptr(m_spdb->NewIterator(ReadOptions(), m_spexpirecolfamily.get())); + for (it->SeekToFirst(); it->Valid() && result.size() < count; it->Next()) { + if (FInternalKey(it->key().data(), it->key().size())) + continue; + result.emplace_back(it->value().data(), it->value().size()); + } + } + return result; +} + const IStorage *RocksDBStorageProvider::clone() const { std::unique_lock l(m_lock); const rocksdb::Snapshot *psnapshot = const_cast(this)->m_spdb->GetSnapshot(); - return new RocksDBStorageProvider(m_pfactory, const_cast(this)->m_spdb, const_cast(this)->m_spcolfamily, psnapshot, m_count); + return new RocksDBStorageProvider(m_pfactory, const_cast(this)->m_spdb, const_cast(this)->m_spcolfamily, const_cast(this)->m_spexpirecolfamily, psnapshot, m_count); } 
RocksDBStorageProvider::~RocksDBStorageProvider() @@ -268,6 +336,7 @@ void RocksDBStorageProvider::batch_unlock() void RocksDBStorageProvider::flush() { m_spdb->SyncWAL(); + m_spdb->Flush(rocksdb::FlushOptions()); } bool RocksDBStorageProvider::FKeyExists(std::string& key) const @@ -276,4 +345,12 @@ bool RocksDBStorageProvider::FKeyExists(std::string& key) const if (m_spbatch) return m_spbatch->GetFromBatchAndDB(m_spdb.get(), ReadOptions(), m_spcolfamily.get(), rocksdb::Slice(key), &slice).ok(); return m_spdb->Get(ReadOptions(), m_spcolfamily.get(), rocksdb::Slice(key), &slice).ok(); +} + +bool RocksDBStorageProvider::FExpireExists(std::string& key) const +{ + rocksdb::PinnableSlice slice; + if (m_spbatch) + return m_spbatch->GetFromBatchAndDB(m_spdb.get(), ReadOptions(), m_spexpirecolfamily.get(), rocksdb::Slice(key), &slice).ok(); + return m_spdb->Get(ReadOptions(), m_spexpirecolfamily.get(), rocksdb::Slice(key), &slice).ok(); } \ No newline at end of file diff --git a/src/storage/rocksdb.h b/src/storage/rocksdb.h index b78788eb2..dd6196a55 100644 --- a/src/storage/rocksdb.h +++ b/src/storage/rocksdb.h @@ -10,6 +10,7 @@ static const char count_key[] = INTERNAL_KEY_PREFIX "__keydb__count\1"; static const char version_key[] = INTERNAL_KEY_PREFIX "__keydb__version\1"; static const char meta_key[] = INTERNAL_KEY_PREFIX "__keydb__metadata\1"; +static const char last_expire_key[] = INTERNAL_KEY_PREFIX "__keydb__last_expire_time"; class RocksDBStorageFactory; class RocksDBStorageProvider : public IStorage @@ -19,12 +20,13 @@ class RocksDBStorageProvider : public IStorage std::unique_ptr m_spbatch; const rocksdb::Snapshot *m_psnapshot = nullptr; std::shared_ptr m_spcolfamily; + std::shared_ptr m_spexpirecolfamily; rocksdb::ReadOptions m_readOptionsTemplate; size_t m_count = 0; mutable fastlock m_lock {"RocksDBStorageProvider"}; public: - RocksDBStorageProvider(RocksDBStorageFactory *pfactory, std::shared_ptr &spdb, std::shared_ptr &spcolfam, const rocksdb::Snapshot 
*psnapshot, size_t count); + RocksDBStorageProvider(RocksDBStorageFactory *pfactory, std::shared_ptr &spdb, std::shared_ptr &spcolfam, std::shared_ptr &spexpirecolfam, const rocksdb::Snapshot *psnapshot, size_t count); ~RocksDBStorageProvider(); virtual void insert(const char *key, size_t cchKey, void *data, size_t cb, bool fOverwrite) override; @@ -34,6 +36,11 @@ class RocksDBStorageProvider : public IStorage virtual bool enumerate(callback fn) const override; virtual bool enumerate_hashslot(callback fn, unsigned int hashslot) const override; + virtual std::vector getExpirationCandidates(unsigned int count) override; + virtual std::vector getEvictionCandidates(unsigned int count) override; + virtual void setExpire(const char *key, size_t cchKey, long long expire) override; + virtual void removeExpire(const char *key, size_t cchKey, long long expire) override; + virtual const IStorage *clone() const override; virtual void beginWriteBatch() override; @@ -50,6 +57,7 @@ class RocksDBStorageProvider : public IStorage protected: bool FKeyExists(std::string&) const; + bool FExpireExists(std::string&) const; const rocksdb::ReadOptions &ReadOptions() const { return m_readOptionsTemplate; } rocksdb::WriteOptions WriteOptions() const; diff --git a/src/storage/rocksdbfactor_internal.h b/src/storage/rocksdbfactor_internal.h index dc27f6987..ff545d6ba 100644 --- a/src/storage/rocksdbfactor_internal.h +++ b/src/storage/rocksdbfactor_internal.h @@ -5,6 +5,7 @@ class RocksDBStorageFactory : public IStorageFactory { std::shared_ptr m_spdb; // Note: This must be first so it is deleted last std::vector> m_vecspcols; + std::vector> m_vecspexpirecols; std::shared_ptr m_pfilemanager; std::string m_path; bool m_fCreatedTempFolder = false; diff --git a/src/storage/rocksdbfactory.cpp b/src/storage/rocksdbfactory.cpp index 5c3beeb4b..7087a0136 100644 --- a/src/storage/rocksdbfactory.cpp +++ b/src/storage/rocksdbfactory.cpp @@ -61,8 +61,8 @@ RocksDBStorageFactory::RocksDBStorageFactory(const 
char *dbfile, int dbnum, cons auto status = rocksdb::DB::ListColumnFamilies(rocksdb::Options(), dbfile, &vecT); // RocksDB requires we know the count of col families before opening, if the user only wants to see less // we still have to make room for all column family handles regardless - if (status.ok() && (int)vecT.size() > dbnum) - dbnum = (int)vecT.size(); + if (status.ok() && (int)vecT.size()/2 > dbnum) + dbnum = (int)vecT.size()/2; std::vector veccoldesc; veccoldesc.push_back(rocksdb::ColumnFamilyDescriptor(rocksdb::kDefaultColumnFamilyName, rocksdb::ColumnFamilyOptions())); // ignore default col family @@ -79,6 +79,7 @@ RocksDBStorageFactory::RocksDBStorageFactory(const char *dbfile, int dbnum, cons rocksdb::ColumnFamilyOptions cf_options(options); cf_options.level_compaction_dynamic_level_bytes = true; veccoldesc.push_back(rocksdb::ColumnFamilyDescriptor(std::to_string(idb), cf_options)); + veccoldesc.push_back(rocksdb::ColumnFamilyDescriptor(std::to_string(idb) + "_expires", cf_options)); } if (rgchConfig != nullptr) @@ -100,23 +101,29 @@ RocksDBStorageFactory::RocksDBStorageFactory(const char *dbfile, int dbnum, cons m_spdb = std::shared_ptr(db); for (auto handle : handles) { - std::string strVersion; - auto status = m_spdb->Get(rocksdb::ReadOptions(), handle, rocksdb::Slice(version_key, sizeof(version_key)), &strVersion); - if (!status.ok()) - { - setVersion(handle); - } - else - { - SymVer ver = parseVersion(strVersion.c_str()); - auto cmp = compareVersion(&ver); - if (cmp == NewerVersion) - throw "Cannot load FLASH database created by newer version of KeyDB"; - if (cmp == OlderVersion) + if (handle->GetName().size() > 7 && !strncmp(handle->GetName().substr(handle->GetName().size() - 7).c_str(), "expires", 7)) { + m_vecspexpirecols.emplace_back(handle); + } else { + std::string strVersion; + auto status = m_spdb->Get(rocksdb::ReadOptions(), handle, rocksdb::Slice(version_key, sizeof(version_key)), &strVersion); + if (!status.ok()) + { 
setVersion(handle); + } + else + { + SymVer ver = parseVersion(strVersion.c_str()); + auto cmp = compareVersion(&ver); + if (cmp == NewerVersion) + throw "Cannot load FLASH database created by newer version of KeyDB"; + if (cmp == IncompatibleVersion) + throw "Cannot load FLASH database from before 6.3.4"; + if (cmp == OlderVersion) + setVersion(handle); + } + m_vecspcols.emplace_back(handle); } - m_vecspcols.emplace_back(handle); - } + } } RocksDBStorageFactory::~RocksDBStorageFactory() @@ -156,6 +163,7 @@ IStorage *RocksDBStorageFactory::create(int db, key_load_iterator iter, void *pr { ++db; // skip default col family std::shared_ptr spcolfamily(m_vecspcols[db].release()); + std::shared_ptr spexpirecolfamily(m_vecspexpirecols[db].release()); size_t count = 0; bool fUnclean = false; @@ -192,7 +200,7 @@ IStorage *RocksDBStorageFactory::create(int db, key_load_iterator iter, void *pr ++count; } } - return new RocksDBStorageProvider(this, m_spdb, spcolfamily, nullptr, count); + return new RocksDBStorageProvider(this, m_spdb, spcolfamily, spexpirecolfamily, nullptr, count); } const char *RocksDBStorageFactory::name() const diff --git a/src/storage/teststorageprovider.h b/src/storage/teststorageprovider.h index cb8c384f1..f95dbbe6a 100644 --- a/src/storage/teststorageprovider.h +++ b/src/storage/teststorageprovider.h @@ -1,6 +1,7 @@ #include "../IStorage.h" #include #include +#include class TestStorageFactory : public IStorageFactory { @@ -28,6 +29,11 @@ class TestStorageProvider final : public IStorage virtual bool enumerate_hashslot(callback fn, unsigned int hashslot) const override; virtual size_t count() const override; + virtual std::vector getExpirationCandidates(unsigned int) override { return std::vector(); } + virtual std::vector getEvictionCandidates(unsigned int) override { return std::vector(); } + virtual void setExpire(const char *, size_t, long long) override {} + virtual void removeExpire(const char *, size_t, long long) override {} + virtual void 
flush() override; /* This is permitted to be a shallow clone */ diff --git a/src/version.h b/src/version.h index a747c45a6..26c498813 100644 --- a/src/version.h +++ b/src/version.h @@ -7,6 +7,7 @@ enum VersionCompareResult EqualVersion, OlderVersion, NewerVersion, + IncompatibleVersion, }; struct SymVer diff --git a/tests/unit/cron.tcl b/tests/unit/cron.tcl index fcbe90301..69cb61e1e 100644 --- a/tests/unit/cron.tcl +++ b/tests/unit/cron.tcl @@ -18,7 +18,7 @@ start_server {tags {"CRON"} overrides {hz 100} } { test {keydb.cron repeat works} { r flushall - r keydb.cron testjob repeat 0 600 {redis.call("incr","testkey")} + r keydb.cron testjob repeat 0 900 {redis.call("incr","testkey")} after 1000 assert_equal 2 [r get testkey] } diff --git a/tests/unit/flash.tcl b/tests/unit/flash.tcl index e66248fb2..52154f804 100644 --- a/tests/unit/flash.tcl +++ b/tests/unit/flash.tcl @@ -74,8 +74,11 @@ if {$::flash_enabled} { r set testkey foo ex 1 r flushall cache assert_equal {1} [r dbsize] - after 1500 - assert_equal {0} [r dbsize] + wait_for_condition 50 1000 { + [r dbsize] == 0 + } else { + fail "key is not expired" + } } test { SUBKEY EXPIRE persists after cache flush } { @@ -140,17 +143,17 @@ if {$::flash_enabled} { r set $numkeys 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx incr numkeys if {[s used_memory]+1024 >= $limit} { - break + break } } # Add additional keys to force eviction # should still be under the limit for maxmemory, however all keys set should still exist between flash and memory # check same number of keys exist in addition to values of first and last keys set err 0 - set extra_keys [expr floor([expr ($limit * 0.4) / 1024])] + set extra_keys [expr floor([expr ($limit * 0.4) / 1024])] for {set j 0} {$j < $extra_keys} {incr j} { catch { - r set p2$j 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + r set p2$j 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx } err assert {$err == {OK}} } @@ -165,7 +168,49 @@ if {$::flash_enabled} { assert {[r get last] == {val}} r flushall } - } + test "FLASH - is flash eviction working? (policy $policy)" { + # Get the current memory limit and calculate a new limit. + # Set limit to 100M. + set used [s used_memory] + set limit [expr {$used+60*1024*1024}] + r config set maxmemory $limit + r config set maxmemory-policy $policy + # Now add keys equivalent to 1024b until the limit is almost reached. 
+ set numkeys 0 + r set first val + while 1 { + r set $numkeys xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + incr numkeys + if {[s used_memory]+1024 >= $limit} { + break + } + } + # Add additional keys to force eviction + # should still be under the limit for maxmemory, however all keys set should still exist between flash and memory + # check same number of keys exist in addition to values of first and last keys + set err 0 + set extra_keys [expr floor([expr ($limit * 0.4) / 1024])] + for {set j 0} {$j < $extra_keys} {incr j} { + catch { + r set p2$j 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + } err + assert {$err == {OK}} + } + if {[log_file_matches [srv 0 stdout] "*Failed to evict*"]} { + fail "Server did not evict cleanly (detected full flush)" + } + r set last val + r debug flush-storage + r config set maxstorage 1 + r config set maxmemory 1 + set dbsize [r dbsize] + # after setting maxstorage and memory below used amount we should evict from storage provider + assert {$dbsize < $numkeys+$extra_keys+2} + r config set maxstorage 0 + r config set maxmemory 0 + r flushall + } + } } } diff --git a/tests/unit/moduleapi/load.tcl b/tests/unit/moduleapi/load.tcl index 853b9aebb..12ca22402 100644 --- a/tests/unit/moduleapi/load.tcl +++ b/tests/unit/moduleapi/load.tcl @@ -1,7 +1,7 @@ set testmodule [file normalize tests/modules/load.so] if {$::flash_enabled} { - start_server [list tags [list "modules"] overrides [list storage-provider {flash ./rocks.db.master} databases 256 loadmodule $testmodule]] { + start_server [list tags [list "modules"] overrides 
[list storage-provider {flash ./rocks.db.master.load.test} databases 256 loadmodule $testmodule]] { test "Module is notified of keys loaded from flash" { r flushall r set foo bar