diff --git a/docs/en/banyandb/ttl.md b/docs/en/banyandb/ttl.md new file mode 100644 index 000000000000..3a52696bf85e --- /dev/null +++ b/docs/en/banyandb/ttl.md @@ -0,0 +1,50 @@ +# Native TTL + +BanyanDB employs a Time-To-Live (TTL) mechanism to automatically delete data older than the specified duration. When using BanyanDB as the storage backend, the `recordDataTTL` and `metricsDataTTL` configurations are deprecated. Instead, TTL settings should be configured directly within `storage.banyandb`. + +For detailed information, please refer to the [Storage BanyanDB](../setup/backend/storages/banyandb.md) documentation. + +## Segment Interval and TTL + +BanyanDB's data rotation mechanism manages data storage based on **Segment Interval** and **TTL** settings: + +- **Segment Interval (`SIDays`)**: Specifies the time interval in days for creating a new data segment. Segments are time-based, facilitating efficient data retention and querying. +- **TTL (`TTLDays`)**: Defines the time-to-live for data within a group, in days. Data that exceeds the TTL will be automatically deleted. + +### Best Practices for Setting `SIDays` and `TTLDays` + +- **Data Retention Requirements**: Set the TTL based on how long you need to retain your data. For instance, to retain data for 30 days, set the TTL to 30 days. +- **Segment Management**: Avoid generating too many segments, as this increases the overhead for data management and querying. +- **Query Requirements**: Align segment intervals with your query patterns. For example: + - If you frequently query data for the last 30 minutes, set `SIDays` to 1 day. + - For querying data from the last 7 days, set `SIDays` to 7 days. + +## Configuration Guidelines + +### Record Data + +For both standard and super datasets: + +- **Recommended `SIDays`**: `1` + - Most queries are performed within a day. +- **`TTLDays`**: Set according to your data retention needs. + +### Metrics Data + +Configure `SIDays` and `TTLDays` based on data retention and query requirements. 
Recommended settings include: + +| Group | `SIDays` | `TTLDays` | +|----------------|----------|-----------| +| Minute (`gmMinute`) | 1 | 7 | +| Hour (`gmHour`) | 5 | 15 | +| Day (`gmDay`) | 15 | 15 | +| Index (`gmIndex`) | 15 | 15 | + +**Group Descriptions:** + +- **Minute (`gmMinute`)**: Stores metrics with a 1-minute granularity. Suitable for recent data queries requiring minute-level detail. Consequently, it has shorter `SIDays` and `TTLDays` compared to other groups. +- **Hour (`gmHour`)**: Stores metrics with a 1-hour granularity. Designed for queries that need hour-level detail over a longer period than minute-level data. +- **Day (`gmDay`)**: Stores metrics with a 1-day granularity. This group handles the longest segment intervals and TTLs among all granularity groups. +- **Index (`gmIndex`)**: Stores metrics used solely for indexing without value columns. Since queries often scan all segments in the `index` group, it shares the same `SIDays` and `TTLDays` as the `day` group to optimize performance. This group's `TTL` must be set to the **max** value of all groups. + +For more details on configuring `segmentIntervalDays` and `ttlDays`, refer to the [BanyanDB Rotation](https://skywalking.apache.org/docs/skywalking-banyandb/latest/concept/rotation/) documentation. diff --git a/docs/en/changes/changes.md b/docs/en/changes/changes.md index 90fda3fb08a9..1116c62a4850 100644 --- a/docs/en/changes/changes.md +++ b/docs/en/changes/changes.md @@ -40,5 +40,6 @@ #### Documentation * Update release document to adopt newly added revision-based process. +* Improve BanyanDB documentation. 
All issues and pull requests are [here](https://github.com/apache/skywalking/milestone/224?closed=1) diff --git a/docs/en/setup/backend/storages/banyandb.md b/docs/en/setup/backend/storages/banyandb.md index 601ed1565fa9..3253b17eb72a 100644 --- a/docs/en/setup/backend/storages/banyandb.md +++ b/docs/en/setup/backend/storages/banyandb.md @@ -14,42 +14,31 @@ storage: # If BanyanDB is deployed as a standalone server, the target should be the IP address or domain name and port of the BanyanDB server. # If BanyanDB is deployed in a cluster, the targets should be the IP address or domain name and port of the `liaison` nodes, separated by commas. targets: ${SW_STORAGE_BANYANDB_TARGETS:127.0.0.1:17912} - # The maximum number of records in a bulk write request. # A larger value can improve write performance but also increases OAP and BanyanDB Server memory usage. maxBulkSize: ${SW_STORAGE_BANYANDB_MAX_BULK_SIZE:10000} - # The minimum seconds between two bulk flushes. # If the data in a bulk is less than maxBulkSize, the data will be flushed after this period. # If the data in a bulk exceeds maxBulkSize, the data will be flushed immediately. # A larger value can reduce write pressure on BanyanDB Server but increase data latency. flushInterval: ${SW_STORAGE_BANYANDB_FLUSH_INTERVAL:15} - # The timeout in seconds for a bulk flush. flushTimeout: ${SW_STORAGE_BANYANDB_FLUSH_TIMEOUT:10} - # The number of threads that write data to BanyanDB concurrently. # A higher value can improve write performance but also increases CPU usage on both OAP and BanyanDB Server. concurrentWriteThreads: ${SW_STORAGE_BANYANDB_CONCURRENT_WRITE_THREADS:15} - # The maximum size of the dataset when the OAP loads cache, such as network aliases. resultWindowMaxSize: ${SW_STORAGE_BANYANDB_QUERY_MAX_WINDOW_SIZE:10000} - # The maximum size of metadata per query. metadataQueryMaxSize: ${SW_STORAGE_BANYANDB_QUERY_MAX_SIZE:10000} - # The maximum number of trace segments per query. 
segmentQueryMaxSize: ${SW_STORAGE_BANYANDB_QUERY_SEGMENT_SIZE:200} - # The maximum number of profile task queries in a request. profileTaskQueryMaxSize: ${SW_STORAGE_BANYANDB_QUERY_PROFILE_TASK_SIZE:200} - # The batch size for querying profile data. profileDataQueryBatchSize: ${SW_STORAGE_BANYANDB_QUERY_PROFILE_DATA_BATCH_SIZE:100} - # If the BanyanDB server is configured with TLS, configure the TLS cert file path and enable TLS connection. sslTrustCAPath: ${SW_STORAGE_BANYANDB_SSL_TRUST_CA_PATH:""} - # The group settings of record. # `gr` is the short name of the group settings of record. # @@ -63,7 +52,6 @@ storage: grSuperShardNum: ${SW_STORAGE_BANYANDB_GR_SUPER_SHARD_NUM:2} grSuperSIDays: ${SW_STORAGE_BANYANDB_GR_SUPER_SI_DAYS:1} grSuperTTLDays: ${SW_STORAGE_BANYANDB_GR_SUPER_TTL_DAYS:3} - # The group settings of metrics. # `gm` is the short name of the group settings of metrics. # @@ -75,18 +63,18 @@ storage: gmMinuteSIDays: ${SW_STORAGE_BANYANDB_GM_MINUTE_SI_DAYS:1} gmMinuteTTLDays: ${SW_STORAGE_BANYANDB_GM_MINUTE_TTL_DAYS:7} gmHourShardNum: ${SW_STORAGE_BANYANDB_GM_HOUR_SHARD_NUM:1} - gmHourSIDays: ${SW_STORAGE_BANYANDB_GM_HOUR_SI_DAYS:1} + gmHourSIDays: ${SW_STORAGE_BANYANDB_GM_HOUR_SI_DAYS:5} gmHourTTLDays: ${SW_STORAGE_BANYANDB_GM_HOUR_TTL_DAYS:15} gmDayShardNum: ${SW_STORAGE_BANYANDB_GM_DAY_SHARD_NUM:1} - gmDaySIDays: ${SW_STORAGE_BANYANDB_GM_DAY_SI_DAYS:1} - gmDayTTLDays: ${SW_STORAGE_BANYANDB_GM_DAY_TTL_DAYS:30} + gmDaySIDays: ${SW_STORAGE_BANYANDB_GM_DAY_SI_DAYS:15} + gmDayTTLDays: ${SW_STORAGE_BANYANDB_GM_DAY_TTL_DAYS:15} # If the metrics is marked as "index_mode", the metrics will be stored in the "index" group. # The "index" group is designed to store metrics that are used for indexing without value columns. # Such as `service_traffic`, `network_address_alias`, etc. # "index_mode" requires BanyanDB *0.8.0* or later. 
- gmIndexShardNum: ${SW_STORAGE_BANYANDB_GM_INDEX_SHARD_NUM:1} - gmIndexSIDays: ${SW_STORAGE_BANYANDB_GM_INDEX_SI_DAYS:1} - gmIndexTTLDays: ${SW_STORAGE_BANYANDB_GM_INDEX_TTL_DAYS:30} + gmIndexShardNum: ${SW_STORAGE_BANYANDB_GM_INDEX_SHARD_NUM:2} + gmIndexSIDays: ${SW_STORAGE_BANYANDB_GM_INDEX_SI_DAYS:15} + gmIndexTTLDays: ${SW_STORAGE_BANYANDB_GM_INDEX_TTL_DAYS:15} ``` @@ -110,6 +98,20 @@ BanyanDB supports **group settings** to configure storage groups, shards, segmen - `SIDays`: Interval in days for creating a new segment. Segments are time-based, allowing efficient data retention and querying. `SI` stands for Segment Interval. - `TTLDays`: Time-to-live for the data in the group, in days. Data exceeding the TTL will be deleted. -For more details on setting `segmentIntervalDays` and `ttlDays`, refer to the [BanyanDB Rotation](https://skywalking.apache.org/docs/skywalking-banyandb/latest/concept/rotation/) documentation. +For more details on setting `segmentIntervalDays` and `ttlDays`, refer to the [BanyanDB TTL](../../../banyandb/ttl.md) documentation. + +#### Record Group Settings + +The `gr` prefix is used for record group settings. The `normal` and `super` sections are used to define settings for normal and super datasets, respectively. + +Super datasets are used to store trace or log data that is too large for normal datasets. Each super dataset is stored in a separate group in BanyanDB. The settings defined in the `super` section are applied to all super datasets. + +Normal datasets are stored in a single group named `normal`. The settings defined in the `normal` section are applied to all normal datasets. + +#### Metrics Group Settings + +The `gm` prefix is used for metrics group settings. The `minute`, `hour`, and `day` sections are used to define settings for metrics stored based on granularity. + +The `index` group is designed to store metrics used for indexing without value columns. For example, `service_traffic`, `network_address_alias`, etc. 
For more details, refer to the documentation of [BanyanDB](https://skywalking.apache.org/docs/skywalking-banyandb/latest/readme/) and the [BanyanDB Java Client](https://github.com/apache/skywalking-banyandb-java-client) subprojects. diff --git a/docs/en/setup/backend/ttl.md b/docs/en/setup/backend/ttl.md index 1fc599f4b048..c0c7a9b19782 100644 --- a/docs/en/setup/backend/ttl.md +++ b/docs/en/setup/backend/ttl.md @@ -10,3 +10,8 @@ These are the settings for the different types: metricsDataTTL: ${SW_CORE_METRICS_DATA_TTL:7} # Unit is day ``` +## BanyanDB TTL + +BanyanDB has a TTL mechanism to automatically delete data that is older than the specified time. When you use BanyanDB as the storage backend, `recordDataTTL` and `metricsDataTTL` are not used. Instead, you should configure the TTL settings in `storage.banyandb`. + +Please refer to the [Storage BanyanDB](storages/banyandb.md) document for more information. diff --git a/docs/menu.yml b/docs/menu.yml index 2b2fea4d74ba..3e17fa1b8a93 100644 --- a/docs/menu.yml +++ b/docs/menu.yml @@ -198,6 +198,10 @@ catalog: path: "/en/setup/backend/backend-telemetry" - name: "OAP Health Check" path: "/en/setup/backend/backend-health-check" + - name: "BanyanDB Exclusive Setup" + catalog: + - name: "Native TTL" + path: "/en/banyandb/ttl" - name: "Tracing" catalog: - name: "Trace Sampling" diff --git a/oap-server/server-starter/src/main/resources/application.yml b/oap-server/server-starter/src/main/resources/application.yml index 09a2ef75f2d0..038477fc3375 100644 --- a/oap-server/server-starter/src/main/resources/application.yml +++ b/oap-server/server-starter/src/main/resources/application.yml @@ -223,42 +223,31 @@ storage: # If BanyanDB is deployed as a standalone server, the target should be the IP address or domain name and port of the BanyanDB server. # If BanyanDB is deployed in a cluster, the targets should be the IP address or domain name and port of the `liaison` nodes, separated by commas. 
targets: ${SW_STORAGE_BANYANDB_TARGETS:127.0.0.1:17912} - # The maximum number of records in a bulk write request. # A larger value can improve write performance but also increases OAP and BanyanDB Server memory usage. maxBulkSize: ${SW_STORAGE_BANYANDB_MAX_BULK_SIZE:10000} - # The minimum seconds between two bulk flushes. # If the data in a bulk is less than maxBulkSize, the data will be flushed after this period. # If the data in a bulk exceeds maxBulkSize, the data will be flushed immediately. # A larger value can reduce write pressure on BanyanDB Server but increase data latency. flushInterval: ${SW_STORAGE_BANYANDB_FLUSH_INTERVAL:15} - # The timeout in seconds for a bulk flush. flushTimeout: ${SW_STORAGE_BANYANDB_FLUSH_TIMEOUT:10} - # The number of threads that write data to BanyanDB concurrently. # A higher value can improve write performance but also increases CPU usage on both OAP and BanyanDB Server. concurrentWriteThreads: ${SW_STORAGE_BANYANDB_CONCURRENT_WRITE_THREADS:15} - # The maximum size of the dataset when the OAP loads cache, such as network aliases. resultWindowMaxSize: ${SW_STORAGE_BANYANDB_QUERY_MAX_WINDOW_SIZE:10000} - # The maximum size of metadata per query. metadataQueryMaxSize: ${SW_STORAGE_BANYANDB_QUERY_MAX_SIZE:10000} - # The maximum number of trace segments per query. segmentQueryMaxSize: ${SW_STORAGE_BANYANDB_QUERY_SEGMENT_SIZE:200} - # The maximum number of profile task queries in a request. profileTaskQueryMaxSize: ${SW_STORAGE_BANYANDB_QUERY_PROFILE_TASK_SIZE:200} - # The batch size for querying profile data. profileDataQueryBatchSize: ${SW_STORAGE_BANYANDB_QUERY_PROFILE_DATA_BATCH_SIZE:100} - # If the BanyanDB server is configured with TLS, configure the TLS cert file path and enable TLS connection. sslTrustCAPath: ${SW_STORAGE_BANYANDB_SSL_TRUST_CA_PATH:""} - # The group settings of record. # `gr` is the short name of the group settings of record. 
# @@ -272,7 +261,6 @@ storage: grSuperShardNum: ${SW_STORAGE_BANYANDB_GR_SUPER_SHARD_NUM:2} grSuperSIDays: ${SW_STORAGE_BANYANDB_GR_SUPER_SI_DAYS:1} grSuperTTLDays: ${SW_STORAGE_BANYANDB_GR_SUPER_TTL_DAYS:3} - # The group settings of metrics. # `gm` is the short name of the group settings of metrics. # @@ -281,21 +269,21 @@ storage: # Non-"minute" are governed by the "core.downsampling" setting. # For example, if "core.downsampling" is set to "hour", the "hour" will be used, while "day" are ignored. gmMinuteShardNum: ${SW_STORAGE_BANYANDB_GM_MINUTE_SHARD_NUM:2} - gmMinuteSIDays: ${SW_STORAGE_BANYANDB_GM_MINUTE_SI_DAYS:7} + gmMinuteSIDays: ${SW_STORAGE_BANYANDB_GM_MINUTE_SI_DAYS:1} gmMinuteTTLDays: ${SW_STORAGE_BANYANDB_GM_MINUTE_TTL_DAYS:7} gmHourShardNum: ${SW_STORAGE_BANYANDB_GM_HOUR_SHARD_NUM:1} - gmHourSIDays: ${SW_STORAGE_BANYANDB_GM_HOUR_SI_DAYS:1} - gmHourTTLDays: ${SW_STORAGE_BANYANDB_GM_HOUR_TTL_DAYS:7} + gmHourSIDays: ${SW_STORAGE_BANYANDB_GM_HOUR_SI_DAYS:5} + gmHourTTLDays: ${SW_STORAGE_BANYANDB_GM_HOUR_TTL_DAYS:15} gmDayShardNum: ${SW_STORAGE_BANYANDB_GM_DAY_SHARD_NUM:1} - gmDaySIDays: ${SW_STORAGE_BANYANDB_GM_DAY_SI_DAYS:1} - gmDayTTLDays: ${SW_STORAGE_BANYANDB_GM_DAY_TTL_DAYS:30} + gmDaySIDays: ${SW_STORAGE_BANYANDB_GM_DAY_SI_DAYS:15} + gmDayTTLDays: ${SW_STORAGE_BANYANDB_GM_DAY_TTL_DAYS:15} # If the metrics is marked as "index_mode", the metrics will be stored in the "index" group. # The "index" group is designed to store metrics that are used for indexing without value columns. # Such as `service_traffic`, `network_address_alias`, etc. # "index_mode" requires BanyanDB *0.8.0* or later. 
- gmIndexShardNum: ${SW_STORAGE_BANYANDB_GM_INDEX_SHARD_NUM:1} - gmIndexSIDays: ${SW_STORAGE_BANYANDB_GM_INDEX_SI_DAYS:1} - gmIndexTTLDays: ${SW_STORAGE_BANYANDB_GM_INDEX_TTL_DAYS:30} + gmIndexShardNum: ${SW_STORAGE_BANYANDB_GM_INDEX_SHARD_NUM:2} + gmIndexSIDays: ${SW_STORAGE_BANYANDB_GM_INDEX_SI_DAYS:15} + gmIndexTTLDays: ${SW_STORAGE_BANYANDB_GM_INDEX_TTL_DAYS:15} agent-analyzer: selector: ${SW_AGENT_ANALYZER:default} diff --git a/oap-server/server-storage-plugin/storage-banyandb-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/banyandb/BanyanDBStorageProvider.java b/oap-server/server-storage-plugin/storage-banyandb-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/banyandb/BanyanDBStorageProvider.java index 807ac5c6166e..58e6e314b1db 100644 --- a/oap-server/server-storage-plugin/storage-banyandb-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/banyandb/BanyanDBStorageProvider.java +++ b/oap-server/server-storage-plugin/storage-banyandb-plugin/src/main/java/org/apache/skywalking/oap/server/storage/plugin/banyandb/BanyanDBStorageProvider.java @@ -121,6 +121,15 @@ public void onInitialized(final BanyanDBStorageConfig initialized) { @Override public void prepare() throws ServiceNotProvidedException, ModuleStartException { + if (config.getGmDayTTLDays() > config.getGmIndexTTLDays()) { + throw new ModuleStartException("gmDayTTLDays must be less than or equal to gmIndexTTLDays"); + } + if (config.getGmHourTTLDays() > config.getGmIndexTTLDays()) { + throw new ModuleStartException("gmHourTTLDays must be less than or equal to gmIndexTTLDays"); + } + if (config.getGmMinuteTTLDays() > config.getGmIndexTTLDays()) { + throw new ModuleStartException("gmMinuteTTLDays must be less than or equal to gmIndexTTLDays"); + } this.registerServiceImplementation(StorageBuilderFactory.class, new StorageBuilderFactory.Default()); this.client = new BanyanDBStorageClient(config);