-
Notifications
You must be signed in to change notification settings - Fork 23
/
Copy pathstorage.proto
520 lines (412 loc) · 22.2 KB
/
storage.proto
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
syntax = "proto2";
package NCloud.NFileStore.NProto;
import "cloud/storage/core/protos/authorization_mode.proto";
import "cloud/storage/core/protos/certificate.proto";
import "cloud/storage/core/protos/config_dispatcher_settings.proto";
option go_package = "github.com/ydb-platform/nbs/cloud/filestore/config";
////////////////////////////////////////////////////////////////////////////////
// Selects how blob index operations are prioritized relative to each other.
// Used as the type of TStorageConfig.BlobIndexOpsPriority.
// Value numbers are part of the wire format and must not be changed.
enum EBlobIndexOpsPriority
{
// First listed value, i.e. the proto2 default when a field of this type is
// unset. Presumably Cleanup is scheduled ahead of Compaction - TODO confirm
// against the scheduler implementation.
BIOP_CLEANUP_FIRST = 0;
// Presumably Compaction is scheduled ahead of Cleanup - TODO confirm.
BIOP_COMPACTION_FIRST = 1;
// Fair scheduling. This is also the mode that scheduling falls back to once
// TStorageConfig.BackpressurePercentageForFairBlobIndexOpsPriority is reached.
BIOP_FAIR = 2;
}
////////////////////////////////////////////////////////////////////////////////
//
// !!!IMPORTANT!!!
// Even though StorageConfig is usually stored in textual format, it may be
// overridden for some tablets and in this case it's stored in binary format
// in the tablet's localdb. So binary backward-compatibility should be
// maintained.
//
////////////////////////////////////////////////////////////////////////////////
// Filestore storage configuration: tablet/pipe settings, channel pool kinds,
// performance profiles, throttling, background operation thresholds, caches
// and sharding parameters.
// Field numbers are part of the binary format and must never be changed or
// reused; new fields must be added with fresh numbers.
message TStorageConfig
{
// Schemeshard directory for tablets.
optional string SchemeShardDir = 1;
// Number of retries before pipe client reports failure.
optional uint32 PipeClientRetryCount = 2;
// Minimum timeout before pipe client reconnect attempt.
optional uint32 PipeClientMinRetryTime = 3;
// Maximum timeout before pipe client reconnect attempt.
optional uint32 PipeClientMaxRetryTime = 4;
// Timeout before automatically removing session.
optional uint32 IdleSessionTimeout = 5;
// Timeout for establishing session.
optional uint32 EstablishSessionTimeout = 6;
// Enables write batching.
optional bool WriteBatchEnabled = 7;
// Write batching timeout.
optional uint32 WriteBatchTimeout = 8;
// Minimum write request size (in bytes) that lets us write the data
// directly to blobstorage (as a mixed/merged blob).
optional uint32 WriteBlobThreshold = 9;
// The size of data (in bytes) in the fresh blocks table that triggers
// flushing.
optional uint32 FlushThreshold = 10;
// Thresholds for background ops.
optional uint32 CleanupThreshold = 11;
optional uint32 CompactionThreshold = 12;
optional uint32 CollectGarbageThreshold = 13;
// The size of data (in bytes) in the fresh bytes table that triggers
// flushing.
optional uint64 FlushBytesThreshold = 14;
// Size of allocation unit for HDD drives (in GiB).
optional uint32 AllocationUnitHDD = 15;
// Size of allocation unit for SSD drives (in GiB).
optional uint32 AllocationUnitSSD = 16;
// Channel configuration for HDD.
optional string HDDSystemChannelPoolKind = 17;
optional string HDDLogChannelPoolKind = 18;
optional string HDDIndexChannelPoolKind = 19;
optional string HDDFreshChannelPoolKind = 20;
optional string HDDMixedChannelPoolKind = 21;
// Channel configuration for SSD.
optional string SSDSystemChannelPoolKind = 22;
optional string SSDLogChannelPoolKind = 23;
optional string SSDIndexChannelPoolKind = 24;
optional string SSDFreshChannelPoolKind = 25;
optional string SSDMixedChannelPoolKind = 26;
// Performance per allocation unit for HDD.
optional uint32 HDDUnitReadBandwidth = 27; // in MiB/s
optional uint32 HDDUnitWriteBandwidth = 28; // in MiB/s
optional uint32 HDDMaxReadBandwidth = 29; // in MiB/s
optional uint32 HDDMaxWriteBandwidth = 30; // in MiB/s
optional uint32 HDDUnitReadIops = 31;
optional uint32 HDDUnitWriteIops = 32;
optional uint32 HDDMaxReadIops = 33;
optional uint32 HDDMaxWriteIops = 34;
// Performance per allocation unit for SSD.
optional uint32 SSDUnitReadBandwidth = 35; // in MiB/s
optional uint32 SSDUnitWriteBandwidth = 36; // in MiB/s
optional uint32 SSDMaxReadBandwidth = 37; // in MiB/s
optional uint32 SSDMaxWriteBandwidth = 38; // in MiB/s
optional uint32 SSDUnitReadIops = 39;
optional uint32 SSDUnitWriteIops = 40;
optional uint32 SSDMaxReadIops = 41;
optional uint32 SSDMaxWriteIops = 42;
// Minimal channel count for the tablet configuration.
optional uint32 MinChannelCount = 43;
// Maximum number of bytes in response.
optional uint32 MaxResponseBytes = 44;
// Channel configuration for Hybrid.
optional string HybridSystemChannelPoolKind = 45;
optional string HybridLogChannelPoolKind = 46;
optional string HybridIndexChannelPoolKind = 47;
optional string HybridFreshChannelPoolKind = 48;
optional string HybridMixedChannelPoolKind = 49;
// Override storage media kind for too slow HDD.
optional uint32 HDDMediaKindOverride = 50;
// Nodes limit & ratio for the filesystem.
optional uint32 DefaultNodesLimit = 51;
optional uint32 SizeToNodesRatio = 52;
// Do not allow to start tablets on specific nodes.
optional bool DisableLocalService = 53;
// Number of last request ids kept per session for the deduplication cache.
optional uint32 DupCacheEntryCount = 54;
// Process no more than this number of new/garbage blobs per one
// DeleteGarbage tx.
optional uint32 MaxDeleteGarbageBlobsPerTx = 55;
// Run CollectGarbage at start.
optional bool EnableCollectGarbageAtStart = 56;
// Enables persistent backup for tablet boot infos.
optional string TabletBootInfoBackupFilePath = 57;
// In fallback mode, all requests to Hive are served from cache.
optional bool HiveProxyFallbackMode = 58;
// Thresholds which enable backpressure.
optional uint32 FlushThresholdForBackpressure = 59;
optional uint32 CleanupThresholdForBackpressure = 60;
optional uint32 CompactionThresholdForBackpressure = 61;
optional uint64 FlushBytesThresholdForBackpressure = 62;
// Threshold for blob size in bytes.
optional uint32 MaxBlobSize = 63;
// Enable file system throttling.
optional bool ThrottlingEnabled = 64;
// Max blocks count for a single truncate tx.
optional uint32 MaxBlocksPerTruncateTx = 65;
// NOTE(review): undocumented; presumably the max number of truncate txs
// that may be in flight at once - TODO confirm.
optional uint32 MaxTruncateTxInflight = 66;
// Maximum number of entries in response.
optional uint32 MaxResponseEntries = 67;
// Performance profile configuration for SSD.
optional bool SSDThrottlingEnabled = 68;
optional uint32 SSDBoostTime = 69; // in ms
optional uint32 SSDBoostRefillTime = 70; // in ms
optional uint32 SSDUnitBoost = 71;
optional uint32 SSDBurstPercentage = 72;
optional uint32 SSDDefaultPostponedRequestWeight = 73;
optional uint32 SSDMaxPostponedWeight = 74;
optional uint32 SSDMaxWriteCostMultiplier = 75;
optional uint32 SSDMaxPostponedTime = 76; // in ms
optional uint32 SSDMaxPostponedCount = 77;
// Performance profile configuration for HDD.
optional bool HDDThrottlingEnabled = 78;
optional uint32 HDDBoostTime = 79; // in ms
optional uint32 HDDBoostRefillTime = 80; // in ms
optional uint32 HDDUnitBoost = 81;
optional uint32 HDDBurstPercentage = 82;
optional uint32 HDDDefaultPostponedRequestWeight = 83;
optional uint32 HDDMaxPostponedWeight = 84;
optional uint32 HDDMaxWriteCostMultiplier = 85;
optional uint32 HDDMaxPostponedTime = 86; // in ms
optional uint32 HDDMaxPostponedCount = 87;
// Timeout in between forced range compaction attempts.
optional uint32 CompactionRetryTimeout = 88; // in ms
// Min percentage of reassignable channels after which reassign requests
// are sent.
optional uint32 ReassignChannelsPercentageThreshold = 89;
// NOTE(review): undocumented; units and exact semantics are not visible
// here - TODO document.
optional uint32 CpuLackThreshold = 90;
// NOTE(review): undocumented; presumably selects how requests are
// authorized (see FolderId below) - TODO confirm.
optional NCloud.NProto.EAuthorizationMode AuthorizationMode = 91;
// FolderId of this NFS instance. Used for authorization.
optional string FolderId = 92;
// Number of last sessions kept in storage.
optional uint32 SessionHistoryEntryCount = 93;
// NOTE: field numbers jump from 93 to 329 here. Numbers 94-328 are not
// declared in this message; do not assume they are free for reuse without
// checking the file history.
// Tenant hive tablet id different from root to be used by hive proxy.
// Should be configured once and for the lifetime of the cluster.
optional uint64 TenantHiveTabletId = 329;
// Max number of compaction ranges loaded per 1 compaction state load
// iteration.
optional uint32 LoadedCompactionRangesPerTx = 330;
// Enables DescribeData + ReadBlob instead of ReadData for reading.
optional bool TwoStageReadEnabled = 331;
// Enables runtime config update tool.
optional bool ConfigsDispatcherServiceEnabled = 332;
// Max inflight for out of order compaction map load requests.
optional uint32 MaxOutOfOrderCompactionMapLoadRequestsInQueue = 333;
// IndexTabletActor will suicide (and thus reboot) after observing this
// number of backpressure errors. Needed to automatically recover after
// various races that may happen during index tablet startup due to bugs.
optional uint32 MaxBackpressureErrorsBeforeSuicide = 334;
// Params that are passed to filestore-vhost via TCreateSessionResponse via
// TFilestore::Features. They do not have any effect on the tablet itself.
optional uint32 EntryTimeout = 335;
optional uint32 NegativeEntryTimeout = 336;
optional uint32 AttrTimeout = 337;
// Threshold for the number of garbage blocks in a compaction range that
// triggers automatic compaction.
optional uint32 GarbageCompactionThreshold = 338;
// Threshold for average CompactionScore for the whole FS.
optional uint32 CompactionThresholdAverage = 339;
// Threshold for average GarbageCompactionScore for the whole FS.
optional uint32 GarbageCompactionThresholdAverage = 340;
// Enables 3 aforementioned thresholds.
optional bool NewCompactionEnabled = 341;
// Threshold for average CleanupScore for the whole FS.
optional uint32 CleanupThresholdAverage = 342;
// Enables the aforementioned threshold.
optional bool NewCleanupEnabled = 343;
// Enables GenerateBlobIds + WriteBlob + AddData instead of WriteBlob
// for writing.
optional bool ThreeStageWriteEnabled = 344;
// When issuing blob ids, the tablet acquires a collect barrier. In order
// to release it in case of a client disconnect, this timeout is used.
optional uint32 GenerateBlobIdsReleaseCollectBarrierTimeout = 345;
// If ThreeStageWriteEnabled is true, writes that exceed this threshold
// will use the three-stage write path. Similar to WriteBlobThreshold.
optional uint32 ThreeStageWriteThreshold = 346;
// ReadAhead cache params. Used upon DescribeData requests.
// If ReadAheadCacheRangeSize is not 0 the requested ranges may be widened
// to this value and the results will be cached to serve future describe
// requests. The requested range is widened if the previous 32 DescribeData
// requests represent a pattern which is similar to sequential. The similar
// part is controlled by the ReadAheadMaxGapPercentage parameter which
// regulates the maximum number of skipped bytes in an otherwise sequential
// pattern.
optional uint32 ReadAheadCacheMaxNodes = 347;
optional uint32 ReadAheadCacheMaxResultsPerNode = 348;
optional uint32 ReadAheadCacheRangeSize = 349;
optional uint32 ReadAheadMaxGapPercentage = 350;
optional uint32 ReadAheadCacheMaxHandlesPerNode = 351;
// If set to true, tablets will use the compaction policy, described in their
// schemas. If set to false, tablets will use the old compaction policy, which
// is None, thus making the table use schema, set for this table beforehand.
// If no schema is set, the table will use the default schema.
//
// Thus, enabling `NewLocalDBCompactionPolicyEnabled` flag will make some
// tables use the new, three-generations compaction policy, for all the tablets,
// that have been restarted after the flag was set to true.
//
// Disabling this flag afterwards is not supposed to affect these tables, as
// they will return to using the None compaction policy, which does not trigger
// `SetCompactionPolicy` alter operation.
optional bool NewLocalDBCompactionPolicyEnabled = 352;
// If greater than 0, the node index cache will be used. It caches
// GetNodeAttrRequests to avoid executing GetNodeAttr tx for hot nodes.
optional uint32 NodeIndexCacheMaxNodes = 353;
// By default, the blocksize, provided by the GetNodeAttr request, is set to
// the filestore blocksize. Due to the fact that every 64 blocks are stored in
// the same range, it may be beneficial to increase the reported blocksize x-fold.
// This numeric value is that x.
optional uint32 PreferredBlockSizeMultiplier = 354;
// Enables request forwarding to shards in the StorageServiceActor.
// Forwarding is done based on the NodeId and Handle high bits.
// Disabling this thing shouldn't really be needed apart from tests.
optional bool MultiTabletForwardingEnabled = 355;
// Controls BlobIndexOps' priority over each other.
optional EBlobIndexOpsPriority BlobIndexOpsPriority = 356;
// Allow to destroy filestore with active sessions.
optional bool AllowFileStoreForceDestroy = 357;
// Enables usage of GetNodeAttrBatch requests instead of GetNodeAttr when
// appropriate.
optional bool GetNodeAttrBatchEnabled = 358;
// Max number of items to delete during TrimBytes.
optional uint64 TrimBytesItemCount = 359;
// Auth token for node registration via the YDB discovery API.
optional string NodeRegistrationToken = 360;
// Node type.
optional string NodeType = 361;
// TLS node registration details.
optional string NodeRegistrationRootCertsFile = 362;
optional NCloud.NProto.TCertificate NodeRegistrationCert = 363;
// Blob compression experiment params.
optional uint32 BlobCompressionRate = 364;
optional string BlobCompressionCodec = 365;
// Enables ThreeStageWrite for unaligned requests.
optional bool UnalignedThreeStageWriteEnabled = 366;
// Number of ranges with zero compaction stats to delete per tx.
optional uint32 MaxZeroCompactionRangesToDeletePerTx = 367;
// Mapping allowing for multiple fs ids to point to the same fs.
// A single alias -> filesystem id pair.
message TFilestoreAliasEntry
{
optional string Alias = 1;
optional string FsId = 2;
}
// The list of alias entries.
message TFilestoreAliases
{
repeated TFilestoreAliasEntry Entries = 1;
}
optional TFilestoreAliases FilestoreAliases = 368;
// Channel free space threshold - used for write request balancing.
optional uint32 ChannelFreeSpaceThreshold = 369;
// Channel min free space - used for write request balancing.
optional uint32 ChannelMinFreeSpace = 370;
// Enables in-memory index cache for all index-related requests.
optional bool InMemoryIndexCacheEnabled = 371;
// Capacity of in-memory index cache, in number of entries per each table.
optional uint64 InMemoryIndexCacheNodesCapacity = 372;
// Ratio of maximal number of nodes in the fs to the in-memory index cache
// capacity. I.e., if there are max 1000 nodes and ratio is set to 10,
// then there can be max 100 entries in cache. If both capacity and ratio
// are set, the greater value is used.
optional uint64 InMemoryIndexCacheNodesToNodesCapacityRatio = 373;
// NOTE(review): the four fields below are undocumented; presumably the same
// capacity / ratio pair as above for the NodeAttrs and NodeRefs tables -
// TODO confirm.
optional uint64 InMemoryIndexCacheNodeAttrsCapacity = 374;
optional uint64 InMemoryIndexCacheNodesToNodeAttrsCapacityRatio = 375;
optional uint64 InMemoryIndexCacheNodeRefsCapacity = 376;
optional uint64 InMemoryIndexCacheNodesToNodeRefsCapacityRatio = 377;
// Used to send non-network metrics as network ones to HIVE,
// while we use them for load balancing.
optional uint32 NonNetworkMetricsBalancingFactor = 378;
// Async processing of destroy handle requests.
optional bool AsyncDestroyHandleEnabled = 379;
// Period of processing create/destroy handle requests.
optional uint32 AsyncHandleOperationPeriod = 380;
// Dynamic node registration params.
optional uint32 NodeRegistrationMaxAttempts = 381;
optional uint32 NodeRegistrationTimeout = 382; // in ms
optional uint32 NodeRegistrationErrorTimeout = 383; // in ms
// Max block count per file.
// uint32 is chosen deliberately - using values that exceed 2^32 - 1 or even
// 2^31 (which is 8TiB for a 4KiB block) should be thoroughly tested anyway.
optional uint32 MaxFileBlocks = 384;
// Enables the usage of large deletion markers (needed for efficient
// truncate ops on large files).
optional bool LargeDeletionMarkersEnabled = 385;
// Sets max block count per single large deletion marker.
optional uint64 LargeDeletionMarkerBlocks = 386;
// Truncate and allocate ops that exceed this threshold will lead to large
// deletion marker generation.
optional uint64 LargeDeletionMarkersThreshold = 387;
// If the number of blocks marked for deletion via large deletion markers
// exceeds this threshold, Cleanup will be triggered.
optional uint64 LargeDeletionMarkersCleanupThreshold = 388;
// Throttle DescribeData and GenerateBlobIds requests.
optional bool MultipleStageRequestThrottlingEnabled = 389;
// After reaching this percentage of Compaction/Cleanup backpressure
// thresholds BlobIndexOps scheduling falls back to BIOP_FAIR.
optional uint32 BackpressurePercentageForFairBlobIndexOpsPriority = 390;
// If enabled, GarbageCompactionThresholdAverage will be compared to the
// difference between MixedBlocks and UsedBlocks instead of the difference
// between alive blocks and UsedBlocks. This actually should be the default
// behaviour but is implemented via this flag in order not to cause
// uncontrollable behaviour change for production systems. TODO: gradually
// enable this flag everywhere and make this behaviour the new default.
optional bool UseMixedBlocksInsteadOfAliveBlocksInCompaction = 391;
// IndexTabletActor will suicide (and thus reboot) after observing
// consecutive backpressure errors for this period of time. Needed to
// automatically recover after various races that may happen during index
// tablet startup due to bugs.
optional uint32 MaxBackpressurePeriodBeforeSuicide = 392; // in ms
// Settings for the YDB config dispatcher service.
optional NCloud.NProto.TConfigDispatcherSettings ConfigDispatcherSettings = 393;
// If the number of blocks marked for deletion via large deletion markers
// exceeds this threshold, large truncate-like operations will be rejected.
optional uint64 LargeDeletionMarkersThresholdForBackpressure = 394;
// Enables persistent backup for tablet path descriptions.
optional string PathDescriptionBackupFilePath = 395;
// Server will reject all requests to destroy filestores in the provided
// list.
repeated string DestroyFilestoreDenyList = 396;
// In fallback mode, all requests to SchemeShard are served from backup.
optional bool SSProxyFallbackMode = 397;
// Enables index cache loading on tablet startup. Enables ListNodes
// responses from cache.
optional bool InMemoryIndexCacheLoadOnTabletStart = 398;
// NOTE(review): undocumented; presumably the number of rows loaded per tx
// while loading the index cache on tablet start - TODO confirm.
optional uint64 InMemoryIndexCacheLoadOnTabletStartRowsPerTx = 399;
// Blob is divided into chunks of a certain size (in bytes) and each chunk
// is compressed separately.
optional uint32 BlobCompressionChunkSize = 400;
// If Compaction encounters a range whose garbage percentage is <= the
// garbage threshold below and whose blobs' average size is >= the average
// blob size threshold below, it just marks this range as 'compacted' and
// moves on without rewriting the data belonging to this range.
optional uint32 CompactRangeGarbagePercentageThreshold = 401;
optional uint32 CompactRangeAverageBlobSizeThreshold = 402;
// Disables ThreeStageWrite for HDD filesystems.
optional bool ThreeStageWriteDisabledForHDD = 403;
// Disables TwoStageRead for HDD filesystems.
optional bool TwoStageReadDisabledForHDD = 404;
// Enables automatic shard creation for new filesystems upon create/resize
// operations. Shard count is calculated based on FS size. If this flag is
// enabled, new filesystems will have this feature enabled and will create
// extra shards upon resize if needed. Existing filesystems are not
// affected. Filesystems created with this flag enabled will maintain the
// autosharding feature even if this flag is disabled in the global storage
// config.
optional bool AutomaticShardCreationEnabled = 405;
// Affects shard count calculation if AutomaticShardCreationEnabled is on.
optional uint64 ShardAllocationUnit = 406;
// Enable Writeback cache on guest (fuse client).
optional bool GuestWritebackCacheEnabled = 407;
// Ignore orphan sessions upon filestore sessions check during destruction
// attempts.
optional bool AllowFileStoreDestroyWithOrphanSessions = 408;
// Automatically created shards will be of this size.
optional uint64 AutomaticallyCreatedShardSize = 409;
// Forbids session creation (forces E_REJECTED) if filesystem shard count
// calculated based on the filesystem's size is too small.
// We can't enable this logic by default yet since we need to maintain
// backward compatibility during the update which enables automatic
// sharding.
optional bool EnforceCorrectFileSystemShardCountUponSessionCreation = 410;
// Enables ShardId selection in Leader for CreateNode and CreateHandle
// requests. ShardIds selected in Leader have priority over ShardIds
// selected at the StorageServiceActor level.
optional bool ShardIdSelectionInLeaderEnabled = 411;
// Shard balancer will prefer shards which have at least this amount of
// free space in bytes.
optional uint64 ShardBalancerDesiredFreeSpaceReserve = 412;
// Shard balancer will not select shards which have less than this amount of
// free space in bytes.
optional uint64 ShardBalancerMinFreeSpaceReserve = 413;
// Enables directory creation in shards (by default directories are created
// only in the main tablet).
optional bool DirectoryCreationInShardsEnabled = 414;
// Mixed blocks maps are stored in memory only for actively used ranges.
// Additionally MixedBlocksOffloadedRangesCapacity ranges that are not
// actively used are also stored in memory. Their memory footprint is
// proportional to the aforementioned value multiplied by the size of
// TRange.
optional uint64 MixedBlocksOffloadedRangesCapacity = 415;
// NOTE(review): undocumented; presumably enables the YDB viewer service -
// TODO confirm.
optional bool YdbViewerServiceEnabled = 416;
// Delay between consecutive batch loads during the InMemoryIndexCache load.
optional uint32 InMemoryIndexCacheLoadSchedulePeriod = 417; // in ms
}