Skip to content

Commit

Permalink
Emitting both a short term average and a long term average of shard
Browse files Browse the repository at this point in the history
throughput.

Scaling up relies on the short term average in order to rapidly
react to a change in throughput, while scaling down and the indexing scheduler relies on the long term average.
  • Loading branch information
fulmicoton committed Jul 8, 2024
1 parent 15d45c7 commit 075be7a
Show file tree
Hide file tree
Showing 8 changed files with 400 additions and 104 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ fn compute_load_per_shard(shard_entries: &[&ShardEntry]) -> NonZeroU32 {
let num_shards = shard_entries.len().max(1) as u64;
let average_throughput_per_shard_bytes: u64 = shard_entries
.iter()
.map(|shard_entry| shard_entry.ingestion_rate.0 as u64 * bytesize::MIB)
.map(|shard_entry| shard_entry.long_term_ingestion_rate.0 as u64 * bytesize::MIB)
.sum::<u64>()
.div_ceil(num_shards)
// A shard throughput cannot exceed PIPELINE_THROUGHPUT in the long term (this is
Expand Down
88 changes: 54 additions & 34 deletions quickwit/quickwit-control-plane/src/ingest/ingest_controller.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ use tokio::task::JoinHandle;
use tracing::{debug, enabled, error, info, warn, Level};
use ulid::Ulid;

use super::scaling_arbiter::ScalingArbiter;
use crate::control_plane::ControlPlane;
use crate::ingest::wait_handle::WaitHandle;
use crate::model::{ControlPlaneModel, ScalingMode, ShardEntry, ShardStats};
Expand Down Expand Up @@ -102,10 +103,7 @@ pub struct IngestController {
// This lock ensures that only one rebalance operation is performed at a time.
rebalance_lock: Arc<Mutex<()>>,
pub stats: IngestControllerStats,
// Threshold in MiB/s below which we decrease the number of shards.
scale_down_shards_threshold_mib_per_sec: f32,
// Threshold in MiB/s above which we increase the number of shards.
scale_up_shards_threshold_mib_per_sec: f32,
scaling_arbiter: ScalingArbiter,
}

impl fmt::Debug for IngestController {
Expand Down Expand Up @@ -192,9 +190,9 @@ impl IngestController {
replication_factor,
rebalance_lock: Arc::new(Mutex::new(())),
stats: IngestControllerStats::default(),
scale_up_shards_threshold_mib_per_sec: max_shard_ingestion_throughput_mib_per_sec * 0.8,
scale_down_shards_threshold_mib_per_sec: max_shard_ingestion_throughput_mib_per_sec
* 0.2,
scaling_arbiter: ScalingArbiter::with_max_shard_ingestion_throughput_mib_per_sec(
max_shard_ingestion_throughput_mib_per_sec,
),
}
}

Expand Down Expand Up @@ -291,20 +289,31 @@ impl IngestController {
&local_shards_update.source_uid,
&local_shards_update.shard_infos,
);
if shard_stats.avg_ingestion_rate >= self.scale_up_shards_threshold_mib_per_sec {
self.try_scale_up_shards(local_shards_update.source_uid, shard_stats, model, progress)
let Some(scaling_mode) = self.scaling_arbiter.should_scale(shard_stats) else {
return Ok(());
};

match scaling_mode {
ScalingMode::Up => {
self.try_scale_up_shards(
local_shards_update.source_uid,
shard_stats,
model,
progress,
)
.await?;
} else if shard_stats.avg_ingestion_rate <= self.scale_down_shards_threshold_mib_per_sec
&& shard_stats.num_open_shards > 1
{
self.try_scale_down_shards(
local_shards_update.source_uid,
shard_stats,
model,
progress,
)
.await?;
}
ScalingMode::Down => {
self.try_scale_down_shards(
local_shards_update.source_uid,
shard_stats,
model,
progress,
)
.await?;
}
}

Ok(())
}

Expand Down Expand Up @@ -1111,8 +1120,8 @@ fn find_scale_down_candidate(
*num_shards += 1;

if shard
.ingestion_rate
.cmp(&candidate.ingestion_rate)
.long_term_ingestion_rate
.cmp(&candidate.long_term_ingestion_rate)
.then_with(|| shard.shard_id.cmp(&candidate.shard_id))
.is_gt()
{
Expand Down Expand Up @@ -2082,13 +2091,14 @@ mod tests {
let shard_entries: Vec<ShardEntry> = model.all_shards().cloned().collect();

assert_eq!(shard_entries.len(), 1);
assert_eq!(shard_entries[0].ingestion_rate, 0);
assert_eq!(shard_entries[0].short_term_ingestion_rate, 0);

// Test update shard ingestion rate but no scale down because num open shards is 1.
let shard_infos = BTreeSet::from_iter([ShardInfo {
shard_id: ShardId::from(1),
shard_state: ShardState::Open,
ingestion_rate: RateMibPerSec(1),
short_term_ingestion_rate: RateMibPerSec(1),
long_term_ingestion_rate: RateMibPerSec(1),
}]);
let local_shards_update = LocalShardsUpdate {
leader_id: "test-ingester".into(),
Expand All @@ -2103,7 +2113,7 @@ mod tests {

let shard_entries: Vec<ShardEntry> = model.all_shards().cloned().collect();
assert_eq!(shard_entries.len(), 1);
assert_eq!(shard_entries[0].ingestion_rate, 1);
assert_eq!(shard_entries[0].short_term_ingestion_rate, 1);

// Test update shard ingestion rate with failing scale down.
let shards = vec![Shard {
Expand Down Expand Up @@ -2155,12 +2165,14 @@ mod tests {
ShardInfo {
shard_id: ShardId::from(1),
shard_state: ShardState::Open,
ingestion_rate: RateMibPerSec(1),
short_term_ingestion_rate: RateMibPerSec(1),
long_term_ingestion_rate: RateMibPerSec(1),
},
ShardInfo {
shard_id: ShardId::from(2),
shard_state: ShardState::Open,
ingestion_rate: RateMibPerSec(1),
short_term_ingestion_rate: RateMibPerSec(1),
long_term_ingestion_rate: RateMibPerSec(1),
},
]);
let local_shards_update = LocalShardsUpdate {
Expand All @@ -2178,12 +2190,14 @@ mod tests {
ShardInfo {
shard_id: ShardId::from(1),
shard_state: ShardState::Open,
ingestion_rate: RateMibPerSec(4),
short_term_ingestion_rate: RateMibPerSec(4),
long_term_ingestion_rate: RateMibPerSec(4),
},
ShardInfo {
shard_id: ShardId::from(2),
shard_state: ShardState::Open,
ingestion_rate: RateMibPerSec(4),
short_term_ingestion_rate: RateMibPerSec(4),
long_term_ingestion_rate: RateMibPerSec(4),
},
]);
let local_shards_update = LocalShardsUpdate {
Expand Down Expand Up @@ -2544,32 +2558,38 @@ mod tests {
ShardInfo {
shard_id: ShardId::from(1),
shard_state: ShardState::Open,
ingestion_rate: quickwit_ingest::RateMibPerSec(1),
short_term_ingestion_rate: quickwit_ingest::RateMibPerSec(1),
long_term_ingestion_rate: quickwit_ingest::RateMibPerSec(1),
},
ShardInfo {
shard_id: ShardId::from(2),
shard_state: ShardState::Open,
ingestion_rate: quickwit_ingest::RateMibPerSec(2),
short_term_ingestion_rate: quickwit_ingest::RateMibPerSec(2),
long_term_ingestion_rate: quickwit_ingest::RateMibPerSec(2),
},
ShardInfo {
shard_id: ShardId::from(3),
shard_state: ShardState::Open,
ingestion_rate: quickwit_ingest::RateMibPerSec(3),
short_term_ingestion_rate: quickwit_ingest::RateMibPerSec(3),
long_term_ingestion_rate: quickwit_ingest::RateMibPerSec(3),
},
ShardInfo {
shard_id: ShardId::from(4),
shard_state: ShardState::Open,
ingestion_rate: quickwit_ingest::RateMibPerSec(4),
short_term_ingestion_rate: quickwit_ingest::RateMibPerSec(4),
long_term_ingestion_rate: quickwit_ingest::RateMibPerSec(4),
},
ShardInfo {
shard_id: ShardId::from(5),
shard_state: ShardState::Open,
ingestion_rate: quickwit_ingest::RateMibPerSec(5),
short_term_ingestion_rate: quickwit_ingest::RateMibPerSec(5),
long_term_ingestion_rate: quickwit_ingest::RateMibPerSec(5),
},
ShardInfo {
shard_id: ShardId::from(6),
shard_state: ShardState::Open,
ingestion_rate: quickwit_ingest::RateMibPerSec(6),
short_term_ingestion_rate: quickwit_ingest::RateMibPerSec(6),
long_term_ingestion_rate: quickwit_ingest::RateMibPerSec(6),
},
]);
model.update_shards(&source_uid, &shard_infos);
Expand Down
1 change: 1 addition & 0 deletions quickwit/quickwit-control-plane/src/ingest/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
// along with this program. If not, see <http://www.gnu.org/licenses/>.

pub(crate) mod ingest_controller;
mod scaling_arbiter;
mod wait_handle;

pub use ingest_controller::IngestController;
Expand Down
119 changes: 119 additions & 0 deletions quickwit/quickwit-control-plane/src/ingest/scaling_arbiter.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
// Copyright (C) 2024 Quickwit, Inc.
//
// Quickwit is offered under the AGPL v3.0 and as commercial software.
// For commercial licensing, contact us at [email protected].
//
// AGPL:
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as
// published by the Free Software Foundation, either version 3 of the
// License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.

use crate::model::{ScalingMode, ShardStats};

pub(crate) struct ScalingArbiter {
// Threshold in MiB/s below which we decrease the number of shards.
scale_down_shards_threshold_mib_per_sec: f32,
// Threshold in MiB/s above which we increase the number of shards.
scale_up_shards_short_term_threshold_mib_per_sec: f32,
scale_up_shards_long_term_threshold_mib_per_sec: f32,
}

impl ScalingArbiter {
pub fn with_max_shard_ingestion_throughput_mib_per_sec(
max_shard_throughput_mib_per_sec: f32,
) -> ScalingArbiter {
ScalingArbiter {
scale_up_shards_short_term_threshold_mib_per_sec: max_shard_throughput_mib_per_sec
* 0.8f32,
scale_up_shards_long_term_threshold_mib_per_sec: max_shard_throughput_mib_per_sec
* 0.3f32,
scale_down_shards_threshold_mib_per_sec: max_shard_throughput_mib_per_sec * 0.2f32,
}
}

pub(crate) fn should_scale(&self, shard_stats: ShardStats) -> Option<ScalingMode> {
// We scale up based on the short term threshold to scale up more aggressively.
if shard_stats.avg_short_term_ingestion_rate
>= self.scale_up_shards_short_term_threshold_mib_per_sec
{
let long_term_ingestion_rate_after_scale_up = shard_stats.avg_long_term_ingestion_rate
* (shard_stats.num_open_shards as f32)
/ (shard_stats.num_open_shards as f32 + 1.0f32);
if long_term_ingestion_rate_after_scale_up
>= self.scale_up_shards_long_term_threshold_mib_per_sec
{
return Some(ScalingMode::Up);
}
}

// On the other hand, we scale down based on the long term ingestion rate, to avoid
// scaling down just due to a very short drop in ingestion
if shard_stats.avg_long_term_ingestion_rate <= self.scale_down_shards_threshold_mib_per_sec
&& shard_stats.num_open_shards > 1
{
return Some(ScalingMode::Down);
}

None
}
}

#[cfg(test)]
mod tests {
use super::ScalingArbiter;
use crate::model::{ScalingMode, ShardStats};

#[test]
fn test_scaling_arbiter() {
let scaling_arbiter = ScalingArbiter::with_max_shard_ingestion_throughput_mib_per_sec(10.0);
assert_eq!(
scaling_arbiter.should_scale(ShardStats {
num_open_shards: 1,
avg_short_term_ingestion_rate: 5.0,
avg_long_term_ingestion_rate: 6.0,
}),
None
);
assert_eq!(
scaling_arbiter.should_scale(ShardStats {
num_open_shards: 1,
avg_short_term_ingestion_rate: 8.1,
avg_long_term_ingestion_rate: 8.1,
}),
Some(ScalingMode::Up)
);
assert_eq!(
scaling_arbiter.should_scale(ShardStats {
num_open_shards: 2,
avg_short_term_ingestion_rate: 3.0,
avg_long_term_ingestion_rate: 1.5,
}),
Some(ScalingMode::Down)
);
assert_eq!(
scaling_arbiter.should_scale(ShardStats {
num_open_shards: 1,
avg_short_term_ingestion_rate: 3.0,
avg_long_term_ingestion_rate: 1.5,
}),
None,
);
assert_eq!(
scaling_arbiter.should_scale(ShardStats {
num_open_shards: 1,
avg_short_term_ingestion_rate: 8.0f32,
avg_long_term_ingestion_rate: 3.0f32,
}),
None,
);
}
}
Loading

0 comments on commit 075be7a

Please sign in to comment.