feat(rate_limit): implement user rate limiting in tabby-webserver (#3484)

* feat(rate_limit): implement user rate limiting in tabby-webserver - Added `rate_limit` module to `ee/tabby-webserver/src/lib.rs`. - Updated `crates/http-api-bindings/Cargo.toml` and `Cargo.toml` to include `ratelimit` dependency. - Added `rate_limit.rs` to `ee/tabby-webserver/src/` with implementation for user rate limiting. - Configured rate limiters to allow 200 requests per minute per user. * update * update * add unit test * [autofix.ci] apply automated fixes * Update ee/tabby-webserver/src/rate_limit.rs * [autofix.ci] apply automated fixes * Update ee/tabby-webserver/src/rate_limit.rs Co-authored-by: Wei Zhang <[email protected]> * Update ee/tabby-webserver/src/rate_limit.rs Co-authored-by: Wei Zhang <[email protected]> --------- Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Wei Zhang <[email protected]>
TabbyML · Nov 29, 2024 · f66e503 · f66e503
1 parent 644c6ad
commit f66e503
Show file tree

Hide file tree

Showing 7 changed files with 81 additions and 1 deletion.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -70,6 +70,7 @@ logkit = "0.3"
 async-openai = "0.20"
 tracing-test = "0.2"
 clap = "4.3.0"
+ratelimit = "0.10"
 
 [workspace.dependencies.uuid]
 version = "1.3.3"

diff --git a/crates/http-api-bindings/Cargo.toml b/crates/http-api-bindings/Cargo.toml
@@ -18,7 +18,7 @@ tabby-common = { path = "../tabby-common" }
 tabby-inference = { path = "../tabby-inference" }
 ollama-api-bindings = { path = "../ollama-api-bindings" }
 async-openai.workspace = true
-ratelimit = "0.10"
+ratelimit.workspace = true
 tokio.workspace = true
 tracing.workspace = true
 

diff --git a/ee/tabby-webserver/Cargo.toml b/ee/tabby-webserver/Cargo.toml
@@ -54,6 +54,8 @@ cron = "0.12.1"
 async-stream.workspace = true
 logkit.workspace = true
 async-openai.workspace = true
+ratelimit.workspace = true
+cached.workspace = true
 
 [dev-dependencies]
 assert_matches.workspace = true

diff --git a/ee/tabby-webserver/src/lib.rs b/ee/tabby-webserver/src/lib.rs
@@ -4,6 +4,7 @@ mod hub;
 mod jwt;
 mod oauth;
 mod path;
+mod rate_limit;
 mod routes;
 mod service;
 mod webserver;

diff --git a/ee/tabby-webserver/src/rate_limit.rs b/ee/tabby-webserver/src/rate_limit.rs
@@ -0,0 +1,61 @@
+use std::time::Duration;
+
+use cached::{Cached, TimedCache};
+use tokio::sync::Mutex;
+
+pub struct UserRateLimiter {
+    /// Per-user rate limiters keyed by user ID, kept in a time-expiring cache behind an async mutex.
+    rate_limiters: Mutex<TimedCache<String, ratelimit::Ratelimiter>>,
+}
+
+static USER_REQUEST_LIMIT_PER_MINUTE: u64 = 30;
+
+impl Default for UserRateLimiter {
+    fn default() -> Self {
+        Self {
+            // The per-user limit is hardcoded per minute (`USER_REQUEST_LIMIT_PER_MINUTE`), so cached limiters are given a 60-second lifespan.
+            rate_limiters: Mutex::new(TimedCache::with_lifespan(60)),
+        }
+    }
+}
+
+impl UserRateLimiter {
+    pub async fn is_allowed(&self, user_id: &str) -> bool {
+        let mut rate_limiters = self.rate_limiters.lock().await;
+        let rate_limiter = rate_limiters.cache_get_or_set_with(user_id.to_string(), || {
+            // No live cached limiter for this user: build one that starts with one minute's quota and can bank up to twice that.
+            ratelimit::Ratelimiter::builder(USER_REQUEST_LIMIT_PER_MINUTE, Duration::from_secs(60))
+                .max_tokens(USER_REQUEST_LIMIT_PER_MINUTE * 2)
+                .initial_available(USER_REQUEST_LIMIT_PER_MINUTE)
+                .build()
+                .expect("Failed to create rate limiter")
+        });
+        if let Err(_sleep) = rate_limiter.try_wait() {
+            // No token could be taken, so the user is over the limit and the request is denied.
+            false
+        } else {
+            // A token was taken, so the request is allowed.
+            true
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+
+    use super::*;
+
+    #[tokio::test]
+    async fn test_user_rate_limiter() {
+        let user_id = "test_user";
+        let rate_limiter = UserRateLimiter::default();
+
+        // The first `USER_REQUEST_LIMIT_PER_MINUTE` requests must all be allowed.
+        for _ in 0..USER_REQUEST_LIMIT_PER_MINUTE {
+            assert!(rate_limiter.is_allowed(user_id).await);
+        }
+
+        // The very next request exceeds the limit and must be rejected.
+        assert!(!rate_limiter.is_allowed(user_id).await);
+    }
+}
diff --git a/ee/tabby-webserver/src/service/mod.rs b/ee/tabby-webserver/src/service/mod.rs
@@ -61,6 +61,7 @@ use tabby_schema::{
 use self::{
     analytic::new_analytic_service, email::new_email_service, license::new_license_service,
 };
+use crate::rate_limit::UserRateLimiter;
 struct ServerContext {
     db_conn: DbConn,
     mail: Arc<dyn EmailService>,
@@ -83,6 +84,8 @@ struct ServerContext {
     code: Arc<dyn CodeSearch>,
 
     setting: Arc<dyn SettingService>,
+
+    user_rate_limiter: UserRateLimiter,
 }
 
 impl ServerContext {
@@ -153,6 +156,7 @@ impl ServerContext {
             user_group,
             access_policy,
             db_conn,
+            user_rate_limiter: UserRateLimiter::default(),
         }
     }
 
@@ -223,6 +227,15 @@ impl WorkerService for ServerContext {
         }
 
         if let Some(user) = user {
+            // Apply rate limiting when `user` is not none.
+            if !self.user_rate_limiter.is_allowed(&user).await {
+                return axum::response::Response::builder()
+                    .status(StatusCode::TOO_MANY_REQUESTS)
+                    .body(Body::empty())
+                    .unwrap()
+                    .into_response();
+            }
+
             request.headers_mut().append(
                 HeaderName::from_static(USER_HEADER_FIELD_NAME),
                 HeaderValue::from_str(&user).expect("User must be valid header"),