diff --git a/README.md b/README.md index 96d8750..4816d5b 100644 --- a/README.md +++ b/README.md @@ -54,11 +54,15 @@ most of the moving parts involved: ## Design Goals * Protect all devices in a network with zero client-side configuration or software -* Dynamically throttle traffic from clients to configurable sites or groups of sites to "train" good habits [1] +* Dynamically throttle traffic from clients to configurable sites or groups of sites to "train" good habits * Use only IP and (sniffed) DNS (i.e. as close to a pure L4 solution as possible) * Introduce no latency on "good" traffic * Usable on minimal hardware like an Orange Pi R1plus or similar +## Current milestones + +Milestone 8: Improved overall installation experience and support +for lower-end 32-bit arm devices such as Orange Pi Zero/R1. ## References & Acknowlegements diff --git a/config/config.go b/config/config.go index 460c2fa..906a653 100644 --- a/config/config.go +++ b/config/config.go @@ -3,6 +3,7 @@ package config import ( "log" "os" + "time" "gopkg.in/yaml.v3" ) @@ -16,6 +17,9 @@ var baseConfig = Config{ DnsDb: "/root/dns.db", PrometheusUrl: "http://localhost:9090", + MapCollectionInterval: time.Second * 30, + PolicyCheckInterval: time.Minute * 1, + // Start degrading from 1mbit down to 10kbits, over 5 bandwidth classes NumQdiscClasses: 5, StartRateKbs: 1000, @@ -23,7 +27,7 @@ var baseConfig = Config{ MinRateKbs: 50, ActivityThresholdBytes: 100, - PolicyBackoffMinutes: 3, + PolicyBackoffInterval: time.Minute * 3, } var Cfg = baseConfig @@ -39,6 +43,11 @@ type Config struct { DnsDb string `yaml:"dns_db"` PrometheusUrl string `yaml:"prometheus_url"` + // Operational parameters for tuning use on SBCs with different + // capabilities + MapCollectionInterval time.Duration `yaml:"map_collection_interval"` + PolicyCheckInterval time.Duration `yaml:"policy_check_interval"` + // Parameters for the htb / netem qdiscs and classes we'll create on the fly // WanMbs represents the max bandwidth of your 
downstream internet connection // The MinRateKbs and MaxDelayMs represent the maximum bandwidth degradation ; @@ -58,10 +67,10 @@ type Config struct { // the number of bytes considered "active" for the prom rate // queries ActivityThresholdBytes int `yaml:"activity_threshold_bytes"` - // How many minutes before we'll consider a change to policy; + // How long before we'll consider a change to policy; // setting this lower will cause clients to be moved up and // down bandwidth classes faster - PolicyBackoffMinutes int `yaml:"policy_backoff_minutes"` + PolicyBackoffInterval time.Duration `yaml:"policy_backoff_interval"` } func ParseConfig(file string) { diff --git a/config/nethadone.yml b/config/nethadone.yml index ace036d..8ed723e 100644 --- a/config/nethadone.yml +++ b/config/nethadone.yml @@ -7,6 +7,18 @@ cfg_db: "/root/cfg.db" dns_db: "/root/dns.db" prometheus_url: "http://localhost:9090" +### +# Operational parameters + +# How frequently we parse the eBPF-level maps for ip bandwidth usage +# For sites with many clients, and on lower-end hardware such as +# 32-bit orange pi zero/r1, this shouldn't be more frequent than +# 30s until the parsing code can be optimized +map_collection_interval: 30s + +# How frequently we check for policy violations; +policy_check_interval: 1m + # Parameters for the htb # netem qdiscs and classes we'll create on the fly # WanMbs represents the max bandwidth of your downstream internet connection # The MinRateKbs and MaxDelayMs represent the maximum bandwidth degradation ; @@ -30,7 +42,7 @@ min_rate_kbs: 50 # this can be set to a very low number like 0. For more bandwidth heavy # sites, a larger number may make sense. 
activity_threshold_bytes: 1 -# How many minutes before we'll consider a change to policy; +# How long before we'll consider a change to policy; # setting this lower will cause clients to be moved up and # down bandwidth classes faster -policy_backoff_minutes: 2 \ No newline at end of file +policy_backoff_interval: 2m \ No newline at end of file diff --git a/database/prom.go b/database/prom.go index b4e9d96..112692f 100644 --- a/database/prom.go +++ b/database/prom.go @@ -30,9 +30,9 @@ func GetSrcGlobUsage(rate int, mins int, k int, above bool) model.Vector { tm := time.Now().Add(-1 * dr) result, warnings, err := queryAPI.Query(context.Background(), pql, tm) if err != nil { - log.Println("error trying to query prometheus: ", err, warnings) - } - sample := result.(model.Vector) + log.Println("unable to query prometheus, policy changes will not be possible: ", err, warnings) + return model.Vector{} + } - return sample + return result.(model.Vector) } diff --git a/handlers/prom.go b/handlers/prom.go index 3eef5ae..ab6df56 100644 --- a/handlers/prom.go +++ b/handlers/prom.go @@ -6,6 +6,7 @@ import ( "time" "github.com/VictoriaMetrics/metrics" + "github.com/atomic77/nethadone/config" ) func InitMetrics() { @@ -15,12 +16,12 @@ func InitMetrics() { } func pollMetrics() { - log.Println("Setting up metrics collector") - for range time.Tick(time.Second * 15) { + log.Println("Setting up metrics collector on ", config.Cfg.MapCollectionInterval, " interval") + for range time.Tick(config.Cfg.MapCollectionInterval) { // TODO Once we get into the 1000s of IP pairs being tracked, the // performance of this method becomes quite slow bl := getBandwidthList(false) - log.Println("Tick happened, collected ", len(bl), " pairs") + log.Println("Collected bandwidth stats from ", len(bl), " ip pairs") for _, b := range bl { s := fmt.Sprintf( `ip_pair_vic_bytes_total{%s="%s", %s="%s", %s="%s", %s="%s"}`, diff --git a/policy/simpleloadavg.go b/policy/simpleloadavg.go index
fe4d84c..56912f4 100644 --- a/policy/simpleloadavg.go +++ b/policy/simpleloadavg.go @@ -74,7 +74,7 @@ func increaseThrottling() bool { continue } - backOff := time.Now().Add(time.Minute * time.Duration(config.Cfg.PolicyBackoffMinutes)) + backOff := time.Now().Add(-config.Cfg.PolicyBackoffInterval) if p.Tstamp.After(backOff) { log.Println("too soon to update policy for ", src_ip, glob) } else if p.Class > config.Cfg.NumQdiscClasses*10 { @@ -109,7 +109,7 @@ func decreasingThrottling() bool { // After testing set this back to 5 or some more sensible default // from configuration - backOff := time.Now().Add(time.Minute * -1) + backOff := time.Now().Add(-config.Cfg.PolicyBackoffInterval) if p.Tstamp.After(backOff) { log.Println("too soon to update policy for ", src_ip, glob) } else if p.Class <= 10 { @@ -126,8 +126,8 @@ func decreasingThrottling() bool { } func pollPolicyCheck() { - log.Println("Setting up metrics collector") - for range time.Tick(time.Minute * 1) { + log.Println("Setting up policy checker on ", config.Cfg.PolicyCheckInterval, " interval ") + for range time.Tick(config.Cfg.PolicyCheckInterval) { log.Println("Checking policy ") changed := increaseThrottling() changed = changed || decreasingThrottling() @@ -153,8 +153,3 @@ func applyPolicies() { handlers.ApplyPolicies(&allPolicies) } -func pollPolicyApply() { - for range time.Tick(time.Minute * 1) { - applyPolicies() - } -}