From 8b7475c6bfd60f72f1c1fa166e81887c3608a961 Mon Sep 17 00:00:00 2001 From: ShuNing Date: Fri, 10 Apr 2020 20:58:43 +0800 Subject: [PATCH] *: picks some hot-region improvements to release 3.1 (#2342) Signed-off-by: nolouch Signed-off-by: lhy1024 --- go.mod | 18 +- go.sum | 65 +- pkg/mock/mockcluster/mockcluster.go | 72 +- server/api/scheduler_test.go | 41 +- server/cluster/cluster.go | 2 +- server/cluster/coordinator.go | 10 + server/schedulers/hot_region.go | 892 +++++++++++++++--------- server/schedulers/hot_region_config.go | 218 ++++++ server/schedulers/hot_test.go | 664 +++++++++++++----- server/schedulers/metrics.go | 9 + server/schedulers/scheduler_test.go | 12 +- server/schedulers/shuffle_hot_region.go | 19 +- server/schedulers/utils.go | 125 +++- server/statistics/avg_over_time.go | 73 +- server/statistics/avg_over_time_test.go | 4 +- server/statistics/hot_peer.go | 4 +- server/statistics/hot_peer_cache.go | 18 +- server/statistics/hot_regions_stat.go | 1 + server/statistics/store.go | 35 +- tests/pdctl/hot/hot_test.go | 10 +- tools/pd-ctl/pdctl/command/scheduler.go | 34 + 21 files changed, 1693 insertions(+), 633 deletions(-) create mode 100644 server/schedulers/hot_region_config.go diff --git a/go.mod b/go.mod index 43c8f8b7722..86e27b67f0a 100644 --- a/go.mod +++ b/go.mod @@ -10,26 +10,20 @@ require ( github.com/coreos/go-semver v0.2.0 github.com/coreos/pkg v0.0.0-20160727233714-3ac0863d7acf github.com/docker/go-units v0.4.0 - github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e // indirect github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 // indirect github.com/elazarl/go-bindata-assetfs v1.0.0 github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32 - github.com/go-playground/universal-translator v0.17.0 // indirect github.com/gogo/protobuf v1.3.1 - github.com/golang/groupcache v0.0.0-20181024230925-c65c006176ff // indirect github.com/golang/protobuf v1.3.2 - github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db // indirect + github.com/golang/snappy v0.0.1 // indirect github.com/google/btree v1.0.0 github.com/gorilla/mux v1.7.3 - github.com/gorilla/websocket v1.2.0 // indirect github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 - github.com/json-iterator/go v1.1.9 // indirect github.com/juju/ratelimit v1.0.1 - github.com/leodido/go-urn v1.2.0 // indirect - github.com/mattn/go-isatty v0.0.11 // indirect github.com/mattn/go-shellwords v1.0.3 github.com/montanaflynn/stats v0.0.0-20151014174947-eeaced052adb - github.com/onsi/gomega v1.4.2 // indirect + github.com/onsi/ginkgo v1.12.0 // indirect + github.com/onsi/gomega v1.9.0 // indirect github.com/opentracing/opentracing-go v1.0.2 github.com/phf/go-queue v0.0.0-20170504031614-9abe38d0371d github.com/pingcap-incubator/tidb-dashboard v0.0.0-20200218115603-7ab5f06db73d @@ -44,17 +38,11 @@ require ( github.com/spf13/cobra v0.0.3 github.com/spf13/pflag v1.0.1 github.com/syndtr/goleveldb v0.0.0-20180815032940-ae2bd5eed72d - github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6 // indirect github.com/unrolled/render v0.0.0-20171102162132-65450fb6b2d3 github.com/urfave/negroni v0.3.0 go.etcd.io/etcd v0.5.0-alpha.5.0.20191023171146-3cf2f69b5738 go.uber.org/goleak v0.10.0 go.uber.org/zap v1.13.0 - golang.org/x/net v0.0.0-20200202094626-16171245cfb2 // indirect - golang.org/x/sys v0.0.0-20200113162924-86b910548bc1 // indirect - golang.org/x/tools v0.0.0-20200216192241-b320d3a0f5a2 // indirect google.golang.org/grpc v1.25.1 - gopkg.in/go-playground/validator.v9 v9.31.0 // indirect 
gopkg.in/natefinch/lumberjack.v2 v2.0.0 - gopkg.in/yaml.v2 v2.2.8 // indirect ) diff --git a/go.sum b/go.sum index 44f8c3cb3b6..a210c1a83d7 100644 --- a/go.sum +++ b/go.sum @@ -44,9 +44,8 @@ github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumC github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ= github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= +github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4 h1:qk/FSDDxo05wdJH28W+p5yivv7LuLYLRXPPD8KQCtZs= github.com/dustin/go-humanize v0.0.0-20171111073723-bb3d318650d4/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= -github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e h1:Fw7ZmgiklsLh5EQWyHh1sumKSCG1+yjEctIpGKib87s= -github.com/dustin/go-humanize v0.0.0-20180421182945-02af3965c54e/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 h1:clC1lXBpe2kTj2VHdaIu9ajZQe4kcEY9j0NsnDDBZ3o= github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385/go.mod h1:0vRUJqYpeSZifjYj7uP3BG/gKcuzL9xWVV/Y+cK33KM= github.com/elazarl/go-bindata-assetfs v1.0.0 h1:G/bYguwHIzWq9ZoyUQqrjTmJbbYn3j3CKKpKinvZLFk= @@ -93,13 +92,11 @@ github.com/go-openapi/swag v0.17.0/go.mod h1:AByQ+nYG6gQg71GINrmuDXCPWdL640yX49/ github.com/go-openapi/swag v0.19.2/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= github.com/go-openapi/swag v0.19.5 h1:lTz6Ys4CmqqCQmZPBlbQENR1/GucA2bzYTE12Pw4tFY= github.com/go-openapi/swag v0.19.5/go.mod h1:POnQmlKehdgb5mhVOsnJFsivZCEZ/vjK9gh66Z9tfKk= +github.com/go-playground/locales v0.12.1 h1:2FITxuFt/xuCNP1Acdhv62OzaCiviiE4kotfhkmOqEc= github.com/go-playground/locales v0.12.1/go.mod h1:IUMDtCfWo/w/mtMfIE/IG2K+Ey3ygWanZIBtBW0W2TM= -github.com/go-playground/locales v0.13.0 h1:HyWk6mgj5qFqCT5fjGBuRArbVDfE4hi8+e8ceBS/t7Q= -github.com/go-playground/locales v0.13.0/go.mod h1:taPMhCMXrRLJO55olJkUXHZBHCxTMfnGwq/HNwmWNS8= github.com/go-playground/overalls v0.0.0-20180201144345-22ec1a223b7c/go.mod h1:UqxAgEOt89sCiXlrc/ycnx00LVvUO/eS8tMUkWX4R7w= +github.com/go-playground/universal-translator v0.16.0 h1:X++omBR/4cE2MNg91AoC3rmGrCjJ8eAeUP/K/EKx4DM= github.com/go-playground/universal-translator v0.16.0/go.mod h1:1AnU7NaIRDWWzGEKwgtJRd2xk99HeFyHw3yid4rvQIY= -github.com/go-playground/universal-translator v0.17.0 h1:icxd5fm+REJzpZx7ZfpaD876Lmtgy7VtROAbHHXk8no= -github.com/go-playground/universal-translator v0.17.0/go.mod h1:UkSxE5sNxxRwHyU+Scu5vgOQjsIJAF8j9muTVoKLVtA= github.com/go-sql-driver/mysql v1.4.1 h1:g24URVg0OFbNUTx9qqY1IRZ9D9z3iPyi5zKhQZpNwpA= github.com/go-sql-driver/mysql v1.4.1/go.mod h1:zAC/RDZ24gD3HViQzih4MyKcchzm+sOG5ZlKdlhCg5w= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -111,16 +108,15 @@ github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe h1:lXe2qZdvpiX5WZ github.com/golang-sql/civil v0.0.0-20190719163853-cb61b32ac6fe/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b h1:VKtxabqXZkF25pY9ekfRL6a582T4P37/31XEstQ5p58= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903 h1:LbsanbbD6LieFkXbj9YNNBupiGHJgFeLpO0j0Fza1h8= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 
-github.com/golang/groupcache v0.0.0-20181024230925-c65c006176ff h1:kOkM9whyQYodu09SJ6W3NCsHG7crFaJILQ22Gozp3lg= -github.com/golang/groupcache v0.0.0-20181024230925-c65c006176ff/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db h1:woRePGFeVFfLKN/pOkfl+p/TAqKOfFu+7KPlMVpok/w= -github.com/golang/snappy v0.0.0-20180518054509-2e65f85255db/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.1 h1:Qgr9rKW7uDUkrbSmQeiDsGa8SjGyCOGtuasMWwvp2P4= +github.com/golang/snappy v0.0.1/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -131,9 +127,8 @@ github.com/google/uuid v1.0.0 h1:b4Gk+7WdP/d3HZH8EJsZpvV7EtDOgaZLtnaNGIu1adA= github.com/google/uuid v1.0.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.7.3 h1:gnP5JzjVOuiZD07fKKToCAOjS0yOpj/qPETTXCCS6hw= github.com/gorilla/mux v1.7.3/go.mod h1:1lud6UwP+6orDFRuTfBEV8e9/aOM/c4fVVCaMa2zaAs= +github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c h1:Lh2aW+HnU2Nbe1gqD9SOJLJxW1jBMmQOktN2acDyJk8= github.com/gorilla/websocket v0.0.0-20170926233335-4201258b820c/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= -github.com/gorilla/websocket v1.2.0 h1:VJtLvh6VQym50czpZzx07z/kw9EgAxI3x1ZB8taTMQQ= -github.com/gorilla/websocket v1.2.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4 h1:z53tR0945TRRQO/fLEVPI6SMv7ZflF0TEaTAoU7tOzg= github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de4/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 h1:Ovs26xHkKqVztRpIrF/92BcuyuQ/YW4NSIpoGtfXNho= @@ -155,9 +150,8 @@ github.com/jonboulle/clockwork v0.1.0 h1:VKV+ZcuP6l3yW9doeqz6ziZGgcynBVQO+obU0+0 github.com/jonboulle/clockwork v0.1.0/go.mod h1:Ii8DK3G1RaLaWxj9trq07+26W01tbo22gdxWY5EU2bo= github.com/json-iterator/go v1.1.5/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= +github.com/json-iterator/go v1.1.7 h1:KfgG9LzI+pYjr4xvmz/5H4FXjokeP+rlHLhv3iH62Fo= github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= -github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGns= -github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/juju/ratelimit v1.0.1 h1:+7AIFJVQ0EQgq/K9+0Krm7m530Du7tIz0METWzN0RgY= github.com/juju/ratelimit v1.0.1/go.mod h1:qapgC/Gy+xNh9UxzV13HGGl/6UXNN+ct+vwSgWNm/qk= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= @@ -173,9 +167,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/pty 
v1.1.5/go.mod h1:9r2w37qlBe7rQ6e1fg1S/9xpWHSnaqNdHD3WcMdbPDA= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/leodido/go-urn v1.1.0 h1:Sm1gr51B1kKyfD2BlRcLSiEkffoG96g6TPv6eRoEiB8= github.com/leodido/go-urn v1.1.0/go.mod h1:+cyI34gQWZcE1eQU7NVgKkkzdXDQHr1dBMtdAPozLkw= -github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= -github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= github.com/lib/pq v1.1.1 h1:sJZmqHoEaY7f+NPP8pgLB/WxulyR3fewgCM2qaSlBb4= github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo= github.com/mailru/easyjson v0.0.0-20180823135443-60711f1a8329/go.mod h1:C1wdFJiN94OJF2b5HbByQZoLdCWB1Yqtg26g4irojpc= @@ -187,9 +180,8 @@ github.com/mattn/go-colorable v0.0.9/go.mod h1:9vuHe8Xs5qXnSaW/c/ABM9alt+Vo+STaO github.com/mattn/go-isatty v0.0.4/go.mod h1:M+lRXTBqGeGNdLjl/ufCoiOlB5xdOkqRJdNxMWT7Zi4= github.com/mattn/go-isatty v0.0.7/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hdxcsrc5s= +github.com/mattn/go-isatty v0.0.9 h1:d5US/mDsogSGW37IV293h//ZFaeajb69h+EHFsv2xGg= github.com/mattn/go-isatty v0.0.9/go.mod h1:YNRxwqDuOph6SZLI9vUUz6OYw3QyUt7WiY2yME+cCiQ= -github.com/mattn/go-isatty v0.0.11 h1:FxPOTFNqGkuDUGi3H/qkUbQO4ZiBa2brKq5r0l8TGeM= -github.com/mattn/go-isatty v0.0.11/go.mod h1:PhnuNfih5lzO57/f3n+odYbM4JtupLOxQOAqxQCu2WE= github.com/mattn/go-runewidth v0.0.2/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-shellwords v1.0.3 h1:K/VxK7SZ+cvuPgFSLKi5QPI9Vr/ipOf4C1gN+ntueUk= github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= @@ -210,8 +202,11 @@ github.com/nicksnyder/go-i18n v1.10.0/go.mod h1:HrK7VCrbOvQoUAQ7Vpy7i87N7JZZZ7R2 github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo= github.com/onsi/ginkgo v1.6.0 h1:Ix8l273rp3QzYgXSR+c8d1fTG7UPgYkOSELPhiY/YGw= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/gomega v1.4.2 h1:3mYCb7aPxS/RU7TI1y4rkEn1oKmPRjNJLNEXgw7MH2I= -github.com/onsi/gomega v1.4.2/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= +github.com/onsi/ginkgo v1.12.0 h1:Iw5WCbBcaAAd0fpRb1c9r5YCylv4XDoCSigm1zLevwU= +github.com/onsi/ginkgo v1.12.0/go.mod h1:oUhWkIvk5aDxtKvDDuw8gItl8pKl42LzjC9KZE0HfGg= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.9.0 h1:R1uwffexN6Pr340GtYRIdZmAiN4J+iw6WG4wog1DUXg= +github.com/onsi/gomega v1.9.0/go.mod h1:Ho0h+IUsWyvy1OpqCwxlQ/21gkhVunqlU8fDGcoTdcA= github.com/opentracing/opentracing-go v1.0.2 h1:3jA2P6O1F9UOrWVpwrIo17pu01KWvNWg4X946/Y5Zwg= github.com/opentracing/opentracing-go v1.0.2/go.mod h1:UkNAQd3GIcIGf0SeVgPpRdFStlNbqXla1AfSYxPUl2o= github.com/pelletier/go-toml v1.3.0/go.mod h1:PN7xzY2wHTK0K9p34ErDQMlFxa51Fk0OUruD3k1mMwo= @@ -289,9 +284,8 @@ github.com/swaggo/swag v1.6.5 h1:2C+t+xyK6p1sujqncYO/VnMvPZcBJjNdKKyxbOdAW8o= github.com/swaggo/swag v1.6.5/go.mod h1:Y7ZLSS0d0DdxhWGVhQdu+Bu1QhaF5k0RD7FKdiAykeY= github.com/syndtr/goleveldb v0.0.0-20180815032940-ae2bd5eed72d h1:4J9HCZVpvDmj2tiKGSTUnb3Ok/9CEQb9oqu9LHKQQpc= github.com/syndtr/goleveldb v0.0.0-20180815032940-ae2bd5eed72d/go.mod h1:Z4AUp2Km+PwemOoO/VB5AOx9XSsIItzFjoJlOSiYmn0= +github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8 
h1:ndzgwNDnKIqyCvHTXaCqh9KlOWKvBry6nuXMJmonVsE= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= -github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6 h1:lYIiVDtZnyTWlNwiAxLj0bbpTcx1BWCFhXjfsvmPdNc= -github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/ugorji/go v1.1.4/go.mod h1:uQMGLiO92mf5W77hV/PUCpI3pbzQx3CRekS0kk+RGrc= github.com/ugorji/go v1.1.5-pre/go.mod h1:FwP/aQVg39TXzItUBMwnWp9T9gPQnXw4Poh4/oBQZ/0= github.com/ugorji/go v1.1.7 h1:/68gy2h+1mWMrwZFeD1kQialdSzAb432dtpeJ42ovdo= @@ -338,8 +332,6 @@ golang.org/x/crypto v0.0.0-20190325154230-a5d413f7728c/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529 h1:iMGN4xG0cnqj3t+zOM8wUB0BiPKHEwSxEZCvzcbZuvk= golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20190611184440-5c40567a22f8/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550 h1:ObdrDkeb4kJdCP557AjRjq69pTHfNouLtWZG7j9rPN8= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191205180655-e7c4368fe9dd h1:GGJVjV8waZKRHrgwvtH66z9ZGVurTD1MT0n1Bb+q4aM= golang.org/x/crypto v0.0.0-20191205180655-e7c4368fe9dd/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= @@ -349,8 +341,6 @@ golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHl golang.org/x/lint v0.0.0-20190930215403-16217165b5de h1:5hukYrvBGR8/eNkX5mdUezrA6JiaEZDtJb9Ei+1LlBs= golang.org/x/lint v0.0.0-20190930215403-16217165b5de/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= golang.org/x/mod v0.0.0-20190513183733-4bf6d317e70e/go.mod h1:mXi4GBBbnImb6dmsKGUJ2LatrhH/nqhxcFungHvyanc= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee h1:WG0RUwxtNT4qqaXX3DPA8zHFNm/D9xaBpxzHt1WcA/E= -golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -366,9 +356,8 @@ golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190813141303-74dc4d7220e7/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20190827160401-ba9fcec4b297/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20191002035440-2ec189313ef0 h1:2mqDk8w/o6UmeUCu5Qiq2y7iMf6anbx+YA8d1JFoFrs= golang.org/x/net v0.0.0-20191002035440-2ec189313ef0/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2 h1:CCH4IOTTfewWjGOlSp+zGcjutRKlBEZQ6wTn8ozI/nI= -golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/sync 
v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -386,10 +375,10 @@ golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20190610200419-93c9922d18ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190616124812-15dcb6c0061f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190813064441-fde4db37ae7a/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456 h1:ng0gs1AKnRRuEMZoTLLlbOd+C17zUDepwGQBb/n+JVg= golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200113162924-86b910548bc1 h1:gZpLHxUX5BdYLA08Lj4YCJNN/jk7KtquiArPoeX0WvA= -golang.org/x/sys v0.0.0-20200113162924-86b910548bc1/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e h1:N7DeIrjYszNmSW409R3frPPwglRwMkXSBzwVbkOjLLA= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= @@ -408,12 +397,10 @@ golang.org/x/tools v0.0.0-20190614205625-5aca471b1d59/go.mod h1:/rFqwRUd4F7ZHNgw golang.org/x/tools v0.0.0-20190621195816-6e04913cbbac/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= golang.org/x/tools v0.0.0-20191029041327-9cc4af7d6b2c/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20191029190741-b9c20aec41a5/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20191107010934-f79515f33823 h1:akkRBeitX2EZP59KdtKw310CI4WGPCNPyrLbE7WZA8Y= golang.org/x/tools v0.0.0-20191107010934-f79515f33823/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20200216192241-b320d3a0f5a2 h1:0sfSpGSa544Fwnbot3Oxq/U6SXqjty6Jy/3wRhVS7ig= -golang.org/x/tools v0.0.0-20200216192241-b320d3a0f5a2/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7 h1:9zdDQZ7Thm29KFXgAX/+yaf3eVbP7djjWp/dXAppNCc= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898 h1:/atklqdjdhuosWIl6AIbOeHJjicWYPqR9bpxqxYG2pA= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0 h1:/wp5JvzpHIxhs/dumFmF7BXTf3Z+dd4uXta4kVyO508= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= @@ -440,9 +427,8 @@ gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMy gopkg.in/go-playground/assert.v1 v1.2.1 h1:xoYuJVE7KT85PYWrN730RguIQO0ePzVRfFMXadIrXTM= gopkg.in/go-playground/assert.v1 v1.2.1/go.mod h1:9RXL0bg/zibRAgZUYszZSwO/z8Y/a8bDuhia5mkpMnE= gopkg.in/go-playground/validator.v8 v8.18.2/go.mod h1:RX2a/7Ha8BgOhfk7j780h4/u/RRjR0eouCJSH80/M2Y= 
+gopkg.in/go-playground/validator.v9 v9.29.1 h1:SvGtYmN60a5CVKTOzMSyfzWDeZRxRuGvRQyEAKbw1xc= gopkg.in/go-playground/validator.v9 v9.29.1/go.mod h1:+c9/zcJMFNgbLvly1L1V+PpxWdVbfP1avr/N00E2vyQ= -gopkg.in/go-playground/validator.v9 v9.31.0 h1:bmXmP2RSNtFES+bn4uYuHT7iJFJv7Vj+an+ZQdDaD1M= -gopkg.in/go-playground/validator.v9 v9.31.0/go.mod h1:+c9/zcJMFNgbLvly1L1V+PpxWdVbfP1avr/N00E2vyQ= gopkg.in/natefinch/lumberjack.v2 v2.0.0 h1:1Lc07Kr7qY4U2YPouBjpCLxpiyxIVoxqXgkXLknAOE8= gopkg.in/natefinch/lumberjack.v2 v2.0.0/go.mod h1:l0ndWWf7gzL7RNwBG7wST/UCcT4T24xpD6X8LsfU/+k= gopkg.in/resty.v1 v1.12.0/go.mod h1:mDo4pnntr5jdWRML875a/NmxYqAlA73dVijT2AXvQQo= @@ -451,9 +437,10 @@ gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWD gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.3 h1:fvjTMHxHEw/mxHbtzPi3JCcKXQRAnQTBRo6YCJSVHKI= gopkg.in/yaml.v2 v2.2.3/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= -gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3 h1:3JgtbtFHMiCmsznwGVTUWbgGov+pVqnlf1dEJTNAXeM= diff --git a/pkg/mock/mockcluster/mockcluster.go b/pkg/mock/mockcluster/mockcluster.go index 2fc4a1eed47..f4726ed9e64 100644 --- a/pkg/mock/mockcluster/mockcluster.go +++ b/pkg/mock/mockcluster/mockcluster.go @@ -275,9 +275,14 @@ func (mc *Cluster) AddLeaderRegionWithRange(regionID uint64, startKey string, en } // AddLeaderRegionWithReadInfo adds region with specified leader, followers and read info. -func (mc *Cluster) AddLeaderRegionWithReadInfo(regionID uint64, leaderID uint64, readBytes uint64, reportInterval uint64, followerIds ...uint64) { +func (mc *Cluster) AddLeaderRegionWithReadInfo( + regionID uint64, leaderID uint64, + readBytes, readKeys uint64, + reportInterval uint64, + followerIds []uint64) { r := mc.newMockRegionInfo(regionID, leaderID, followerIds...) r = r.Clone(core.SetReadBytes(readBytes)) + r = r.Clone(core.SetReadKeys(readKeys)) r = r.Clone(core.SetReportInterval(reportInterval)) items := mc.HotCache.CheckRead(r, mc.StoresStats) for _, item := range items { @@ -287,9 +292,14 @@ func (mc *Cluster) AddLeaderRegionWithReadInfo(regionID uint64, leaderID uint64, } // AddLeaderRegionWithWriteInfo adds region with specified leader, followers and write info. -func (mc *Cluster) AddLeaderRegionWithWriteInfo(regionID uint64, leaderID uint64, writtenBytes uint64, reportInterval uint64, followerIds ...uint64) { +func (mc *Cluster) AddLeaderRegionWithWriteInfo( + regionID uint64, leaderID uint64, + writtenBytes, writtenKeys uint64, + reportInterval uint64, + followerIds []uint64) { r := mc.newMockRegionInfo(regionID, leaderID, followerIds...) 
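The reworked mock helpers here (AddLeaderRegionWithReadInfo, AddLeaderRegionWithWriteInfo, and the UpdateStorage*Stats methods a little further down) now carry both byte and key totals over an explicit report interval; the statistics layer consumes them as per-second rates. A minimal, self-contained sketch of that totals-to-rates conversion, not part of the patch itself (the helper name and figures are illustrative; the 100:1 bytes-per-key ratio mirrors the convention used by UpdateStorageWrittenBytes below):

package main

import "fmt"

// toRates converts totals reported over an interval (in seconds) into the
// per-second rates that the hot-region statistics consume.
func toRates(bytes, keys, intervalSec uint64) (byteRate, keyRate float64) {
	if intervalSec == 0 {
		return 0, 0
	}
	return float64(bytes) / float64(intervalSec), float64(keys) / float64(intervalSec)
}

func main() {
	// e.g. 1,000,000 bytes and 10,000 keys reported over a 10s store heartbeat.
	b, k := toRates(1000000, 10000, 10)
	fmt.Printf("byteRate=%.0f B/s, keyRate=%.0f keys/s\n", b, k)
}
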
r = r.Clone(core.SetWrittenBytes(writtenBytes)) + r = r.Clone(core.SetWrittenKeys(writtenKeys)) r = r.Clone(core.SetReportInterval(reportInterval)) items := mc.HotCache.CheckWrite(r, mc.StoresStats) for _, item := range items { @@ -383,11 +393,40 @@ func (mc *Cluster) UpdateStorageRatio(storeID uint64, usedRatio, availableRatio mc.PutStore(newStore) } +// UpdateStorageWrittenStats updates store written bytes. +func (mc *Cluster) UpdateStorageWrittenStats(storeID, bytesWritten, keysWritten uint64) { + store := mc.GetStore(storeID) + newStats := proto.Clone(store.GetStoreStats()).(*pdpb.StoreStats) + newStats.BytesWritten = bytesWritten + newStats.KeysWritten = keysWritten + now := time.Now().Second() + interval := &pdpb.TimeInterval{StartTimestamp: uint64(now - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(now)} + newStats.Interval = interval + newStore := store.Clone(core.SetStoreStats(newStats)) + mc.Set(storeID, newStats) + mc.PutStore(newStore) +} + +// UpdateStorageReadStats updates store written bytes. +func (mc *Cluster) UpdateStorageReadStats(storeID, bytesWritten, keysWritten uint64) { + store := mc.GetStore(storeID) + newStats := proto.Clone(store.GetStoreStats()).(*pdpb.StoreStats) + newStats.BytesRead = bytesWritten + newStats.KeysRead = keysWritten + now := time.Now().Second() + interval := &pdpb.TimeInterval{StartTimestamp: uint64(now - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(now)} + newStats.Interval = interval + newStore := store.Clone(core.SetStoreStats(newStats)) + mc.Set(storeID, newStats) + mc.PutStore(newStore) +} + // UpdateStorageWrittenBytes updates store written bytes. func (mc *Cluster) UpdateStorageWrittenBytes(storeID uint64, bytesWritten uint64) { store := mc.GetStore(storeID) newStats := proto.Clone(store.GetStoreStats()).(*pdpb.StoreStats) newStats.BytesWritten = bytesWritten + newStats.KeysWritten = bytesWritten / 100 now := time.Now().Second() interval := &pdpb.TimeInterval{StartTimestamp: uint64(now - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(now)} newStats.Interval = interval @@ -401,6 +440,35 @@ func (mc *Cluster) UpdateStorageReadBytes(storeID uint64, bytesRead uint64) { store := mc.GetStore(storeID) newStats := proto.Clone(store.GetStoreStats()).(*pdpb.StoreStats) newStats.BytesRead = bytesRead + newStats.KeysRead = bytesRead / 100 + now := time.Now().Second() + interval := &pdpb.TimeInterval{StartTimestamp: uint64(now - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(now)} + newStats.Interval = interval + newStore := store.Clone(core.SetStoreStats(newStats)) + mc.Set(storeID, newStats) + mc.PutStore(newStore) +} + +// UpdateStorageWrittenKeys updates store written keys. +func (mc *Cluster) UpdateStorageWrittenKeys(storeID uint64, keysWritten uint64) { + store := mc.GetStore(storeID) + newStats := proto.Clone(store.GetStoreStats()).(*pdpb.StoreStats) + newStats.KeysWritten = keysWritten + newStats.BytesWritten = keysWritten * 100 + now := time.Now().Second() + interval := &pdpb.TimeInterval{StartTimestamp: uint64(now - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(now)} + newStats.Interval = interval + newStore := store.Clone(core.SetStoreStats(newStats)) + mc.Set(storeID, newStats) + mc.PutStore(newStore) +} + +// UpdateStorageReadKeys updates store read bytes. 
+func (mc *Cluster) UpdateStorageReadKeys(storeID uint64, keysRead uint64) { + store := mc.GetStore(storeID) + newStats := proto.Clone(store.GetStoreStats()).(*pdpb.StoreStats) + newStats.KeysRead = keysRead + newStats.BytesRead = keysRead * 100 now := time.Now().Second() interval := &pdpb.TimeInterval{StartTimestamp: uint64(now - statistics.StoreHeartBeatReportInterval), EndTimestamp: uint64(now)} newStats.Interval = interval diff --git a/server/api/scheduler_test.go b/server/api/scheduler_test.go index ca569ff2c18..609c4d213bf 100644 --- a/server/api/scheduler_test.go +++ b/server/api/scheduler_test.go @@ -98,7 +98,46 @@ func (s *testScheduleSuite) TestAPI(c *C) { extraTestFunc func(name string, c *C) }{ {name: "balance-leader-scheduler"}, - {name: "balance-hot-region-scheduler"}, + { + name: "balance-hot-region-scheduler", + extraTestFunc: func(name string, c *C) { + resp := make(map[string]interface{}) + listURL := fmt.Sprintf("%s%s%s/%s/list", s.svr.GetAddr(), apiPrefix, server.SchedulerConfigHandlerPath, name) + c.Assert(readJSON(listURL, &resp), IsNil) + expectMap := map[string]float64{ + "min-hot-byte-rate": 100, + "min-hot-key-rate": 10, + "max-zombie-rounds": 3, + "max-peer-number": 1000, + "byte-rate-rank-step-ratio": 0.05, + "key-rate-rank-step-ratio": 0.05, + "count-rank-step-ratio": 0.01, + "great-dec-ratio": 0.95, + "minor-dec-ratio": 0.99, + } + for key := range expectMap { + c.Assert(resp[key], DeepEquals, expectMap[key]) + } + dataMap := make(map[string]interface{}) + dataMap["max-zombie-rounds"] = 5.0 + expectMap["max-zombie-rounds"] = 5.0 + updateURL := fmt.Sprintf("%s%s%s/%s/config", s.svr.GetAddr(), apiPrefix, server.SchedulerConfigHandlerPath, name) + body, err := json.Marshal(dataMap) + c.Assert(err, IsNil) + c.Assert(postJSON(updateURL, body), IsNil) + resp = make(map[string]interface{}) + c.Assert(readJSON(listURL, &resp), IsNil) + for key := range expectMap { + c.Assert(resp[key], DeepEquals, expectMap[key]) + } + // update again + err = postJSON(updateURL, body, func(res []byte, code int) { + c.Assert(string(res), Equals, "no changed") + c.Assert(code, Equals, 200) + }) + c.Assert(err, IsNil) + }, + }, {name: "balance-region-scheduler"}, {name: "shuffle-leader-scheduler"}, {name: "shuffle-region-scheduler"}, diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go index 5f92e4844d9..eec1e2c07d4 100644 --- a/server/cluster/cluster.go +++ b/server/cluster/cluster.go @@ -44,7 +44,7 @@ import ( "go.uber.org/zap" ) -var backgroundJobInterval = time.Minute +var backgroundJobInterval = 10 * time.Second const ( clientTimeout = 3 * time.Second diff --git a/server/cluster/coordinator.go b/server/cluster/coordinator.go index ec82b72dd65..d0c1702175d 100644 --- a/server/cluster/coordinator.go +++ b/server/cluster/coordinator.go @@ -402,24 +402,30 @@ func (c *coordinator) collectHotSpotMetrics() { stat, ok := status.AsPeer[storeID] if ok { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_written_bytes_as_peer").Set(stat.TotalBytesRate) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_written_keys_as_peer").Set(stat.TotalBytesRate) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "hot_write_region_as_peer").Set(float64(stat.Count)) } else { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_written_bytes_as_peer").Set(0) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "hot_write_region_as_peer").Set(0) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, 
"total_written_keys_as_peer").Set(0) } stat, ok = status.AsLeader[storeID] if ok { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_written_bytes_as_leader").Set(stat.TotalBytesRate) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_written_keys_as_leader").Set(stat.TotalKeysRate) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "hot_write_region_as_leader").Set(float64(stat.Count)) } else { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_written_bytes_as_leader").Set(0) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_written_keys_as_leader").Set(0) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "hot_write_region_as_leader").Set(0) } infl := pendings[storeID] // TODO: add to tidb-ansible after merging pending influence into operator influence. hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "write_pending_influence_byte_rate").Set(infl.ByteRate) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "write_pending_influence_key_rate").Set(infl.KeyRate) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "write_pending_influence_count").Set(infl.Count) } // Collects hot read region metrics. @@ -432,14 +438,18 @@ func (c *coordinator) collectHotSpotMetrics() { stat, ok := status.AsLeader[storeID] if ok { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_read_bytes_as_leader").Set(stat.TotalBytesRate) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_read_keys_as_leader").Set(stat.TotalKeysRate) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "hot_read_region_as_leader").Set(float64(stat.Count)) } else { hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_read_bytes_as_leader").Set(0) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "total_read_keys_as_leader").Set(0) hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "hot_read_region_as_leader").Set(0) } infl := pendings[storeID] hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "read_pending_influence_byte_rate").Set(infl.ByteRate) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "read_pending_influence_key_rate").Set(infl.KeyRate) + hotSpotStatusGauge.WithLabelValues(storeAddress, storeLabel, "read_pending_influence_count").Set(infl.Count) } } diff --git a/server/schedulers/hot_region.go b/server/schedulers/hot_region.go index 7d7c0679d46..138f372af69 100644 --- a/server/schedulers/hot_region.go +++ b/server/schedulers/hot_region.go @@ -15,8 +15,11 @@ package schedulers import ( "fmt" + "math" "math/rand" + "net/http" "sort" + "strconv" "sync" "time" @@ -38,21 +41,29 @@ func init() { } }) schedule.RegisterScheduler(HotRegionType, func(opController *schedule.OperatorController, storage *core.Storage, decoder schedule.ConfigDecoder) (schedule.Scheduler, error) { - return newHotScheduler(opController), nil + conf := initHotRegionScheduleConfig() + if err := decoder(conf); err != nil { + return nil, err + } + conf.storage = storage + return newHotScheduler(opController, conf), nil }) + // FIXME: remove this two schedule after the balance test move in schedulers package - schedule.RegisterScheduler(HotWriteRegionType, func(opController *schedule.OperatorController, storage *core.Storage, decoder schedule.ConfigDecoder) (schedule.Scheduler, error) { - return newHotWriteScheduler(opController), nil - }) - schedule.RegisterScheduler(HotReadRegionType, func(opController 
*schedule.OperatorController, storage *core.Storage, decoder schedule.ConfigDecoder) (schedule.Scheduler, error) { - return newHotReadScheduler(opController), nil - }) + { + schedule.RegisterScheduler(HotWriteRegionType, func(opController *schedule.OperatorController, storage *core.Storage, decoder schedule.ConfigDecoder) (schedule.Scheduler, error) { + return newHotWriteScheduler(opController, initHotRegionScheduleConfig()), nil + }) + schedule.RegisterScheduler(HotReadRegionType, func(opController *schedule.OperatorController, storage *core.Storage, decoder schedule.ConfigDecoder) (schedule.Scheduler, error) { + return newHotReadScheduler(opController, initHotRegionScheduleConfig()), nil + }) + + } } const ( // HotRegionName is balance hot region scheduler name. HotRegionName = "balance-hot-region-scheduler" - // HotRegionType is balance hot region scheduler type. HotRegionType = "hot-region" // HotReadRegionType is hot read region scheduler type. @@ -60,42 +71,12 @@ const ( // HotWriteRegionType is hot write region scheduler type. HotWriteRegionType = "hot-write-region" - hotRegionLimitFactor = 0.75 - hotRegionScheduleFactor = 0.95 - - maxZombieDur time.Duration = statistics.StoreHeartBeatReportInterval * time.Second - - minRegionScheduleInterval time.Duration = statistics.StoreHeartBeatReportInterval * time.Second -) - -// rwType : the perspective of balance -type rwType int - -const ( - write rwType = iota - read -) - -type opType int - -const ( - movePeer opType = iota - transferLeader + minHotScheduleInterval = time.Second + maxHotScheduleInterval = 20 * time.Second ) -type storeLoadInfos struct { - ReadLeaders map[uint64]*storeLoadDetail - WriteLeaders map[uint64]*storeLoadDetail - WritePeers map[uint64]*storeLoadDetail -} - -func newStoreLoadInfos() *storeLoadInfos { - return &storeLoadInfos{ - ReadLeaders: make(map[uint64]*storeLoadDetail), - WriteLeaders: make(map[uint64]*storeLoadDetail), - WritePeers: make(map[uint64]*storeLoadDetail), - } -} +// schedulePeerPr the probability of schedule the hot peer. 
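schedulePeerPr, declared on the next line, biases each hot-write scheduling round toward balancing by moving peers (about two thirds of rounds) before falling back to transferring leaders, as balanceHotWriteRegions further down shows. A toy sketch of that weighted choice, not part of the patch, assuming the same 0.66 probability (the real scheduler still falls back to transfer-leader whenever move-peer finds nothing):

package main

import (
	"fmt"
	"math/rand"
)

// pickWriteBalanceOp mimics the weighted choice in balanceHotWriteRegions:
// with probability p try move-peer first, otherwise go straight to
// transfer-leader.
func pickWriteBalanceOp(r *rand.Rand, p float64) string {
	if r.Intn(100) < int(p*100) {
		return "move-peer"
	}
	return "transfer-leader"
}

func main() {
	r := rand.New(rand.NewSource(1))
	counts := map[string]int{}
	for i := 0; i < 1000; i++ {
		counts[pickWriteBalanceOp(r, 0.66)]++
	}
	fmt.Println(counts) // roughly 2:1 in favour of move-peer
}
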
+var schedulePeerPr = 0.66 type hotScheduler struct { name string @@ -107,42 +88,44 @@ type hotScheduler struct { r *rand.Rand // states across multiple `Schedule` calls - readPendings map[*pendingInfluence]struct{} - writePendings map[*pendingInfluence]struct{} + pendings [resourceTypeLen]map[*pendingInfluence]struct{} regionPendings map[uint64][2]*operator.Operator // temporary states but exported to API or metrics - stLoadInfos *storeLoadInfos - readPendingSum map[uint64]Influence - writePendingSum map[uint64]Influence + stLoadInfos [resourceTypeLen]map[uint64]*storeLoadDetail + pendingSums [resourceTypeLen]map[uint64]Influence + // config of hot scheduler + conf *hotRegionSchedulerConfig } -func newHotScheduler(opController *schedule.OperatorController) *hotScheduler { +func newHotScheduler(opController *schedule.OperatorController, conf *hotRegionSchedulerConfig) *hotScheduler { base := NewBaseScheduler(opController) - return &hotScheduler{ + ret := &hotScheduler{ name: HotRegionName, BaseScheduler: base, leaderLimit: 1, peerLimit: 1, types: []rwType{write, read}, r: rand.New(rand.NewSource(time.Now().UnixNano())), - readPendings: map[*pendingInfluence]struct{}{}, - writePendings: map[*pendingInfluence]struct{}{}, regionPendings: make(map[uint64][2]*operator.Operator), - - stLoadInfos: newStoreLoadInfos(), + conf: conf, + } + for ty := resourceType(0); ty < resourceTypeLen; ty++ { + ret.pendings[ty] = map[*pendingInfluence]struct{}{} + ret.stLoadInfos[ty] = map[uint64]*storeLoadDetail{} } + return ret } -func newHotReadScheduler(opController *schedule.OperatorController) *hotScheduler { - ret := newHotScheduler(opController) +func newHotReadScheduler(opController *schedule.OperatorController, conf *hotRegionSchedulerConfig) *hotScheduler { + ret := newHotScheduler(opController, conf) ret.name = "" ret.types = []rwType{read} return ret } -func newHotWriteScheduler(opController *schedule.OperatorController) *hotScheduler { - ret := newHotScheduler(opController) +func newHotWriteScheduler(opController *schedule.OperatorController, conf *hotRegionSchedulerConfig) *hotScheduler { + ret := newHotScheduler(opController, conf) ret.name = "" ret.types = []rwType{write} return ret @@ -156,17 +139,28 @@ func (h *hotScheduler) GetType() string { return HotRegionType } +func (h *hotScheduler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + h.conf.ServeHTTP(w, r) +} + +func (h *hotScheduler) GetMinInterval() time.Duration { + return minHotScheduleInterval +} +func (h *hotScheduler) GetNextInterval(interval time.Duration) time.Duration { + return intervalGrow(h.GetMinInterval(), maxHotScheduleInterval, exponentialGrowth) +} + func (h *hotScheduler) IsScheduleAllowed(cluster opt.Cluster) bool { return h.allowBalanceLeader(cluster) || h.allowBalanceRegion(cluster) } func (h *hotScheduler) allowBalanceLeader(cluster opt.Cluster) bool { - return h.OpController.OperatorCount(operator.OpHotRegion) < minUint64(h.leaderLimit, cluster.GetHotRegionScheduleLimit()) && + return h.OpController.OperatorCount(operator.OpHotRegion) < cluster.GetHotRegionScheduleLimit() && h.OpController.OperatorCount(operator.OpLeader) < cluster.GetLeaderScheduleLimit() } func (h *hotScheduler) allowBalanceRegion(cluster opt.Cluster) bool { - return h.OpController.OperatorCount(operator.OpHotRegion) < minUint64(h.peerLimit, cluster.GetHotRegionScheduleLimit()) + return h.OpController.OperatorCount(operator.OpHotRegion) < cluster.GetHotRegionScheduleLimit() } func (h *hotScheduler) Schedule(cluster opt.Cluster) 
[]*operator.Operator { @@ -197,11 +191,13 @@ func (h *hotScheduler) prepareForBalance(cluster opt.Cluster) { minHotDegree := cluster.GetHotRegionCacheHitsThreshold() { // update read statistics regionRead := cluster.RegionReadStats() - storeRead := storesStat.GetStoresBytesReadStat() + storeByte := storesStat.GetStoresBytesReadStat() + storeKey := storesStat.GetStoresKeysReadStat() - h.stLoadInfos.ReadLeaders = summaryStoresLoad( - storeRead, - h.readPendingSum, + h.stLoadInfos[readLeader] = summaryStoresLoad( + storeByte, + storeKey, + h.pendingSums[readLeader], regionRead, minHotDegree, read, core.LeaderKind) @@ -209,18 +205,21 @@ func (h *hotScheduler) prepareForBalance(cluster opt.Cluster) { { // update write statistics regionWrite := cluster.RegionWriteStats() - storeWrite := storesStat.GetStoresBytesWriteStat() + storeByte := storesStat.GetStoresBytesWriteStat() + storeKey := storesStat.GetStoresKeysWriteStat() - h.stLoadInfos.WriteLeaders = summaryStoresLoad( - storeWrite, - map[uint64]Influence{}, + h.stLoadInfos[writeLeader] = summaryStoresLoad( + storeByte, + storeKey, + h.pendingSums[writeLeader], regionWrite, minHotDegree, write, core.LeaderKind) - h.stLoadInfos.WritePeers = summaryStoresLoad( - storeWrite, - h.writePendingSum, + h.stLoadInfos[writePeer] = summaryStoresLoad( + storeByte, + storeKey, + h.pendingSums[writePeer], regionWrite, minHotDegree, write, core.RegionKind) @@ -228,8 +227,9 @@ func (h *hotScheduler) prepareForBalance(cluster opt.Cluster) { } func (h *hotScheduler) summaryPendingInfluence() { - h.readPendingSum = summaryPendingInfluence(h.readPendings, calcPendingWeight) - h.writePendingSum = summaryPendingInfluence(h.writePendings, calcPendingWeight) + for ty := resourceType(0); ty < resourceTypeLen; ty++ { + h.pendingSums[ty] = summaryPendingInfluence(h.pendings[ty], h.calcPendingWeight) + } h.gcRegionPendings() } @@ -238,7 +238,7 @@ func (h *hotScheduler) gcRegionPendings() { empty := true for ty, op := range pendings { if op != nil && op.IsEnd() { - if time.Now().After(op.GetCreateTime().Add(minRegionScheduleInterval)) { + if time.Now().After(op.GetCreateTime().Add(h.conf.GetMaxZombieDuration())) { schedulerStatus.WithLabelValues(h.GetName(), "pending_op_infos").Dec() pendings[ty] = nil } @@ -258,6 +258,7 @@ func (h *hotScheduler) gcRegionPendings() { // Load information of all available stores. func summaryStoresLoad( storeByteRate map[uint64]float64, + storeKeyRate map[uint64]float64, pendings map[uint64]Influence, storeHotPeers map[uint64][]*statistics.HotPeerStat, minHotDegree int, @@ -265,32 +266,49 @@ func summaryStoresLoad( kind core.ResourceKind, ) map[uint64]*storeLoadDetail { loadDetail := make(map[uint64]*storeLoadDetail, len(storeByteRate)) + allByteSum := 0.0 + allKeySum := 0.0 + allCount := 0.0 // Stores without byte rate statistics is not available to schedule. - for id, rate := range storeByteRate { + for id, byteRate := range storeByteRate { + keyRate := storeKeyRate[id] // Find all hot peers first hotPeers := make([]*statistics.HotPeerStat, 0) { - hotSum := 0.0 + byteSum := 0.0 + keySum := 0.0 for _, peer := range filterHotPeers(kind, minHotDegree, storeHotPeers[id]) { - hotSum += peer.GetByteRate() + byteSum += peer.GetByteRate() + keySum += peer.GetKeyRate() hotPeers = append(hotPeers, peer.Clone()) } // Use sum of hot peers to estimate leader-only byte rate. if kind == core.LeaderKind && rwTy == write { - rate = hotSum + byteRate = byteSum + keyRate = keySum } // Metric for debug. 
- ty := "byte-rate-" + rwTy.String() + "-" + kind.String() - hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(hotSum) + { + ty := "byte-rate-" + rwTy.String() + "-" + kind.String() + hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(byteSum) + } + { + ty := "key-rate-" + rwTy.String() + "-" + kind.String() + hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(keySum) + } } + allByteSum += byteRate + allKeySum += keyRate + allCount += float64(len(hotPeers)) // Build store load prediction from current load and pending influence. stLoadPred := (&storeLoad{ - ByteRate: rate, - Count: len(hotPeers), + ByteRate: byteRate, + KeyRate: keyRate, + Count: float64(len(hotPeers)), }).ToLoadPred(pendings[id]) // Construct store load info. @@ -299,6 +317,29 @@ func summaryStoresLoad( HotPeers: hotPeers, } } + storeLen := float64(len(storeByteRate)) + + for id, detail := range loadDetail { + byteExp := allByteSum / storeLen + keyExp := allKeySum / storeLen + countExp := allCount / storeLen + detail.LoadPred.Future.ExpByteRate = byteExp + detail.LoadPred.Future.ExpKeyRate = keyExp + detail.LoadPred.Future.ExpCount = countExp + // Debug + { + ty := "exp-byte-rate-" + rwTy.String() + "-" + kind.String() + hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(byteExp) + } + { + ty := "exp-key-rate-" + rwTy.String() + "-" + kind.String() + hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(keyExp) + } + { + ty := "exp-count-rate-" + rwTy.String() + "-" + kind.String() + hotPeerSummary.WithLabelValues(ty, fmt.Sprintf("%v", id)).Set(countExp) + } + } return loadDetail } @@ -318,25 +359,27 @@ func filterHotPeers( return ret } -func (h *hotScheduler) addPendingInfluence(op *operator.Operator, srcStore, dstStore uint64, infl Influence, balanceType rwType, ty opType) { - influence := newPendingInfluence(op, srcStore, dstStore, infl) +func (h *hotScheduler) addPendingInfluence(op *operator.Operator, srcStore, dstStore uint64, infl Influence, rwTy rwType, opTy opType) bool { regionID := op.RegionID() - if balanceType == read { - h.readPendings[influence] = struct{}{} - } else { - h.writePendings[influence] = struct{}{} + _, ok := h.regionPendings[regionID] + if ok { + schedulerStatus.WithLabelValues(h.GetName(), "pending_op_fails").Inc() + return false } - if _, ok := h.regionPendings[regionID]; !ok { - h.regionPendings[regionID] = [2]*operator.Operator{nil, nil} - } + influence := newPendingInfluence(op, srcStore, dstStore, infl) + rcTy := toResourceType(rwTy, opTy) + h.pendings[rcTy][influence] = struct{}{} + + h.regionPendings[regionID] = [2]*operator.Operator{nil, nil} { // h.pendingOpInfos[regionID][ty] = influence tmp := h.regionPendings[regionID] - tmp[ty] = op + tmp[opTy] = op h.regionPendings[regionID] = tmp } - schedulerStatus.WithLabelValues(h.GetName(), "pending_op_infos").Inc() + schedulerStatus.WithLabelValues(h.GetName(), "pending_op_create").Inc() + return true } func (h *hotScheduler) balanceHotReadRegions(cluster opt.Cluster) []*operator.Operator { @@ -357,23 +400,23 @@ func (h *hotScheduler) balanceHotReadRegions(cluster opt.Cluster) []*operator.Op return nil } -// balanceHotRetryLimit is the limit to retry schedule for selected balance strategy. 
-const balanceHotRetryLimit = 5 - func (h *hotScheduler) balanceHotWriteRegions(cluster opt.Cluster) []*operator.Operator { - for i := 0; i < balanceHotRetryLimit; i++ { - // prefer to balance by peer + // prefer to balance by peer + s := h.r.Intn(100) + switch { + case s < int(schedulePeerPr*100): peerSolver := newBalanceSolver(h, cluster, write, movePeer) ops := peerSolver.solve() if len(ops) > 0 { return ops } + default: + } - leaderSolver := newBalanceSolver(h, cluster, write, transferLeader) - ops = leaderSolver.solve() - if len(ops) > 0 { - return ops - } + leaderSolver := newBalanceSolver(h, cluster, write, transferLeader) + ops := leaderSolver.solve() + if len(ops) > 0 { + return ops } schedulerCounter.WithLabelValues(h.GetName(), "skip").Inc() @@ -387,28 +430,57 @@ type balanceSolver struct { rwTy rwType opTy opType - // temporary states + cur *solution + + maxSrc *storeLoad + minDst *storeLoad + rankStep *storeLoad +} + +type solution struct { srcStoreID uint64 srcPeerStat *statistics.HotPeerStat region *core.RegionInfo dstStoreID uint64 + + // progressiveRank measures the contribution for balance. + // The smaller the rank, the better this solution is. + // If rank < 0, this solution makes thing better. + progressiveRank int64 } func (bs *balanceSolver) init() { - switch bs.rwTy { - case read: - bs.stLoadDetail = bs.sche.stLoadInfos.ReadLeaders - case write: - switch bs.opTy { - case movePeer: - bs.stLoadDetail = bs.sche.stLoadInfos.WritePeers - case transferLeader: - bs.stLoadDetail = bs.sche.stLoadInfos.WriteLeaders - } + switch toResourceType(bs.rwTy, bs.opTy) { + case writePeer: + bs.stLoadDetail = bs.sche.stLoadInfos[writePeer] + case writeLeader: + bs.stLoadDetail = bs.sche.stLoadInfos[writeLeader] + case readLeader: + bs.stLoadDetail = bs.sche.stLoadInfos[readLeader] } for _, id := range getUnhealthyStores(bs.cluster) { delete(bs.stLoadDetail, id) } + + bs.maxSrc = &storeLoad{} + bs.minDst = &storeLoad{ + ByteRate: math.MaxFloat64, + KeyRate: math.MaxFloat64, + Count: math.MaxFloat64, + } + maxCur := &storeLoad{} + + for _, detail := range bs.stLoadDetail { + bs.maxSrc = maxLoad(bs.maxSrc, detail.LoadPred.min()) + bs.minDst = minLoad(bs.minDst, detail.LoadPred.max()) + maxCur = maxLoad(maxCur, &detail.LoadPred.Current) + } + + bs.rankStep = &storeLoad{ + ByteRate: maxCur.ByteRate * bs.sche.conf.GetByteRankStepRatio(), + KeyRate: maxCur.KeyRate * bs.sche.conf.GetKeyRankStepRatio(), + Count: maxCur.Count * bs.sche.conf.GetCountRankStepRatio(), + } } func getUnhealthyStores(cluster opt.Cluster) []uint64 { @@ -439,7 +511,7 @@ func (bs *balanceSolver) isValid() bool { return false } switch bs.rwTy { - case read, write: + case write, read: default: return false } @@ -455,28 +527,44 @@ func (bs *balanceSolver) solve() []*operator.Operator { if !bs.isValid() || !bs.allowBalance() { return nil } - bs.srcStoreID = bs.selectSrcStoreID() - if bs.srcStoreID == 0 { - return nil - } + bs.cur = &solution{} + var ( + best *solution + ops []*operator.Operator + infls []Influence + ) - for _, srcPeerStat := range bs.getPeerList() { - bs.srcPeerStat = srcPeerStat - bs.region = bs.getRegion() - if bs.region == nil { - continue - } - dstCandidates := bs.getDstCandidateIDs() - if len(dstCandidates) <= 0 { - continue + for srcStoreID := range bs.filterSrcStores() { + bs.cur.srcStoreID = srcStoreID + + for _, srcPeerStat := range bs.filterHotPeers() { + bs.cur.srcPeerStat = srcPeerStat + bs.cur.region = bs.getRegion() + if bs.cur.region == nil { + continue + } + for dstStoreID := range 
bs.filterDstStores() { + bs.cur.dstStoreID = dstStoreID + bs.calcProgressiveRank() + if bs.cur.progressiveRank < 0 && bs.betterThan(best) { + if newOps, newInfls := bs.buildOperators(); len(newOps) > 0 { + ops = newOps + infls = newInfls + clone := *bs.cur + best = &clone + } + } + } } - bs.dstStoreID = bs.selectDstStoreID(dstCandidates) - ops := bs.buildOperators() - if len(ops) > 0 { - return ops + } + + for i := 0; i < len(ops); i++ { + // TODO: multiple operators need to be atomic. + if !bs.sche.addPendingInfluence(ops[i], best.srcStoreID, best.dstStoreID, infls[i], bs.rwTy, bs.opTy) { + return nil } } - return nil + return ops } func (bs *balanceSolver) allowBalance() bool { @@ -490,29 +578,80 @@ func (bs *balanceSolver) allowBalance() bool { } } -func (bs *balanceSolver) selectSrcStoreID() uint64 { - var id uint64 - switch bs.opTy { - case movePeer: - id = selectSrcStoreByByteRate(bs.stLoadDetail) - case transferLeader: - if bs.rwTy == write { - id = selectSrcStoreByCount(bs.stLoadDetail) - } else { - id = selectSrcStoreByByteRate(bs.stLoadDetail) +func (bs *balanceSolver) filterSrcStores() map[uint64]*storeLoadDetail { + ret := make(map[uint64]*storeLoadDetail) + for id, detail := range bs.stLoadDetail { + if bs.cluster.GetStore(id) == nil { + log.Error("failed to get the source store", zap.Uint64("store-id", id)) + continue } + if len(detail.HotPeers) == 0 { + continue + } + if detail.LoadPred.min().ByteRate > bs.sche.conf.GetSrcToleranceRatio()*detail.LoadPred.Future.ExpByteRate && + detail.LoadPred.min().KeyRate > bs.sche.conf.GetSrcToleranceRatio()*detail.LoadPred.Future.ExpKeyRate { + ret[id] = detail + balanceHotRegionCounter.WithLabelValues("src-store-succ", strconv.FormatUint(id, 10)).Inc() + } + balanceHotRegionCounter.WithLabelValues("src-store-failed", strconv.FormatUint(id, 10)).Inc() } - if id != 0 && bs.cluster.GetStore(id) == nil { - log.Error("failed to get the source store", zap.Uint64("store-id", id)) - } - return id + return ret } -func (bs *balanceSolver) getPeerList() []*statistics.HotPeerStat { - ret := bs.stLoadDetail[bs.srcStoreID].HotPeers - bs.sche.r.Shuffle(len(ret), func(i, j int) { - ret[i], ret[j] = ret[j], ret[i] +func (bs *balanceSolver) filterHotPeers() []*statistics.HotPeerStat { + ret := bs.stLoadDetail[bs.cur.srcStoreID].HotPeers + // Return at most MaxPeerNum peers, to prevent balanceSolver.solve() too slow. 
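The comment above introduces the MaxPeerNum cap: when a store has too many hot peers, the code that follows keeps the union of the entries ranked highest by byte rate and by key rate, so a peer that is extreme on either metric survives the cut. A generic sketch of that two-metric top-N union, not part of the patch (types, names, and the exact tie-handling are illustrative):

package main

import (
	"fmt"
	"sort"
)

type peer struct {
	id       uint64
	byteRate float64
	keyRate  float64
}

// topUnion keeps up to n peers, drawing alternately from the peers ranked by
// byte rate and by key rate, so the hottest peer on either metric is retained.
func topUnion(peers []peer, n int) []peer {
	byByte := append([]peer(nil), peers...)
	sort.Slice(byByte, func(i, j int) bool { return byByte[i].byteRate > byByte[j].byteRate })
	byKey := append([]peer(nil), peers...)
	sort.Slice(byKey, func(i, j int) bool { return byKey[i].keyRate > byKey[j].keyRate })

	picked := map[uint64]peer{}
	for i := 0; len(picked) < n && i < len(peers); i++ {
		picked[byByte[i].id] = byByte[i]
		if len(picked) >= n {
			break
		}
		picked[byKey[i].id] = byKey[i]
	}
	out := make([]peer, 0, len(picked))
	for _, p := range picked {
		out = append(out, p)
	}
	return out
}

func main() {
	peers := []peer{{1, 900, 1}, {2, 10, 500}, {3, 300, 300}, {4, 5, 5}}
	fmt.Println(topUnion(peers, 2)) // keeps the byte-hottest and the key-hottest peers
}
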
+ maxPeerNum := bs.sche.conf.GetMaxPeerNumber() + + // filter pending region + appendItem := func(items []*statistics.HotPeerStat, item *statistics.HotPeerStat) []*statistics.HotPeerStat { + if _, ok := bs.sche.regionPendings[item.ID()]; !ok { + items = append(items, item) + } + return items + } + if len(ret) <= maxPeerNum { + nret := make([]*statistics.HotPeerStat, 0, len(ret)) + for _, peer := range ret { + nret = appendItem(nret, peer) + } + return nret + } + + byteSort := make([]*statistics.HotPeerStat, len(ret)) + copy(byteSort, ret) + sort.Slice(byteSort, func(i, j int) bool { + return byteSort[i].GetByteRate() > byteSort[j].GetByteRate() }) + keySort := make([]*statistics.HotPeerStat, len(ret)) + copy(keySort, ret) + sort.Slice(keySort, func(i, j int) bool { + return keySort[i].GetKeyRate() > keySort[j].GetKeyRate() + }) + + union := make(map[*statistics.HotPeerStat]struct{}, maxPeerNum) + for len(union) < maxPeerNum { + for len(byteSort) > 0 { + peer := byteSort[0] + byteSort = byteSort[1:] + if _, ok := union[peer]; !ok { + union[peer] = struct{}{} + break + } + } + for len(keySort) > 0 { + peer := keySort[0] + keySort = keySort[1:] + if _, ok := union[peer]; !ok { + union[peer] = struct{}{} + break + } + } + } + ret = make([]*statistics.HotPeerStat, 0, len(union)) + for peer := range union { + ret = appendItem(ret, peer) + } return ret } @@ -548,21 +687,21 @@ func (bs *balanceSolver) isRegionAvailable(region *core.RegionInfo) bool { } func (bs *balanceSolver) getRegion() *core.RegionInfo { - region := bs.cluster.GetRegion(bs.srcPeerStat.ID()) + region := bs.cluster.GetRegion(bs.cur.srcPeerStat.ID()) if !bs.isRegionAvailable(region) { return nil } switch bs.opTy { case movePeer: - srcPeer := region.GetStorePeer(bs.srcStoreID) + srcPeer := region.GetStorePeer(bs.cur.srcStoreID) if srcPeer == nil { - log.Debug("region does not have a peer on source store, maybe stat out of date", zap.Uint64("region-id", bs.srcPeerStat.ID())) + log.Debug("region does not have a peer on source store, maybe stat out of date", zap.Uint64("region-id", bs.cur.srcPeerStat.ID())) return nil } case transferLeader: - if region.GetLeader().GetStoreId() != bs.srcStoreID { - log.Debug("region leader is not on source store, maybe stat out of date", zap.Uint64("region-id", bs.srcPeerStat.ID())) + if region.GetLeader().GetStoreId() != bs.cur.srcStoreID { + log.Debug("region leader is not on source store, maybe stat out of date", zap.Uint64("region-id", bs.cur.srcPeerStat.ID())) return nil } default: @@ -572,7 +711,7 @@ func (bs *balanceSolver) getRegion() *core.RegionInfo { return region } -func (bs *balanceSolver) getDstCandidateIDs() map[uint64]struct{} { +func (bs *balanceSolver) filterDstStores() map[uint64]*storeLoadDetail { var ( filters []filter.Filter candidates []*core.StoreInfo @@ -582,18 +721,18 @@ func (bs *balanceSolver) getDstCandidateIDs() map[uint64]struct{} { case movePeer: var scoreGuard filter.Filter if bs.cluster.IsPlacementRulesEnabled() { - scoreGuard = filter.NewRuleFitFilter(bs.sche.GetName(), bs.cluster, bs.region, bs.srcStoreID) + scoreGuard = filter.NewRuleFitFilter(bs.sche.GetName(), bs.cluster, bs.cur.region, bs.cur.srcStoreID) } else { - srcStore := bs.cluster.GetStore(bs.srcStoreID) + srcStore := bs.cluster.GetStore(bs.cur.srcStoreID) if srcStore == nil { return nil } - scoreGuard = filter.NewDistinctScoreFilter(bs.sche.GetName(), bs.cluster.GetLocationLabels(), bs.cluster.GetRegionStores(bs.region), srcStore) + scoreGuard = filter.NewDistinctScoreFilter(bs.sche.GetName(), 
bs.cluster.GetLocationLabels(), bs.cluster.GetRegionStores(bs.cur.region), srcStore) } filters = []filter.Filter{ filter.StoreStateFilter{ActionScope: bs.sche.GetName(), MoveRegion: true}, - filter.NewExcludedFilter(bs.sche.GetName(), bs.region.GetStoreIds(), bs.region.GetStoreIds()), + filter.NewExcludedFilter(bs.sche.GetName(), bs.cur.region.GetStoreIds(), bs.cur.region.GetStoreIds()), filter.NewHealthFilter(bs.sche.GetName()), scoreGuard, } @@ -606,205 +745,285 @@ func (bs *balanceSolver) getDstCandidateIDs() map[uint64]struct{} { filter.NewHealthFilter(bs.sche.GetName()), } - candidates = bs.cluster.GetFollowerStores(bs.region) + candidates = bs.cluster.GetFollowerStores(bs.cur.region) default: return nil } - ret := make(map[uint64]struct{}, len(candidates)) + ret := make(map[uint64]*storeLoadDetail, len(candidates)) for _, store := range candidates { if !filter.Target(bs.cluster, store, filters) { - ret[store.GetID()] = struct{}{} + detail := bs.stLoadDetail[store.GetID()] + if detail.LoadPred.max().ByteRate*bs.sche.conf.GetDstToleranceRatio() < detail.LoadPred.Future.ExpByteRate && + detail.LoadPred.max().KeyRate*bs.sche.conf.GetDstToleranceRatio() < detail.LoadPred.Future.ExpKeyRate { + ret[store.GetID()] = bs.stLoadDetail[store.GetID()] + balanceHotRegionCounter.WithLabelValues("dst-store-succ", strconv.FormatUint(store.GetID(), 10)).Inc() + } + balanceHotRegionCounter.WithLabelValues("dst-store-fail", strconv.FormatUint(store.GetID(), 10)).Inc() + ret[store.GetID()] = bs.stLoadDetail[store.GetID()] } } return ret } -func (bs *balanceSolver) selectDstStoreID(candidateIDs map[uint64]struct{}) uint64 { - candidateLoadDetail := make(map[uint64]*storeLoadDetail, len(candidateIDs)) - for id := range candidateIDs { - candidateLoadDetail[id] = bs.stLoadDetail[id] - } - switch bs.opTy { - case movePeer: - return selectDstStoreByByteRate(candidateLoadDetail, bs.srcPeerStat.GetByteRate(), bs.stLoadDetail[bs.srcStoreID]) - case transferLeader: - if bs.rwTy == write { - return selectDstStoreByCount(candidateLoadDetail, bs.srcPeerStat.GetByteRate(), bs.stLoadDetail[bs.srcStoreID]) +// calcProgressiveRank calculates `bs.cur.progressiveRank`. +// See the comments of `solution.progressiveRank` for more about progressive rank. +func (bs *balanceSolver) calcProgressiveRank() { + srcLd := bs.stLoadDetail[bs.cur.srcStoreID].LoadPred.min() + dstLd := bs.stLoadDetail[bs.cur.dstStoreID].LoadPred.max() + peer := bs.cur.srcPeerStat + rank := int64(0) + if bs.rwTy == write && bs.opTy == transferLeader { + // In this condition, CPU usage is the matter. + // Only consider about key rate. + if srcLd.KeyRate >= dstLd.KeyRate+peer.GetKeyRate() { + rank = -1 + } + } else { + getSrcDecRate := func(a, b float64) float64 { + if a-b <= 0 { + return 1 + } + return a - b + } + keyDecRatio := (dstLd.KeyRate + peer.GetKeyRate()) / getSrcDecRate(srcLd.KeyRate, peer.GetKeyRate()) + keyHot := peer.GetKeyRate() >= bs.sche.conf.GetMinHotKeyRate() + byteDecRatio := (dstLd.ByteRate + peer.GetByteRate()) / getSrcDecRate(srcLd.ByteRate, peer.GetByteRate()) + byteHot := peer.GetByteRate() > bs.sche.conf.GetMinHotByteRate() + greatDecRatio, minorDecRatio := bs.sche.conf.GetGreatDecRatio(), bs.sche.conf.GetMinorGreatDecRatio() + switch { + case byteHot && byteDecRatio <= greatDecRatio && keyHot && keyDecRatio <= greatDecRatio: + // Both byte rate and key rate are balanced, the best choice. + rank = -3 + case byteDecRatio <= minorDecRatio && keyHot && keyDecRatio <= greatDecRatio: + // Byte rate is not worsened, key rate is balanced. 
+ rank = -2 + case byteHot && byteDecRatio <= greatDecRatio: + // Byte rate is balanced, ignore the key rate. + rank = -1 } - return selectDstStoreByByteRate(candidateLoadDetail, bs.srcPeerStat.GetByteRate(), bs.stLoadDetail[bs.srcStoreID]) - default: - return 0 } + bs.cur.progressiveRank = rank } -func (bs *balanceSolver) isReadyToBuild() bool { - if bs.srcStoreID == 0 || bs.dstStoreID == 0 || - bs.srcPeerStat == nil || bs.region == nil { - return false +// betterThan checks if `bs.cur` is a better solution than `old`. +func (bs *balanceSolver) betterThan(old *solution) bool { + if old == nil { + return true } - if bs.srcStoreID != bs.srcPeerStat.StoreID || - bs.region.GetID() != bs.srcPeerStat.ID() { + + switch { + case bs.cur.progressiveRank < old.progressiveRank: + return true + case bs.cur.progressiveRank > old.progressiveRank: return false } - return true -} -func (bs *balanceSolver) buildOperators() []*operator.Operator { - if !bs.isReadyToBuild() { - return nil + if r := bs.compareSrcStore(bs.cur.srcStoreID, old.srcStoreID); r < 0 { + return true + } else if r > 0 { + return false } - var ( - op *operator.Operator - err error - ) - switch bs.opTy { - case movePeer: - srcPeer := bs.region.GetStorePeer(bs.srcStoreID) // checked in getRegionAndSrcPeer - dstPeer := &metapb.Peer{StoreId: bs.dstStoreID, IsLearner: srcPeer.IsLearner} - bs.sche.peerLimit = bs.sche.adjustBalanceLimit(bs.srcStoreID, bs.stLoadDetail) - op, err = operator.CreateMovePeerOperator("move-hot-"+bs.rwTy.String()+"-region", bs.cluster, bs.region, operator.OpHotRegion, bs.srcStoreID, dstPeer) - case transferLeader: - if bs.region.GetStoreVoter(bs.dstStoreID) == nil { - return nil - } - bs.sche.leaderLimit = bs.sche.adjustBalanceLimit(bs.srcStoreID, bs.stLoadDetail) - op, err = operator.CreateTransferLeaderOperator("transfer-hot-"+bs.rwTy.String()+"-leader", bs.cluster, bs.region, bs.srcStoreID, bs.dstStoreID, operator.OpHotRegion) + if r := bs.compareDstStore(bs.cur.dstStoreID, old.dstStoreID); r < 0 { + return true + } else if r > 0 { + return false } - if err != nil { - log.Debug("fail to create operator", zap.Error(err), zap.Stringer("opType", bs.opTy), zap.Stringer("rwType", bs.rwTy)) - schedulerCounter.WithLabelValues(bs.sche.GetName(), "create-operator-fail").Inc() - return nil - } + if bs.cur.srcPeerStat != old.srcPeerStat { + // compare region - op.SetPriorityLevel(core.HighPriority) - op.Counters = append(op.Counters, - schedulerCounter.WithLabelValues(bs.sche.GetName(), "new-operator"), - schedulerCounter.WithLabelValues(bs.sche.GetName(), bs.opTy.String())) + if bs.rwTy == write && bs.opTy == transferLeader { + switch { + case bs.cur.srcPeerStat.GetKeyRate() > old.srcPeerStat.GetKeyRate(): + return true + case bs.cur.srcPeerStat.GetKeyRate() < old.srcPeerStat.GetKeyRate(): + return false + } + } else { + byteRkCmp := rankCmp(bs.cur.srcPeerStat.GetByteRate(), old.srcPeerStat.GetByteRate(), stepRank(0, 100)) + keyRkCmp := rankCmp(bs.cur.srcPeerStat.GetKeyRate(), old.srcPeerStat.GetKeyRate(), stepRank(0, 10)) - infl := Influence{ByteRate: bs.srcPeerStat.GetByteRate()} - if bs.opTy == transferLeader && bs.rwTy == write { - infl.ByteRate = 0 + switch bs.cur.progressiveRank { + case -2: // greatDecRatio < byteDecRatio <= minorDecRatio && keyDecRatio <= greatDecRatio + if keyRkCmp != 0 { + return keyRkCmp > 0 + } + if byteRkCmp != 0 { + // prefer smaller byte rate, to reduce oscillation + return byteRkCmp < 0 + } + case -3: // byteDecRatio <= greatDecRatio && keyDecRatio <= greatDecRatio + if keyRkCmp != 0 { + 
return keyRkCmp > 0 + } + fallthrough + case -1: // byteDecRatio <= greatDecRatio + if byteRkCmp != 0 { + // prefer region with larger byte rate, to converge faster + return byteRkCmp > 0 + } + } + } } - bs.sche.addPendingInfluence(op, bs.srcStoreID, bs.dstStoreID, infl, bs.rwTy, bs.opTy) - - return []*operator.Operator{op} -} -// Sort stores according to their load prediction. -func sortStores(loadDetail map[uint64]*storeLoadDetail, better func(lp1, lp2 *storeLoadPred) bool) []uint64 { - ids := make([]uint64, 0, len(loadDetail)) - for id := range loadDetail { - ids = append(ids, id) - } - sort.Slice(ids, func(i, j int) bool { - id1, id2 := ids[i], ids[j] - return better(loadDetail[id1].LoadPred, loadDetail[id2].LoadPred) - }) - return ids + return false } -// Prefer store with larger `count`. -func selectSrcStoreByCount(loadDetail map[uint64]*storeLoadDetail) uint64 { - stores := sortStores(loadDetail, func(lp1, lp2 *storeLoadPred) bool { - ld1, ld2 := lp1.min(), lp2.min() - if ld1.Count > ld2.Count || - (ld1.Count == ld2.Count && ld1.ByteRate > ld2.ByteRate) { - return true +// smaller is better +func (bs *balanceSolver) compareSrcStore(st1, st2 uint64) int { + if st1 != st2 { + // compare source store + var lpCmp storeLPCmp + if bs.rwTy == write && bs.opTy == transferLeader { + lpCmp = sliceLPCmp( + minLPCmp(negLoadCmp(sliceLoadCmp( + stLdRankCmp(stLdKeyRate, stepRank(bs.maxSrc.KeyRate, bs.rankStep.KeyRate)), + stLdRankCmp(stLdByteRate, stepRank(bs.maxSrc.ByteRate, bs.rankStep.ByteRate)), + ))), + diffCmp(sliceLoadCmp( + stLdRankCmp(stLdCount, stepRank(0, bs.rankStep.Count)), + stLdRankCmp(stLdKeyRate, stepRank(0, bs.rankStep.KeyRate)), + stLdRankCmp(stLdByteRate, stepRank(0, bs.rankStep.ByteRate)), + )), + ) + } else { + lpCmp = sliceLPCmp( + minLPCmp(negLoadCmp(sliceLoadCmp( + stLdRankCmp(stLdByteRate, stepRank(bs.maxSrc.ByteRate, bs.rankStep.ByteRate)), + stLdRankCmp(stLdKeyRate, stepRank(bs.maxSrc.KeyRate, bs.rankStep.KeyRate)), + ))), + diffCmp( + stLdRankCmp(stLdByteRate, stepRank(0, bs.rankStep.ByteRate)), + ), + ) } - return false - }) - if len(stores) > 0 && loadDetail[stores[0]].LoadPred.Current.Count > 1 { - return stores[0] + lp1 := bs.stLoadDetail[st1].LoadPred + lp2 := bs.stLoadDetail[st2].LoadPred + return lpCmp(lp1, lp2) } return 0 } -// Prefer store with larger `byteRate`. 
-func selectSrcStoreByByteRate(loadDetail map[uint64]*storeLoadDetail) uint64 { - stores := sortStores(loadDetail, func(lp1, lp2 *storeLoadPred) bool { - ld1, ld2 := lp1.min(), lp2.min() - if ld1.ByteRate > ld2.ByteRate || - (ld1.ByteRate == ld2.ByteRate && ld1.Count > ld2.Count) { - return true +// smaller is better +func (bs *balanceSolver) compareDstStore(st1, st2 uint64) int { + if st1 != st2 { + // compare destination store + var lpCmp storeLPCmp + if bs.rwTy == write && bs.opTy == transferLeader { + lpCmp = sliceLPCmp( + maxLPCmp(sliceLoadCmp( + stLdRankCmp(stLdKeyRate, stepRank(bs.minDst.KeyRate, bs.rankStep.KeyRate)), + stLdRankCmp(stLdByteRate, stepRank(bs.minDst.ByteRate, bs.rankStep.ByteRate)), + )), + diffCmp(sliceLoadCmp( + stLdRankCmp(stLdCount, stepRank(0, bs.rankStep.Count)), + stLdRankCmp(stLdKeyRate, stepRank(0, bs.rankStep.KeyRate)), + stLdRankCmp(stLdByteRate, stepRank(0, bs.rankStep.ByteRate)), + ))) + } else { + lpCmp = sliceLPCmp( + maxLPCmp(sliceLoadCmp( + stLdRankCmp(stLdByteRate, stepRank(bs.minDst.ByteRate, bs.rankStep.ByteRate)), + stLdRankCmp(stLdKeyRate, stepRank(bs.minDst.KeyRate, bs.rankStep.KeyRate)), + )), + diffCmp( + stLdRankCmp(stLdByteRate, stepRank(0, bs.rankStep.ByteRate)), + ), + ) } - return false - }) - for _, id := range stores { - if loadDetail[id].LoadPred.Current.Count > 1 { - return id - } + lp1 := bs.stLoadDetail[st1].LoadPred + lp2 := bs.stLoadDetail[st2].LoadPred + return lpCmp(lp1, lp2) } return 0 } -// Prefer store with smaller `count`. -func selectDstStoreByCount(candidates map[uint64]*storeLoadDetail, regionBytesRate float64, srcLoadDetail *storeLoadDetail) uint64 { - stores := sortStores(candidates, func(lp1, lp2 *storeLoadPred) bool { - ld1, ld2 := lp1.max(), lp2.max() - if ld1.Count < ld2.Count || - (ld1.Count == ld2.Count && ld1.ByteRate < ld2.ByteRate) { - return true - } - return false - }) - - srcLoad := srcLoadDetail.LoadPred.min() - for _, id := range stores { - dstLoad := candidates[id].LoadPred.max() - if srcLoad.Count-1 >= dstLoad.Count+1 && - srcLoad.ByteRate*hotRegionScheduleFactor > dstLoad.ByteRate+regionBytesRate { - return id - } +func stepRank(rk0 float64, step float64) func(float64) int64 { + return func(rate float64) int64 { + return int64((rate - rk0) / step) } - return 0 } -// Prefer store with smaller `byteRate`. 
-func selectDstStoreByByteRate(candidates map[uint64]*storeLoadDetail, regionBytesRate float64, srcLoadDetail *storeLoadDetail) uint64 { - stores := sortStores(candidates, func(lp1, lp2 *storeLoadPred) bool { - ld1, ld2 := lp1.max(), lp2.max() - if ld1.ByteRate < ld2.ByteRate || - (ld1.ByteRate == ld2.ByteRate && ld1.Count < ld2.Count) { - return true - } +func (bs *balanceSolver) isReadyToBuild() bool { + if bs.cur.srcStoreID == 0 || bs.cur.dstStoreID == 0 || + bs.cur.srcPeerStat == nil || bs.cur.region == nil { return false - }) + } + if bs.cur.srcStoreID != bs.cur.srcPeerStat.StoreID || + bs.cur.region.GetID() != bs.cur.srcPeerStat.ID() { + return false + } + return true +} + +func (bs *balanceSolver) buildOperators() ([]*operator.Operator, []Influence) { + if !bs.isReadyToBuild() { + return nil, nil + } + var ( + op *operator.Operator + err error + ) - srcLoad := srcLoadDetail.LoadPred.min() - for _, id := range stores { - dstLoad := candidates[id].LoadPred.max() - if srcLoad.ByteRate*hotRegionScheduleFactor > dstLoad.ByteRate+regionBytesRate { - return id + switch bs.opTy { + case movePeer: + srcPeer := bs.cur.region.GetStorePeer(bs.cur.srcStoreID) // checked in getRegionAndSrcPeer + dstPeer := &metapb.Peer{StoreId: bs.cur.dstStoreID, IsLearner: srcPeer.IsLearner} + op, err = operator.CreateMovePeerOperator( + "move-hot-"+bs.rwTy.String()+"-region", + bs.cluster, + bs.cur.region, + operator.OpHotRegion, + bs.cur.srcStoreID, + dstPeer) + + op.Counters = append(op.Counters, balanceHotRegionCounter.WithLabelValues("move-peer", strconv.FormatUint(bs.cur.srcStoreID, 10)+"-out")) + op.Counters = append(op.Counters, balanceHotRegionCounter.WithLabelValues("move-peer", strconv.FormatUint(dstPeer.GetStoreId(), 10)+"-in")) + case transferLeader: + if bs.cur.region.GetStoreVoter(bs.cur.dstStoreID) == nil { + return nil, nil } + op, err = operator.CreateTransferLeaderOperator( + "transfer-hot-"+bs.rwTy.String()+"-leader", + bs.cluster, + bs.cur.region, + bs.cur.srcStoreID, + bs.cur.dstStoreID, + operator.OpHotRegion) + op.Counters = append(op.Counters, balanceHotRegionCounter.WithLabelValues("move-leader", strconv.FormatUint(bs.cur.srcStoreID, 10)+"-out")) + op.Counters = append(op.Counters, balanceHotRegionCounter.WithLabelValues("move-leader", strconv.FormatUint(bs.cur.dstStoreID, 10)+"-in")) } - return 0 -} -func (h *hotScheduler) adjustBalanceLimit(storeID uint64, loadDetail map[uint64]*storeLoadDetail) uint64 { - srcStoreStatistics := loadDetail[storeID] + if err != nil { + log.Debug("fail to create operator", zap.Error(err), zap.Stringer("rwType", bs.rwTy), zap.Stringer("opType", bs.opTy)) + schedulerCounter.WithLabelValues(bs.sche.GetName(), "create-operator-fail").Inc() + return nil, nil + } - var hotRegionTotalCount int - for _, m := range loadDetail { - hotRegionTotalCount += len(m.HotPeers) + op.SetPriorityLevel(core.HighPriority) + op.Counters = append(op.Counters, + schedulerCounter.WithLabelValues(bs.sche.GetName(), "new-operator"), + schedulerCounter.WithLabelValues(bs.sche.GetName(), bs.opTy.String())) + + infl := Influence{ + ByteRate: bs.cur.srcPeerStat.GetByteRate(), + KeyRate: bs.cur.srcPeerStat.GetKeyRate(), + Count: 1, } - avgRegionCount := float64(hotRegionTotalCount) / float64(len(loadDetail)) - // Multiplied by hotRegionLimitFactor to avoid transfer back and forth - limit := uint64((float64(len(srcStoreStatistics.HotPeers)) - avgRegionCount) * hotRegionLimitFactor) - return maxUint64(limit, 1) + return []*operator.Operator{op}, []Influence{infl} } func (h *hotScheduler) 
GetHotReadStatus() *statistics.StoreHotPeersInfos { h.RLock() defer h.RUnlock() - asLeader := make(statistics.StoreHotPeersStat, len(h.stLoadInfos.ReadLeaders)) - for id, detail := range h.stLoadInfos.ReadLeaders { + asLeader := make(statistics.StoreHotPeersStat, len(h.stLoadInfos[readLeader])) + for id, detail := range h.stLoadInfos[readLeader] { asLeader[id] = detail.toHotPeersStat() } return &statistics.StoreHotPeersInfos{ @@ -815,12 +1034,12 @@ func (h *hotScheduler) GetHotReadStatus() *statistics.StoreHotPeersInfos { func (h *hotScheduler) GetHotWriteStatus() *statistics.StoreHotPeersInfos { h.RLock() defer h.RUnlock() - asLeader := make(statistics.StoreHotPeersStat, len(h.stLoadInfos.WriteLeaders)) - asPeer := make(statistics.StoreHotPeersStat, len(h.stLoadInfos.WritePeers)) - for id, detail := range h.stLoadInfos.WriteLeaders { + asLeader := make(statistics.StoreHotPeersStat, len(h.stLoadInfos[writeLeader])) + asPeer := make(statistics.StoreHotPeersStat, len(h.stLoadInfos[writePeer])) + for id, detail := range h.stLoadInfos[writeLeader] { asLeader[id] = detail.toHotPeersStat() } - for id, detail := range h.stLoadInfos.WritePeers { + for id, detail := range h.stLoadInfos[writePeer] { asPeer[id] = detail.toHotPeersStat() } return &statistics.StoreHotPeersInfos{ @@ -830,22 +1049,17 @@ func (h *hotScheduler) GetHotWriteStatus() *statistics.StoreHotPeersInfos { } func (h *hotScheduler) GetWritePendingInfluence() map[uint64]Influence { - return h.copyPendingInfluence(write) + return h.copyPendingInfluence(writePeer) } func (h *hotScheduler) GetReadPendingInfluence() map[uint64]Influence { - return h.copyPendingInfluence(read) + return h.copyPendingInfluence(readLeader) } -func (h *hotScheduler) copyPendingInfluence(typ rwType) map[uint64]Influence { +func (h *hotScheduler) copyPendingInfluence(ty resourceType) map[uint64]Influence { h.RLock() defer h.RUnlock() - var pendingSum map[uint64]Influence - if typ == read { - pendingSum = h.readPendingSum - } else { - pendingSum = h.writePendingSum - } + pendingSum := h.pendingSums[ty] ret := make(map[uint64]Influence, len(pendingSum)) for id, infl := range pendingSum { ret[id] = infl @@ -853,7 +1067,7 @@ func (h *hotScheduler) copyPendingInfluence(typ rwType) map[uint64]Influence { return ret } -func calcPendingWeight(op *operator.Operator) float64 { +func (h *hotScheduler) calcPendingWeight(op *operator.Operator) float64 { if op.CheckExpired() || op.CheckTimeout() { return 0 } @@ -864,6 +1078,7 @@ func calcPendingWeight(op *operator.Operator) float64 { switch status { case operator.SUCCESS: zombieDur := time.Since(op.GetReachTimeOf(status)) + maxZombieDur := h.conf.GetMaxZombieDuration() if zombieDur >= maxZombieDur { return 0 } @@ -875,13 +1090,21 @@ func calcPendingWeight(op *operator.Operator) float64 { } func (h *hotScheduler) clearPendingInfluence() { - h.readPendings = map[*pendingInfluence]struct{}{} - h.writePendings = map[*pendingInfluence]struct{}{} - h.readPendingSum = nil - h.writePendingSum = nil + for ty := resourceType(0); ty < resourceTypeLen; ty++ { + h.pendings[ty] = map[*pendingInfluence]struct{}{} + h.pendingSums[ty] = nil + } h.regionPendings = make(map[uint64][2]*operator.Operator) } +// rwType : the perspective of balance +type rwType int + +const ( + write rwType = iota + read +) + func (rw rwType) String() string { switch rw { case read: @@ -893,6 +1116,13 @@ func (rw rwType) String() string { } } +type opType int + +const ( + movePeer opType = iota + transferLeader +) + func (ty opType) String() string { switch ty { 
case movePeer: @@ -903,3 +1133,27 @@ func (ty opType) String() string { return "" } } + +type resourceType int + +const ( + writePeer resourceType = iota + writeLeader + readLeader + resourceTypeLen +) + +func toResourceType(rwTy rwType, opTy opType) resourceType { + switch rwTy { + case write: + switch opTy { + case movePeer: + return writePeer + case transferLeader: + return writeLeader + } + case read: + return readLeader + } + panic(fmt.Sprintf("invalid arguments for toResourceType: rwTy = %v, opTy = %v", rwTy, opTy)) +} diff --git a/server/schedulers/hot_region_config.go b/server/schedulers/hot_region_config.go new file mode 100644 index 00000000000..05c5b08bb3d --- /dev/null +++ b/server/schedulers/hot_region_config.go @@ -0,0 +1,218 @@ +// Copyright 2020 PingCAP, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// See the License for the specific language governing permissions and +// limitations under the License. + +package schedulers + +import ( + "bytes" + "encoding/json" + "io/ioutil" + "net/http" + "reflect" + "strings" + "sync" + "time" + + "github.com/gorilla/mux" + "github.com/pingcap/pd/v3/server/core" + "github.com/pingcap/pd/v3/server/schedule" + "github.com/pingcap/pd/v3/server/statistics" + "github.com/unrolled/render" +) + +// params about hot region. +func initHotRegionScheduleConfig() *hotRegionSchedulerConfig { + return &hotRegionSchedulerConfig{ + MinHotByteRate: 100, + MinHotKeyRate: 10, + MaxZombieRounds: 3, + ByteRateRankStepRatio: 0.05, + KeyRateRankStepRatio: 0.05, + CountRankStepRatio: 0.01, + GreatDecRatio: 0.95, + MinorDecRatio: 0.99, + MaxPeerNum: 1000, + SrcToleranceRatio: 1.02, // Tolerate 2% difference + DstToleranceRatio: 1.02, // Tolerate 2% difference + } +} + +type hotRegionSchedulerConfig struct { + sync.RWMutex + storage *core.Storage + + MinHotByteRate float64 `json:"min-hot-byte-rate"` + MinHotKeyRate float64 `json:"min-hot-key-rate"` + MaxZombieRounds int `json:"max-zombie-rounds"` + MaxPeerNum int `json:"max-peer-number"` + + // rank step ratio decide the step when calculate rank + // step = max current * rank step ratio + ByteRateRankStepRatio float64 `json:"byte-rate-rank-step-ratio"` + KeyRateRankStepRatio float64 `json:"key-rate-rank-step-ratio"` + CountRankStepRatio float64 `json:"count-rank-step-ratio"` + GreatDecRatio float64 `json:"great-dec-ratio"` + MinorDecRatio float64 `json:"minor-dec-ratio"` + SrcToleranceRatio float64 `json:"src-tolerance-ratio"` + DstToleranceRatio float64 `json:"dst-tolerance-ratio"` +} + +func (conf *hotRegionSchedulerConfig) EncodeConfig() ([]byte, error) { + conf.RLock() + defer conf.RUnlock() + return schedule.EncodeConfig(conf) +} + +func (conf *hotRegionSchedulerConfig) GetMaxZombieDuration() time.Duration { + conf.RLock() + defer conf.RUnlock() + return time.Duration(conf.MaxZombieRounds) * statistics.StoreHeartBeatReportInterval * time.Second +} + +func (conf *hotRegionSchedulerConfig) GetMaxPeerNumber() int { + conf.RLock() + defer conf.RUnlock() + return conf.MaxPeerNum +} + +func (conf *hotRegionSchedulerConfig) GetSrcToleranceRatio() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.SrcToleranceRatio +} + +func (conf *hotRegionSchedulerConfig) 
SetSrcToleranceRatio(tol float64) { + conf.Lock() + defer conf.Unlock() + conf.SrcToleranceRatio = tol +} + +func (conf *hotRegionSchedulerConfig) GetDstToleranceRatio() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.DstToleranceRatio +} + +func (conf *hotRegionSchedulerConfig) SetDstToleranceRatio(tol float64) { + conf.Lock() + defer conf.Unlock() + conf.DstToleranceRatio = tol +} + +func (conf *hotRegionSchedulerConfig) GetByteRankStepRatio() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.ByteRateRankStepRatio +} + +func (conf *hotRegionSchedulerConfig) GetKeyRankStepRatio() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.KeyRateRankStepRatio +} + +func (conf *hotRegionSchedulerConfig) GetCountRankStepRatio() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.CountRankStepRatio +} + +func (conf *hotRegionSchedulerConfig) GetGreatDecRatio() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.GreatDecRatio +} + +func (conf *hotRegionSchedulerConfig) GetMinorGreatDecRatio() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.MinorDecRatio +} + +func (conf *hotRegionSchedulerConfig) GetMinHotKeyRate() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.MinHotKeyRate +} + +func (conf *hotRegionSchedulerConfig) GetMinHotByteRate() float64 { + conf.RLock() + defer conf.RUnlock() + return conf.MinHotByteRate +} + +func (conf *hotRegionSchedulerConfig) ServeHTTP(w http.ResponseWriter, r *http.Request) { + router := mux.NewRouter() + router.HandleFunc("/list", conf.handleGetConfig).Methods("GET") + router.HandleFunc("/config", conf.handleSetConfig).Methods("POST") + router.ServeHTTP(w, r) +} + +func (conf *hotRegionSchedulerConfig) handleGetConfig(w http.ResponseWriter, r *http.Request) { + conf.RLock() + defer conf.RUnlock() + rd := render.New(render.Options{IndentJSON: true}) + rd.JSON(w, http.StatusOK, conf) +} + +func (conf *hotRegionSchedulerConfig) handleSetConfig(w http.ResponseWriter, r *http.Request) { + conf.Lock() + defer conf.Unlock() + rd := render.New(render.Options{IndentJSON: true}) + oldc, _ := json.Marshal(conf) + data, err := ioutil.ReadAll(r.Body) + r.Body.Close() + if err != nil { + rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + + if err := json.Unmarshal(data, conf); err != nil { + rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + newc, _ := json.Marshal(conf) + if !bytes.Equal(oldc, newc) { + conf.persist() + rd.Text(w, http.StatusOK, "success") + } + + m := make(map[string]interface{}) + if err := json.Unmarshal(data, &m); err != nil { + rd.JSON(w, http.StatusInternalServerError, err.Error()) + return + } + t := reflect.TypeOf(conf).Elem() + for i := 0; i < t.NumField(); i++ { + jsonTag := t.Field(i).Tag.Get("json") + if i := strings.Index(jsonTag, ","); i != -1 { // trim 'foobar,string' to 'foobar' + jsonTag = jsonTag[:i] + } + if _, ok := m[jsonTag]; ok { + rd.Text(w, http.StatusOK, "no changed") + return + } + } + + rd.Text(w, http.StatusBadRequest, "config item not found") +} + +func (conf *hotRegionSchedulerConfig) persist() error { + data, err := schedule.EncodeConfig(conf) + if err != nil { + return err + + } + return conf.storage.SaveScheduleConfig(HotRegionName, data) + +} diff --git a/server/schedulers/hot_test.go b/server/schedulers/hot_test.go index 8de99422a72..8624255a2b3 100644 --- a/server/schedulers/hot_test.go +++ b/server/schedulers/hot_test.go @@ -29,6 +29,10 @@ import ( "github.com/pingcap/pd/v3/server/statistics" ) +func 
init() { + schedulePeerPr = 1.0 +} + var _ = Suite(&testHotWriteRegionSchedulerSuite{}) var _ = Suite(&testHotSchedulerSuite{}) @@ -40,7 +44,7 @@ func (s *testHotSchedulerSuite) TestGCPendingOpInfos(c *C) { opt := mockoption.NewScheduleOptions() newTestReplication(opt, 3, "zone", "host") tc := mockcluster.NewCluster(opt) - sche, err := schedule.CreateScheduler(HotRegionType, schedule.NewOperatorController(ctx, nil, nil), core.NewStorage(kv.NewMemoryKV()), nil) + sche, err := schedule.CreateScheduler(HotRegionType, schedule.NewOperatorController(ctx, tc, nil), core.NewStorage(kv.NewMemoryKV()), schedule.ConfigJSONDecoder([]byte("null"))) c.Assert(err, IsNil) hb := sche.(*hotScheduler) @@ -67,7 +71,7 @@ func (s *testHotSchedulerSuite) TestGCPendingOpInfos(c *C) { } shouldRemoveOp := func(region *core.RegionInfo, ty opType) *operator.Operator { op := doneOp(region, ty) - operator.SetOperatorStatusReachTime(op, operator.CREATED, time.Now().Add(-minRegionScheduleInterval)) + operator.SetOperatorStatusReachTime(op, operator.CREATED, time.Now().Add(-3*statistics.StoreHeartBeatReportInterval*time.Second)) return op } opCreaters := [4]func(region *core.RegionInfo, ty opType) *operator.Operator{nilOp, shouldRemoveOp, notDoneOp, doneOp} @@ -114,7 +118,7 @@ func newTestRegion(id uint64) *core.RegionInfo { type testHotWriteRegionSchedulerSuite struct{} -func (s *testHotWriteRegionSchedulerSuite) TestSchedule(c *C) { +func (s *testHotWriteRegionSchedulerSuite) TestByteRateOnly(c *C) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() statistics.Denoising = false @@ -125,12 +129,12 @@ func (s *testHotWriteRegionSchedulerSuite) TestSchedule(c *C) { c.Assert(err, IsNil) opt.HotRegionCacheHitsThreshold = 0 - s.checkSchedule(c, tc, opt, hb) + s.checkByteRateOnly(c, tc, opt, hb) opt.EnablePlacementRules = true - s.checkSchedule(c, tc, opt, hb) + s.checkByteRateOnly(c, tc, opt, hb) } -func (s *testHotWriteRegionSchedulerSuite) checkSchedule(c *C, tc *mockcluster.Cluster, opt *mockoption.ScheduleOptions, hb schedule.Scheduler) { +func (s *testHotWriteRegionSchedulerSuite) checkByteRateOnly(c *C, tc *mockcluster.Cluster, opt *mockoption.ScheduleOptions, hb schedule.Scheduler) { // Add stores 1, 2, 3, 4, 5, 6 with region counts 3, 2, 2, 2, 0, 0. tc.AddLabelsStore(1, 3, map[string]string{"zone": "z1", "host": "h1"}) @@ -163,9 +167,11 @@ func (s *testHotWriteRegionSchedulerSuite) checkSchedule(c *C, tc *mockcluster.C //| 2 | 1 | 3 | 4 | 512KB | //| 3 | 1 | 2 | 4 | 512KB | // Region 1, 2 and 3 are hot regions. - tc.AddLeaderRegionWithWriteInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(2, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 3, 4) - tc.AddLeaderRegionWithWriteInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 4) + addRegionInfo(tc, write, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 512 * KB, 0}, + {2, []uint64{1, 3, 4}, 512 * KB, 0}, + {3, []uint64{1, 2, 4}, 512 * KB, 0}, + }) // Will transfer a hot region from store 1, because the total count of peers // which is hot for store 1 is more larger than other stores. 
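// Aside: the "byteDecRatio <= 0.95 && keyDecRatio <= 0.95" style comments in the test
// hunks below follow the calcProgressiveRank logic added earlier in this patch. The
// following is a minimal standalone sketch of that classification, using the default
// thresholds from initHotRegionScheduleConfig (GreatDecRatio 0.95, MinorDecRatio 0.99,
// MinHotByteRate 100, MinHotKeyRate 10); the helper name progressiveRank and the plain
// float parameters are illustrative stand-ins, not code from the patch itself.
package main

import "fmt"

// progressiveRank mirrors the general (non write-leader) branch of calcProgressiveRank:
// a more negative rank means a better candidate move.
func progressiveRank(srcByte, srcKey, dstByte, dstKey, peerByte, peerKey float64) int64 {
	const greatDec, minorDec = 0.95, 0.99
	const minHotByte, minHotKey = 100.0, 10.0

	dec := func(src, peer float64) float64 { // denominator guard, as in getSrcDecRate
		if src-peer <= 0 {
			return 1
		}
		return src - peer
	}
	byteDecRatio := (dstByte + peerByte) / dec(srcByte, peerByte)
	keyDecRatio := (dstKey + peerKey) / dec(srcKey, peerKey)
	byteHot := peerByte > minHotByte
	keyHot := peerKey >= minHotKey

	switch {
	case byteHot && byteDecRatio <= greatDec && keyHot && keyDecRatio <= greatDec:
		return -3 // both byte rate and key rate improve: best
	case byteDecRatio <= minorDec && keyHot && keyDecRatio <= greatDec:
		return -2 // key rate improves, byte rate is not worsened
	case byteHot && byteDecRatio <= greatDec:
		return -1 // only byte rate improves
	}
	return 0 // not worth scheduling
}

func main() {
	// Moving a 512KB/s peer (no key traffic) from a 3MB/s store to a 512KB/s store:
	// byteDecRatio = (512K+512K)/(3M-512K) = 0.4 <= 0.95, keys are not hot, so rank is -1.
	fmt.Println(progressiveRank(3*1024*1024, 0, 512*1024, 0, 512*1024, 0)) // -1
}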
@@ -242,11 +248,13 @@ func (s *testHotWriteRegionSchedulerSuite) checkSchedule(c *C, tc *mockcluster.C //| 3 | 6 | 1 | 4 | 512KB | //| 4 | 5 | 6 | 4 | 512KB | //| 5 | 3 | 4 | 5 | 512KB | - tc.AddLeaderRegionWithWriteInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(2, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(3, 6, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 4) - tc.AddLeaderRegionWithWriteInfo(4, 5, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 6, 4) - tc.AddLeaderRegionWithWriteInfo(5, 3, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 4, 5) + addRegionInfo(tc, write, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 512 * KB, 0}, + {2, []uint64{1, 2, 3}, 512 * KB, 0}, + {3, []uint64{6, 1, 4}, 512 * KB, 0}, + {4, []uint64{5, 6, 4}, 512 * KB, 0}, + {5, []uint64{3, 4, 5}, 512 * KB, 0}, + }) // 6 possible operator. // Assuming different operators have the same possibility, @@ -287,81 +295,180 @@ func (s *testHotWriteRegionSchedulerSuite) checkSchedule(c *C, tc *mockcluster.C hb.(*hotScheduler).clearPendingInfluence() } -func (s *testHotWriteRegionSchedulerSuite) TestWithPendingInfluence(c *C) { +func (s *testHotWriteRegionSchedulerSuite) TestWithKeyRate(c *C) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() statistics.Denoising = false opt := mockoption.NewScheduleOptions() - tc := mockcluster.NewCluster(opt) hb, err := schedule.CreateScheduler(HotWriteRegionType, schedule.NewOperatorController(ctx, nil, nil), core.NewStorage(kv.NewMemoryKV()), nil) c.Assert(err, IsNil) + hb.(*hotScheduler).conf.SetDstToleranceRatio(1) + hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) opt.HotRegionCacheHitsThreshold = 0 - opt.LeaderScheduleLimit = 0 + tc := mockcluster.NewCluster(opt) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) tc.AddRegionStore(4, 20) + tc.AddRegionStore(5, 20) - //| store_id | write_bytes_rate | - //|----------|------------------| - //| 1 | 8MB | - //| 2 | 6MB | - //| 3 | 6MB | - //| 4 | 4MB | - tc.UpdateStorageWrittenBytes(1, 8*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(2, 6*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(3, 6*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(4, 4*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenStats(1, 10.5*MB*statistics.StoreHeartBeatReportInterval, 10.5*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenStats(2, 9.5*MB*statistics.StoreHeartBeatReportInterval, 9.5*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenStats(3, 9.5*MB*statistics.StoreHeartBeatReportInterval, 9.8*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenStats(4, 9*MB*statistics.StoreHeartBeatReportInterval, 9*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenStats(5, 8.9*MB*statistics.StoreHeartBeatReportInterval, 9.2*MB*statistics.StoreHeartBeatReportInterval) - //| region_id | leader_store | follower_store | follower_store | written_bytes | - //|-----------|--------------|----------------|----------------|---------------| - //| 1 | 1 | 2 | 3 | 512KB | - //| 2 | 1 | 2 | 3 | 512KB | - //| 3 | 1 | 2 | 3 | 512KB | - //| 4 | 1 | 2 | 3 | 512KB | - 
//| 5 | 1 | 2 | 3 | 512KB | - //| 6 | 1 | 2 | 3 | 512KB | - // All regions are hot. - tc.AddLeaderRegionWithWriteInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(2, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(4, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(5, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(6, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) + addRegionInfo(tc, write, []testRegionInfo{ + {1, []uint64{2, 1, 3}, 0.5 * MB, 0.5 * MB}, + {2, []uint64{2, 1, 3}, 0.5 * MB, 0.5 * MB}, + {3, []uint64{2, 4, 3}, 0.05 * MB, 0.1 * MB}, + }) - for i := 0; i < 20; i++ { + for i := 0; i < 100; i++ { hb.(*hotScheduler).clearPendingInfluence() - cnt := 0 - testLoop: - for j := 0; j < 1000; j++ { - c.Assert(cnt, LessEqual, 5) - emptyCnt := 0 - ops := hb.Schedule(tc) - for len(ops) == 0 { - emptyCnt++ - if emptyCnt >= 10 { - break testLoop + op := hb.Schedule(tc)[0] + // byteDecRatio <= 0.95 && keyDecRatio <= 0.95 + testutil.CheckTransferPeer(c, op, operator.OpHotRegion, 1, 4) + // store byte rate (min, max): (10, 10.5) | 9.5 | 9.5 | (9, 9.5) | 8.9 + // store key rate (min, max): (10, 10.5) | 9.5 | 9.8 | (9, 9.5) | 9.2 + + op = hb.Schedule(tc)[0] + // byteDecRatio <= 0.99 && keyDecRatio <= 0.95 + testutil.CheckTransferPeer(c, op, operator.OpHotRegion, 3, 5) + // store byte rate (min, max): (10, 10.5) | 9.5 | (9.45, 9.5) | (9, 9.5) | (8.9, 8.95) + // store key rate (min, max): (10, 10.5) | 9.5 | (9.7, 9.8) | (9, 9.5) | (9.2, 9.3) + + // byteDecRatio <= 0.95 + // op = hb.Schedule(tc)[0] + // FIXME: cover this case + // testutil.CheckTransferPeerWithLeaderTransfer(c, op, operator.OpHotRegion, 1, 5) + // store byte rate (min, max): (9.5, 10.5) | 9.5 | (9.45, 9.5) | (9, 9.5) | (8.9, 9.45) + // store key rate (min, max): (9.2, 10.2) | 9.5 | (9.7, 9.8) | (9, 9.5) | (9.2, 9.8) + } +} + +func (s *testHotWriteRegionSchedulerSuite) TestLeader(c *C) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + statistics.Denoising = false + opt := mockoption.NewScheduleOptions() + hb, err := schedule.CreateScheduler(HotWriteRegionType, schedule.NewOperatorController(ctx, nil, nil), core.NewStorage(kv.NewMemoryKV()), nil) + c.Assert(err, IsNil) + opt.HotRegionCacheHitsThreshold = 0 + + tc := mockcluster.NewCluster(opt) + tc.AddRegionStore(1, 20) + tc.AddRegionStore(2, 20) + tc.AddRegionStore(3, 20) + + tc.UpdateStorageWrittenBytes(1, 10*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(2, 10*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenBytes(3, 10*MB*statistics.StoreHeartBeatReportInterval) + + tc.UpdateStorageWrittenKeys(1, 10*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenKeys(2, 10*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageWrittenKeys(3, 10*MB*statistics.StoreHeartBeatReportInterval) + + addRegionInfo(tc, write, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 0.5 * MB, 1 * MB}, + {2, []uint64{1, 2, 3}, 0.5 * MB, 1 * MB}, + {3, []uint64{2, 1, 3}, 0.5 * MB, 1 * MB}, + 
{4, []uint64{2, 1, 3}, 0.5 * MB, 1 * MB}, + {5, []uint64{2, 1, 3}, 0.5 * MB, 1 * MB}, + {6, []uint64{3, 1, 2}, 0.5 * MB, 1 * MB}, + {7, []uint64{3, 1, 2}, 0.5 * MB, 1 * MB}, + }) + + for i := 0; i < 100; i++ { + hb.(*hotScheduler).clearPendingInfluence() + op := hb.Schedule(tc)[0] + testutil.CheckTransferLeaderFrom(c, op, operator.OpHotRegion, 2) + + c.Assert(hb.Schedule(tc), HasLen, 0) + } +} + +func (s *testHotWriteRegionSchedulerSuite) TestWithPendingInfluence(c *C) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + statistics.Denoising = false + opt := mockoption.NewScheduleOptions() + hb, err := schedule.CreateScheduler(HotWriteRegionType, schedule.NewOperatorController(ctx, nil, nil), core.NewStorage(kv.NewMemoryKV()), nil) + c.Assert(err, IsNil) + opt.HotRegionCacheHitsThreshold = 0 + opt.LeaderScheduleLimit = 0 + for i := 0; i < 2; i++ { + // 0: byte rate + // 1: key rate + tc := mockcluster.NewCluster(opt) + tc.AddRegionStore(1, 20) + tc.AddRegionStore(2, 20) + tc.AddRegionStore(3, 20) + tc.AddRegionStore(4, 20) + + updateStore := tc.UpdateStorageWrittenBytes // byte rate + if i == 1 { // key rate + updateStore = tc.UpdateStorageWrittenKeys + } + updateStore(1, 8*MB*statistics.StoreHeartBeatReportInterval) + updateStore(2, 6*MB*statistics.StoreHeartBeatReportInterval) + updateStore(3, 6*MB*statistics.StoreHeartBeatReportInterval) + updateStore(4, 4*MB*statistics.StoreHeartBeatReportInterval) + + if i == 0 { // byte rate + addRegionInfo(tc, write, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 512 * KB, 0}, + {2, []uint64{1, 2, 3}, 512 * KB, 0}, + {3, []uint64{1, 2, 3}, 512 * KB, 0}, + {4, []uint64{1, 2, 3}, 512 * KB, 0}, + {5, []uint64{1, 2, 3}, 512 * KB, 0}, + {6, []uint64{1, 2, 3}, 512 * KB, 0}, + }) + } else if i == 1 { // key rate + addRegionInfo(tc, write, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 0, 512 * KB}, + {2, []uint64{1, 2, 3}, 0, 512 * KB}, + {3, []uint64{1, 2, 3}, 0, 512 * KB}, + {4, []uint64{1, 2, 3}, 0, 512 * KB}, + {5, []uint64{1, 2, 3}, 0, 512 * KB}, + {6, []uint64{1, 2, 3}, 0, 512 * KB}, + }) + } + + for i := 0; i < 20; i++ { + hb.(*hotScheduler).clearPendingInfluence() + cnt := 0 + testLoop: + for j := 0; j < 1000; j++ { + c.Assert(cnt, LessEqual, 5) + emptyCnt := 0 + ops := hb.Schedule(tc) + for len(ops) == 0 { + emptyCnt++ + if emptyCnt >= 10 { + break testLoop + } + ops = hb.Schedule(tc) } - ops = hb.Schedule(tc) - } - op := ops[0] - switch op.Len() { - case 1: - // balance by leader selected - testutil.CheckTransferLeaderFrom(c, op, operator.OpHotRegion, 1) - case 4: - // balance by peer selected - testutil.CheckTransferPeerWithLeaderTransfer(c, op, operator.OpHotRegion, 1, 4) - cnt++ - if cnt == 3 { - c.Assert(op.Cancel(), IsTrue) + op := ops[0] + switch op.Len() { + case 1: + // balance by leader selected + testutil.CheckTransferLeaderFrom(c, op, operator.OpHotRegion, 1) + case 4: + // balance by peer selected + testutil.CheckTransferPeerWithLeaderTransfer(c, op, operator.OpHotRegion, 1, 4) + cnt++ + if cnt == 3 { + c.Assert(op.Cancel(), IsTrue) + } + default: + c.Fatalf("wrong op: %v", op) } - default: - c.Fatalf("wrong op: %v", op) } + c.Assert(cnt, Equals, 4) } - c.Assert(cnt, Equals, 5) } } @@ -369,7 +476,7 @@ var _ = Suite(&testHotReadRegionSchedulerSuite{}) type testHotReadRegionSchedulerSuite struct{} -func (s *testHotReadRegionSchedulerSuite) TestSchedule(c *C) { +func (s *testHotReadRegionSchedulerSuite) TestByteRateOnly(c *C) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() opt := 
mockoption.NewScheduleOptions() @@ -388,12 +495,12 @@ func (s *testHotReadRegionSchedulerSuite) TestSchedule(c *C) { //| store_id | read_bytes_rate | //|----------|-----------------| //| 1 | 7.5MB | - //| 2 | 4.6MB | + //| 2 | 4.9MB | //| 3 | 4.5MB | //| 4 | 6MB | //| 5 | 0MB | tc.UpdateStorageReadBytes(1, 7.5*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(2, 4.6*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadBytes(2, 4.9*MB*statistics.StoreHeartBeatReportInterval) tc.UpdateStorageReadBytes(3, 4.5*MB*statistics.StoreHeartBeatReportInterval) tc.UpdateStorageReadBytes(4, 6*MB*statistics.StoreHeartBeatReportInterval) tc.UpdateStorageReadBytes(5, 0) @@ -403,13 +510,14 @@ func (s *testHotReadRegionSchedulerSuite) TestSchedule(c *C) { //| 1 | 1 | 2 | 3 | 512KB | //| 2 | 2 | 1 | 3 | 512KB | //| 3 | 1 | 2 | 3 | 512KB | - //| 11 | 1 | 2 | 3 | 24KB | + //| 11 | 1 | 2 | 3 | 7KB | // Region 1, 2 and 3 are hot regions. - tc.AddLeaderRegionWithReadInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(2, 2, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 3) - tc.AddLeaderRegionWithReadInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - // lower than hot read flow rate, but higher than write flow rate - tc.AddLeaderRegionWithReadInfo(11, 1, 7*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) + addRegionInfo(tc, read, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 512 * KB, 0}, + {2, []uint64{2, 1, 3}, 512 * KB, 0}, + {3, []uint64{1, 2, 3}, 512 * KB, 0}, + {11, []uint64{1, 2, 3}, 7 * KB, 0}, + }) c.Assert(tc.IsRegionHot(tc.GetRegion(1)), IsTrue) c.Assert(tc.IsRegionHot(tc.GetRegion(11)), IsFalse) @@ -426,14 +534,10 @@ func (s *testHotReadRegionSchedulerSuite) TestSchedule(c *C) { } } - // Will transfer a hot region leader from store 1 to store 3. - // bytes_rate[store 1] * 0.9 > bytes_rate[store 3] + region_bytes_rate - // read_bytes_rate[store 3] < read_bytes_rate[store 2] - // when select dest store for hot read, we use score. 
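// Aside: the leader transfer checked on the next line is no longer picked by the removed
// raw-rate heuristics above; this patch compares discretized "ranks" built with stepRank,
// so stores whose loads differ by less than one step tie and the next criterion breaks the
// tie. Below is a minimal sketch of that bucketing; stepRank is copied from this patch,
// while the 7.5MB/s reference load and the 0.05 step ratio (the default
// ByteRateRankStepRatio) are illustrative assumptions about how rankStep is derived.
package main

import "fmt"

// stepRank maps a rate to a coarse integer rank relative to rk0, in units of step.
func stepRank(rk0 float64, step float64) func(float64) int64 {
	return func(rate float64) int64 {
		return int64((rate - rk0) / step)
	}
}

func main() {
	const MB = 1 << 20
	maxSrc := 7.5 * MB    // assumed reference byte rate of the hottest source store
	step := 0.05 * maxSrc // assumed step: ByteRateRankStepRatio * reference load
	rank := stepRank(maxSrc, step)

	// 7.5MB/s and 7.2MB/s land in the same bucket (both rank 0), so they compare equal;
	// 6MB/s is four steps lower. Prints: 0 0 -4
	fmt.Println(rank(7.5*MB), rank(7.2*MB), rank(6.0*MB))
}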
testutil.CheckTransferLeader(c, hb.Schedule(tc)[0], operator.OpHotRegion, 1, 3) hb.(*hotScheduler).clearPendingInfluence() // assume handle the operator - tc.AddLeaderRegionWithReadInfo(3, 3, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 2) + tc.AddLeaderRegionWithReadInfo(3, 3, 512*KB*statistics.RegionHeartBeatReportInterval, 0, statistics.RegionHeartBeatReportInterval, []uint64{1, 2}) // After transfer a hot region leader from store 1 to store 3 // the three region leader will be evenly distributed in three stores @@ -441,13 +545,13 @@ func (s *testHotReadRegionSchedulerSuite) TestSchedule(c *C) { //|----------|-----------------| //| 1 | 6MB | //| 2 | 5.5MB | - //| 3 | 6MB | - //| 4 | 3.1MB | + //| 3 | 5.5MB | + //| 4 | 3.4MB | //| 5 | 3MB | tc.UpdateStorageReadBytes(1, 6*MB*statistics.StoreHeartBeatReportInterval) tc.UpdateStorageReadBytes(2, 5.5*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(3, 6*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(4, 3.1*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadBytes(3, 5.5*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadBytes(4, 3.4*MB*statistics.StoreHeartBeatReportInterval) tc.UpdateStorageReadBytes(5, 3*MB*statistics.StoreHeartBeatReportInterval) //| region_id | leader_store | follower_store | follower_store | read_bytes_rate | @@ -458,15 +562,12 @@ func (s *testHotReadRegionSchedulerSuite) TestSchedule(c *C) { //| 4 | 1 | 2 | 3 | 512KB | //| 5 | 4 | 2 | 5 | 512KB | //| 11 | 1 | 2 | 3 | 24KB | - tc.AddLeaderRegionWithReadInfo(4, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(5, 4, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 5) + addRegionInfo(tc, read, []testRegionInfo{ + {4, []uint64{1, 2, 3}, 512 * KB, 0}, + {5, []uint64{4, 2, 5}, 512 * KB, 0}, + }) // We will move leader peer of region 1 from 1 to 5 - // Store 1 will be selected as source store (max rate, count > store 3 count). - // When trying to transfer leader: - // Store 2 and store 3 are also hot, failed. - // Trying to move leader peer: - // Store 5 is selected as destination because of less hot region count. 
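// Aside: the removed comments above described the old count/score destination heuristics.
// In this patch a destination must instead keep its predicted load under the expected load
// within DstToleranceRatio (1.02 by default, i.e. a 2% margin), as in getDstCandidateIDs.
// The sketch below only illustrates that acceptance test: loadSketch and okAsDst are
// simplified stand-ins for storeLoadDetail/LoadPred, and treating ExpByteRate/ExpKeyRate
// as a per-store expected average is an assumption, not something this patch states.
package main

import "fmt"

type loadSketch struct {
	ByteRate, KeyRate float64
}

// okAsDst mirrors the shape of the check in getDstCandidateIDs: the store's worst-case
// predicted load, scaled by the tolerance, must stay under the expectation on both dimensions.
func okAsDst(maxPred, expected loadSketch, tolerance float64) bool {
	return maxPred.ByteRate*tolerance < expected.ByteRate &&
		maxPred.KeyRate*tolerance < expected.KeyRate
}

func main() {
	const MB = float64(1 << 20)
	expected := loadSketch{ByteRate: 9.6 * MB, KeyRate: 9.6 * MB} // assumed expected load
	store4 := loadSketch{ByteRate: 9.0 * MB, KeyRate: 9.0 * MB}
	store3 := loadSketch{ByteRate: 9.5 * MB, KeyRate: 9.8 * MB}

	// store4 qualifies: 9.18MB/s < 9.6MB/s on both dimensions.
	fmt.Println(okAsDst(store4, expected, 1.02)) // true
	// store3 is rejected: 9.69MB/s and 9.996MB/s both exceed the 9.6MB/s expectation.
	fmt.Println(okAsDst(store3, expected, 1.02)) // false
}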
testutil.CheckTransferPeerWithLeaderTransfer(c, hb.Schedule(tc)[0], operator.OpHotRegion, 1, 5) hb.(*hotScheduler).clearPendingInfluence() @@ -478,73 +579,154 @@ func (s *testHotReadRegionSchedulerSuite) TestSchedule(c *C) { hb.(*hotScheduler).clearPendingInfluence() } -func (s *testHotReadRegionSchedulerSuite) TestWithPendingInfluence(c *C) { +func (s *testHotReadRegionSchedulerSuite) TestWithKeyRate(c *C) { ctx, cancel := context.WithCancel(context.Background()) defer cancel() + statistics.Denoising = false opt := mockoption.NewScheduleOptions() - tc := mockcluster.NewCluster(opt) hb, err := schedule.CreateScheduler(HotReadRegionType, schedule.NewOperatorController(ctx, nil, nil), core.NewStorage(kv.NewMemoryKV()), nil) c.Assert(err, IsNil) + hb.(*hotScheduler).conf.SetSrcToleranceRatio(1) + hb.(*hotScheduler).conf.SetDstToleranceRatio(1) opt.HotRegionCacheHitsThreshold = 0 + tc := mockcluster.NewCluster(opt) tc.AddRegionStore(1, 20) tc.AddRegionStore(2, 20) tc.AddRegionStore(3, 20) tc.AddRegionStore(4, 20) + tc.AddRegionStore(5, 20) - //| store_id | write_bytes_rate | - //|----------|------------------| - //| 1 | 7.1MB | - //| 2 | 6.1MB | - //| 3 | 6MB | - //| 4 | 5MB | - tc.UpdateStorageReadBytes(1, 7.1*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(2, 6.1*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(3, 6*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(4, 5*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadStats(1, 10.5*MB*statistics.StoreHeartBeatReportInterval, 10.5*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadStats(2, 9.5*MB*statistics.StoreHeartBeatReportInterval, 9.5*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadStats(3, 9.5*MB*statistics.StoreHeartBeatReportInterval, 9.8*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadStats(4, 9*MB*statistics.StoreHeartBeatReportInterval, 9*MB*statistics.StoreHeartBeatReportInterval) + tc.UpdateStorageReadStats(5, 8.9*MB*statistics.StoreHeartBeatReportInterval, 9.2*MB*statistics.StoreHeartBeatReportInterval) - //| region_id | leader_store | follower_store | follower_store | read_bytes_rate | - //|-----------|--------------|----------------|----------------|--------------------| - //| 1 | 1 | 2 | 3 | 512KB | - //| 2 | 1 | 2 | 3 | 512KB | - //| 3 | 1 | 2 | 3 | 512KB | - //| 4 | 1 | 2 | 3 | 512KB | - //| 5 | 2 | 1 | 3 | 512KB | - //| 6 | 2 | 1 | 3 | 512KB | - //| 7 | 3 | 1 | 2 | 512KB | - //| 8 | 3 | 1 | 2 | 512KB | - tc.AddLeaderRegionWithReadInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(2, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(4, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(5, 2, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 3) - tc.AddLeaderRegionWithReadInfo(6, 2, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 3) - tc.AddLeaderRegionWithReadInfo(7, 3, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 2) - tc.AddLeaderRegionWithReadInfo(8, 3, 
512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 2) + addRegionInfo(tc, read, []testRegionInfo{ + {1, []uint64{1, 2, 4}, 0.5 * MB, 0.5 * MB}, + {2, []uint64{1, 2, 4}, 0.5 * MB, 0.5 * MB}, + {3, []uint64{3, 4, 5}, 0.05 * MB, 0.1 * MB}, + }) - for i := 0; i < 20; i++ { + for i := 0; i < 100; i++ { hb.(*hotScheduler).clearPendingInfluence() - op1 := hb.Schedule(tc)[0] - testutil.CheckTransferLeader(c, op1, operator.OpLeader, 1, 3) - op2 := hb.Schedule(tc)[0] - testutil.CheckTransferPeerWithLeaderTransfer(c, op2, operator.OpHotRegion, 1, 4) - ops := hb.Schedule(tc) - c.Assert(ops, HasLen, 0) + op := hb.Schedule(tc)[0] + // byteDecRatio <= 0.95 && keyDecRatio <= 0.95 + testutil.CheckTransferLeader(c, op, operator.OpHotRegion, 1, 4) + // store byte rate (min, max): (10, 10.5) | 9.5 | 9.5 | (9, 9.5) | 8.9 + // store key rate (min, max): (10, 10.5) | 9.5 | 9.8 | (9, 9.5) | 9.2 + + op = hb.Schedule(tc)[0] + // byteDecRatio <= 0.99 && keyDecRatio <= 0.95 + testutil.CheckTransferLeader(c, op, operator.OpHotRegion, 3, 5) + // store byte rate (min, max): (10, 10.5) | 9.5 | (9.45, 9.5) | (9, 9.5) | (8.9, 8.95) + // store key rate (min, max): (10, 10.5) | 9.5 | (9.7, 9.8) | (9, 9.5) | (9.2, 9.3) + + // byteDecRatio <= 0.95 + // FIXME: cover this case + // op = hb.Schedule(tc)[0] + // testutil.CheckTransferPeerWithLeaderTransfer(c, op, operator.OpHotRegion, 1, 5) + // store byte rate (min, max): (9.5, 10.5) | 9.5 | (9.45, 9.5) | (9, 9.5) | (8.9, 9.45) + // store key rate (min, max): (9.2, 10.2) | 9.5 | (9.7, 9.8) | (9, 9.5) | (9.2, 9.8) } - for i := 0; i < 20; i++ { - hb.(*hotScheduler).clearPendingInfluence() - op1 := hb.Schedule(tc)[0] - testutil.CheckTransferLeader(c, op1, operator.OpLeader, 1, 3) - op2 := hb.Schedule(tc)[0] - testutil.CheckTransferPeerWithLeaderTransfer(c, op2, operator.OpHotRegion, 1, 4) - c.Assert(op2.Cancel(), IsTrue) - op2 = hb.Schedule(tc)[0] - testutil.CheckTransferPeerWithLeaderTransfer(c, op2, operator.OpHotRegion, 1, 4) - c.Assert(op1.Cancel(), IsTrue) - op3 := hb.Schedule(tc)[0] - testutil.CheckTransferPeerWithLeaderTransfer(c, op3, operator.OpHotRegion, 1, 4) - ops := hb.Schedule(tc) - c.Assert(ops, HasLen, 0) +} + +func (s *testHotReadRegionSchedulerSuite) TestWithPendingInfluence(c *C) { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + opt := mockoption.NewScheduleOptions() + hb, err := schedule.CreateScheduler(HotReadRegionType, schedule.NewOperatorController(ctx, nil, nil), core.NewStorage(kv.NewMemoryKV()), nil) + c.Assert(err, IsNil) + opt.HotRegionCacheHitsThreshold = 0 + // For test + hb.(*hotScheduler).conf.GreatDecRatio = 0.99 + hb.(*hotScheduler).conf.MinorDecRatio = 1 + + for i := 0; i < 2; i++ { + // 0: byte rate + // 1: key rate + tc := mockcluster.NewCluster(opt) + tc.AddRegionStore(1, 20) + tc.AddRegionStore(2, 20) + tc.AddRegionStore(3, 20) + tc.AddRegionStore(4, 20) + + updateStore := tc.UpdateStorageReadBytes // byte rate + if i == 1 { // key rate + updateStore = tc.UpdateStorageReadKeys + } + updateStore(1, 7.1*MB*statistics.StoreHeartBeatReportInterval) + updateStore(2, 6.1*MB*statistics.StoreHeartBeatReportInterval) + updateStore(3, 6*MB*statistics.StoreHeartBeatReportInterval) + updateStore(4, 5*MB*statistics.StoreHeartBeatReportInterval) + + if i == 0 { // byte rate + addRegionInfo(tc, read, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 512 * KB, 0}, + {2, []uint64{1, 2, 3}, 512 * KB, 0}, + {3, []uint64{1, 2, 3}, 512 * KB, 0}, + {4, []uint64{1, 2, 3}, 512 * KB, 0}, + {5, 
[]uint64{2, 1, 3}, 512 * KB, 0}, + {6, []uint64{2, 1, 3}, 512 * KB, 0}, + {7, []uint64{3, 2, 1}, 512 * KB, 0}, + {8, []uint64{3, 2, 1}, 512 * KB, 0}, + }) + } else if i == 1 { // key rate + addRegionInfo(tc, read, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 0, 512 * KB}, + {2, []uint64{1, 2, 3}, 0, 512 * KB}, + {3, []uint64{1, 2, 3}, 0, 512 * KB}, + {4, []uint64{1, 2, 3}, 0, 512 * KB}, + {5, []uint64{2, 1, 3}, 0, 512 * KB}, + {6, []uint64{2, 1, 3}, 0, 512 * KB}, + {7, []uint64{3, 2, 1}, 0, 512 * KB}, + {8, []uint64{3, 2, 1}, 0, 512 * KB}, + }) + } + + for i := 0; i < 20; i++ { + hb.(*hotScheduler).clearPendingInfluence() + + op1 := hb.Schedule(tc)[0] + testutil.CheckTransferLeader(c, op1, operator.OpLeader, 1, 3) + // store byte/key rate (min, max): (6.6, 7.1) | 6.1 | (6, 6.5) | 5 + + op2 := hb.Schedule(tc)[0] + testutil.CheckTransferPeerWithLeaderTransfer(c, op2, operator.OpHotRegion, 1, 4) + // store byte/key rate (min, max): (6.1, 7.1) | 6.1 | (6, 6.5) | (5, 5.5) + + ops := hb.Schedule(tc) + c.Logf("%v", ops) + c.Assert(ops, HasLen, 0) + } + for i := 0; i < 20; i++ { + hb.(*hotScheduler).clearPendingInfluence() + + op1 := hb.Schedule(tc)[0] + testutil.CheckTransferLeader(c, op1, operator.OpLeader, 1, 3) + // store byte/key rate (min, max): (6.6, 7.1) | 6.1 | (6, 6.5) | 5 + + op2 := hb.Schedule(tc)[0] + testutil.CheckTransferPeerWithLeaderTransfer(c, op2, operator.OpHotRegion, 1, 4) + // store bytekey rate (min, max): (6.1, 7.1) | 6.1 | (6, 6.5) | (5, 5.5) + c.Assert(op2.Cancel(), IsTrue) + // store byte/key rate (min, max): (6.6, 7.1) | 6.1 | (6, 6.5) | 5 + + op2 = hb.Schedule(tc)[0] + testutil.CheckTransferPeerWithLeaderTransfer(c, op2, operator.OpHotRegion, 1, 4) + // store byte/key rate (min, max): (6.1, 7.1) | 6.1 | (6, 6.5) | (5, 5.5) + + c.Assert(op1.Cancel(), IsTrue) + // store byte/key rate (min, max): (6.6, 7.1) | 6.1 | 6 | (5, 5.5) + + op3 := hb.Schedule(tc)[0] + testutil.CheckTransferPeerWithLeaderTransfer(c, op3, operator.OpHotRegion, 1, 4) + // store byte/key rate (min, max): (6.1, 7.1) | 6.1 | 6 | (5, 6) + + ops := hb.Schedule(tc) + c.Assert(ops, HasLen, 0) + } } } @@ -554,58 +736,44 @@ type testHotCacheSuite struct{} func (s *testHotCacheSuite) TestUpdateCache(c *C) { opt := mockoption.NewScheduleOptions() + opt.HotRegionCacheHitsThreshold = 0 tc := mockcluster.NewCluster(opt) - // Add stores 1, 2, 3, 4, 5 with region counts 3, 2, 2, 2, 0. - tc.AddRegionStore(1, 3) - tc.AddRegionStore(2, 2) - tc.AddRegionStore(3, 2) - tc.AddRegionStore(4, 2) - tc.AddRegionStore(5, 0) - - // Report store read bytes. 
- tc.UpdateStorageReadBytes(1, 7.5*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(2, 4.5*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(3, 4.5*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(4, 6*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageReadBytes(5, 0) - /// For read flow - tc.AddLeaderRegionWithReadInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(2, 2, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 3) - tc.AddLeaderRegionWithReadInfo(3, 1, 20*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - // lower than hot read flow rate, but higher than write flow rate - tc.AddLeaderRegionWithReadInfo(11, 1, 7*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - opt.HotRegionCacheHitsThreshold = 0 + addRegionInfo(tc, read, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 512 * KB, 0}, + {2, []uint64{2, 1, 3}, 512 * KB, 0}, + {3, []uint64{1, 2, 3}, 20 * KB, 0}, + // lower than hot read flow rate, but higher than write flow rate + {11, []uint64{1, 2, 3}, 7 * KB, 0}, + }) stats := tc.RegionStats(statistics.ReadFlow) c.Assert(len(stats[1]), Equals, 2) c.Assert(len(stats[2]), Equals, 1) c.Assert(len(stats[3]), Equals, 0) - tc.AddLeaderRegionWithReadInfo(3, 2, 20*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithReadInfo(11, 1, 7*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) + addRegionInfo(tc, read, []testRegionInfo{ + {3, []uint64{2, 1, 3}, 20 * KB, 0}, + {11, []uint64{1, 2, 3}, 7 * KB, 0}, + }) stats = tc.RegionStats(statistics.ReadFlow) - c.Assert(len(stats[1]), Equals, 1) c.Assert(len(stats[2]), Equals, 2) c.Assert(len(stats[3]), Equals, 0) - // For write flow - tc.UpdateStorageWrittenBytes(1, 6*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(2, 3*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(3, 6*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(4, 3*MB*statistics.StoreHeartBeatReportInterval) - tc.UpdateStorageWrittenBytes(5, 0) - tc.AddLeaderRegionWithWriteInfo(4, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(5, 1, 20*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(6, 1, 0.8*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - + addRegionInfo(tc, write, []testRegionInfo{ + {4, []uint64{1, 2, 3}, 512 * KB, 0}, + {5, []uint64{1, 2, 3}, 20 * KB, 0}, + {6, []uint64{1, 2, 3}, 0.8 * KB, 0}, + }) stats = tc.RegionStats(statistics.WriteFlow) c.Assert(len(stats[1]), Equals, 2) c.Assert(len(stats[2]), Equals, 2) c.Assert(len(stats[3]), Equals, 2) - tc.AddLeaderRegionWithWriteInfo(5, 1, 20*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 5) + addRegionInfo(tc, write, []testRegionInfo{ + {5, []uint64{1, 2, 5}, 20 * KB, 0}, + }) stats = tc.RegionStats(statistics.WriteFlow) c.Assert(len(stats[1]), Equals, 2) @@ -613,3 +781,141 @@ func (s *testHotCacheSuite) TestUpdateCache(c *C) { c.Assert(len(stats[3]), Equals, 1) c.Assert(len(stats[5]), Equals, 1) } + +func (s 
*testHotCacheSuite) TestKeyThresholds(c *C) { + opt := mockoption.NewScheduleOptions() + opt.HotRegionCacheHitsThreshold = 0 + { // only a few regions + tc := mockcluster.NewCluster(opt) + addRegionInfo(tc, read, []testRegionInfo{ + {1, []uint64{1, 2, 3}, 0, 1}, + {2, []uint64{1, 2, 3}, 0, 1 * KB}, + }) + stats := tc.RegionStats(statistics.ReadFlow) + c.Assert(stats[1], HasLen, 1) + addRegionInfo(tc, write, []testRegionInfo{ + {3, []uint64{4, 5, 6}, 0, 1}, + {4, []uint64{4, 5, 6}, 0, 1 * KB}, + }) + stats = tc.RegionStats(statistics.WriteFlow) + c.Assert(stats[4], HasLen, 1) + c.Assert(stats[5], HasLen, 1) + c.Assert(stats[6], HasLen, 1) + } + { // many regions + tc := mockcluster.NewCluster(opt) + regions := []testRegionInfo{} + for i := 1; i <= 1000; i += 2 { + regions = append(regions, testRegionInfo{ + id: uint64(i), + peers: []uint64{1, 2, 3}, + keyRate: 100 * KB, + }) + regions = append(regions, testRegionInfo{ + id: uint64(i + 1), + peers: []uint64{1, 2, 3}, + keyRate: 10 * KB, + }) + } + + { // read + addRegionInfo(tc, read, regions) + stats := tc.RegionStats(statistics.ReadFlow) + c.Assert(len(stats[1]), Greater, 500) + + // for AntiCount + addRegionInfo(tc, read, regions) + addRegionInfo(tc, read, regions) + addRegionInfo(tc, read, regions) + addRegionInfo(tc, read, regions) + stats = tc.RegionStats(statistics.ReadFlow) + c.Assert(len(stats[1]), Equals, 500) + } + { // write + addRegionInfo(tc, write, regions) + stats := tc.RegionStats(statistics.WriteFlow) + c.Assert(len(stats[1]), Greater, 500) + c.Assert(len(stats[2]), Greater, 500) + c.Assert(len(stats[3]), Greater, 500) + + // for AntiCount + addRegionInfo(tc, write, regions) + addRegionInfo(tc, write, regions) + addRegionInfo(tc, write, regions) + addRegionInfo(tc, write, regions) + stats = tc.RegionStats(statistics.WriteFlow) + c.Assert(len(stats[1]), Equals, 500) + c.Assert(len(stats[2]), Equals, 500) + c.Assert(len(stats[3]), Equals, 500) + } + } +} + +func (s *testHotCacheSuite) TestByteAndKey(c *C) { + opt := mockoption.NewScheduleOptions() + opt.HotRegionCacheHitsThreshold = 0 + tc := mockcluster.NewCluster(opt) + regions := []testRegionInfo{} + for i := 1; i <= 500; i++ { + regions = append(regions, testRegionInfo{ + id: uint64(i), + peers: []uint64{1, 2, 3}, + byteRate: 100 * KB, + keyRate: 100 * KB, + }) + } + { // read + addRegionInfo(tc, read, regions) + stats := tc.RegionStats(statistics.ReadFlow) + c.Assert(len(stats[1]), Equals, 500) + + addRegionInfo(tc, read, []testRegionInfo{ + {10001, []uint64{1, 2, 3}, 10 * KB, 10 * KB}, + {10002, []uint64{1, 2, 3}, 500 * KB, 10 * KB}, + {10003, []uint64{1, 2, 3}, 10 * KB, 500 * KB}, + {10004, []uint64{1, 2, 3}, 500 * KB, 500 * KB}, + }) + stats = tc.RegionStats(statistics.ReadFlow) + c.Assert(len(stats[1]), Equals, 503) + } + { // write + addRegionInfo(tc, write, regions) + stats := tc.RegionStats(statistics.WriteFlow) + c.Assert(len(stats[1]), Equals, 500) + c.Assert(len(stats[2]), Equals, 500) + c.Assert(len(stats[3]), Equals, 500) + addRegionInfo(tc, write, []testRegionInfo{ + {10001, []uint64{1, 2, 3}, 10 * KB, 10 * KB}, + {10002, []uint64{1, 2, 3}, 500 * KB, 10 * KB}, + {10003, []uint64{1, 2, 3}, 10 * KB, 500 * KB}, + {10004, []uint64{1, 2, 3}, 500 * KB, 500 * KB}, + }) + stats = tc.RegionStats(statistics.WriteFlow) + c.Assert(len(stats[1]), Equals, 503) + c.Assert(len(stats[2]), Equals, 503) + c.Assert(len(stats[3]), Equals, 503) + } +} + +type testRegionInfo struct { + id uint64 + peers []uint64 + byteRate float64 + keyRate float64 +} + +func addRegionInfo(tc 
*mockcluster.Cluster, rwTy rwType, regions []testRegionInfo) { + addFunc := tc.AddLeaderRegionWithReadInfo + if rwTy == write { + addFunc = tc.AddLeaderRegionWithWriteInfo + } + for _, r := range regions { + addFunc( + r.id, r.peers[0], + uint64(r.byteRate*statistics.RegionHeartBeatReportInterval), + uint64(r.keyRate*statistics.RegionHeartBeatReportInterval), + statistics.RegionHeartBeatReportInterval, + r.peers[1:], + ) + } +} diff --git a/server/schedulers/metrics.go b/server/schedulers/metrics.go index fd2299543f8..768a5c1d92a 100644 --- a/server/schedulers/metrics.go +++ b/server/schedulers/metrics.go @@ -71,6 +71,14 @@ var balanceRegionCounter = prometheus.NewCounterVec( Help: "Counter of balance region scheduler.", }, []string{"type", "address", "store"}) +var balanceHotRegionCounter = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: "pd", + Subsystem: "scheduler", + Name: "hot_region", + Help: "Counter of hot region scheduler.", + }, []string{"type", "store"}) + var balanceDirectionCounter = prometheus.NewCounterVec( prometheus.CounterOpts{ Namespace: "pd", @@ -101,6 +109,7 @@ func init() { prometheus.MustRegister(hotPeerSummary) prometheus.MustRegister(balanceLeaderCounter) prometheus.MustRegister(balanceRegionCounter) + prometheus.MustRegister(balanceHotRegionCounter) prometheus.MustRegister(balanceDirectionCounter) prometheus.MustRegister(scatterRangeLeaderCounter) prometheus.MustRegister(scatterRangeRegionCounter) diff --git a/server/schedulers/scheduler_test.go b/server/schedulers/scheduler_test.go index f99ff66fd1e..1fb229aed2f 100644 --- a/server/schedulers/scheduler_test.go +++ b/server/schedulers/scheduler_test.go @@ -382,9 +382,9 @@ func (s *testShuffleHotRegionSchedulerSuite) checkBalance(c *C, tc *mockcluster. //| 1 | 1 | 2 | 3 | 512KB | //| 2 | 1 | 3 | 4 | 512KB | //| 3 | 1 | 2 | 4 | 512KB | - tc.AddLeaderRegionWithWriteInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) - tc.AddLeaderRegionWithWriteInfo(2, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 3, 4) - tc.AddLeaderRegionWithWriteInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 4) + tc.AddLeaderRegionWithWriteInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, 0, statistics.RegionHeartBeatReportInterval, []uint64{2, 3}) + tc.AddLeaderRegionWithWriteInfo(2, 1, 512*KB*statistics.RegionHeartBeatReportInterval, 0, statistics.RegionHeartBeatReportInterval, []uint64{3, 4}) + tc.AddLeaderRegionWithWriteInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, 0, statistics.RegionHeartBeatReportInterval, []uint64{2, 4}) opt.HotRegionCacheHitsThreshold = 0 // try to get an operator @@ -422,9 +422,9 @@ func (s *testHotRegionSchedulerSuite) TestAbnormalReplica(c *C) { tc.UpdateStorageReadBytes(2, 4.5*MB*statistics.StoreHeartBeatReportInterval) tc.UpdateStorageReadBytes(3, 4.5*MB*statistics.StoreHeartBeatReportInterval) - tc.AddLeaderRegionWithReadInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2) - tc.AddLeaderRegionWithReadInfo(2, 2, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 1, 3) - tc.AddLeaderRegionWithReadInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, statistics.RegionHeartBeatReportInterval, 2, 3) + tc.AddLeaderRegionWithReadInfo(1, 1, 512*KB*statistics.RegionHeartBeatReportInterval, 0, 
statistics.RegionHeartBeatReportInterval, []uint64{2}) + tc.AddLeaderRegionWithReadInfo(2, 2, 512*KB*statistics.RegionHeartBeatReportInterval, 0, statistics.RegionHeartBeatReportInterval, []uint64{1, 3}) + tc.AddLeaderRegionWithReadInfo(3, 1, 512*KB*statistics.RegionHeartBeatReportInterval, 0, statistics.RegionHeartBeatReportInterval, []uint64{2, 3}) opt.HotRegionCacheHitsThreshold = 0 c.Assert(tc.IsRegionHot(tc.GetRegion(1)), IsTrue) c.Assert(hb.Schedule(tc), IsNil) diff --git a/server/schedulers/shuffle_hot_region.go b/server/schedulers/shuffle_hot_region.go index 60f63787f8d..c91f42c96db 100644 --- a/server/schedulers/shuffle_hot_region.go +++ b/server/schedulers/shuffle_hot_region.go @@ -75,7 +75,7 @@ type shuffleHotRegionSchedulerConfig struct { // the hot peer. type shuffleHotRegionScheduler struct { *BaseScheduler - stLoadInfos *storeLoadInfos + stLoadInfos [resourceTypeLen]map[uint64]*storeLoadDetail r *rand.Rand conf *shuffleHotRegionSchedulerConfig types []rwType @@ -84,13 +84,16 @@ type shuffleHotRegionScheduler struct { // newShuffleHotRegionScheduler creates an admin scheduler that random balance hot regions func newShuffleHotRegionScheduler(opController *schedule.OperatorController, conf *shuffleHotRegionSchedulerConfig) schedule.Scheduler { base := NewBaseScheduler(opController) - return &shuffleHotRegionScheduler{ + ret := &shuffleHotRegionScheduler{ BaseScheduler: base, conf: conf, - stLoadInfos: newStoreLoadInfos(), types: []rwType{read, write}, r: rand.New(rand.NewSource(time.Now().UnixNano())), } + for ty := resourceType(0); ty < resourceTypeLen; ty++ { + ret.stLoadInfos[ty] = map[uint64]*storeLoadDetail{} + } + return ret } func (s *shuffleHotRegionScheduler) GetName() string { @@ -122,21 +125,23 @@ func (s *shuffleHotRegionScheduler) dispatch(typ rwType, cluster opt.Cluster) [] minHotDegree := cluster.GetHotRegionCacheHitsThreshold() switch typ { case read: - s.stLoadInfos.ReadLeaders = summaryStoresLoad( + s.stLoadInfos[readLeader] = summaryStoresLoad( storesStats.GetStoresBytesReadStat(), + storesStats.GetStoresKeysReadStat(), map[uint64]Influence{}, cluster.RegionReadStats(), minHotDegree, read, core.LeaderKind) - return s.randomSchedule(cluster, s.stLoadInfos.ReadLeaders) + return s.randomSchedule(cluster, s.stLoadInfos[readLeader]) case write: - s.stLoadInfos.WriteLeaders = summaryStoresLoad( + s.stLoadInfos[writeLeader] = summaryStoresLoad( storesStats.GetStoresBytesWriteStat(), + storesStats.GetStoresKeysWriteStat(), map[uint64]Influence{}, cluster.RegionWriteStats(), minHotDegree, write, core.LeaderKind) - return s.randomSchedule(cluster, s.stLoadInfos.WriteLeaders) + return s.randomSchedule(cluster, s.stLoadInfos[writeLeader]) } return nil } diff --git a/server/schedulers/utils.go b/server/schedulers/utils.go index 515cd118f85..77e7d996283 100644 --- a/server/schedulers/utils.go +++ b/server/schedulers/utils.go @@ -164,10 +164,14 @@ func getKeyRanges(args []string) ([]core.KeyRange, error) { // Influence records operator influence. 
type Influence struct { ByteRate float64 + KeyRate float64 + Count float64 } func (infl Influence) add(rhs *Influence, w float64) Influence { infl.ByteRate += rhs.ByteRate * w + infl.KeyRate += rhs.KeyRate * w + infl.Count += rhs.Count * w return infl } @@ -202,58 +206,140 @@ func summaryPendingInfluence(pendings map[*pendingInfluence]struct{}, f func(*op type storeLoad struct { ByteRate float64 - Count int + KeyRate float64 + Count float64 + + ExpByteRate float64 + ExpKeyRate float64 + ExpCount float64 } func (load *storeLoad) ToLoadPred(infl Influence) *storeLoadPred { future := *load future.ByteRate += infl.ByteRate + future.KeyRate += infl.KeyRate + future.Count += infl.Count return &storeLoadPred{ Current: *load, Future: future, } } +func stLdByteRate(ld *storeLoad) float64 { + return ld.ByteRate +} + +func stLdKeyRate(ld *storeLoad) float64 { + return ld.KeyRate +} + +func stLdCount(ld *storeLoad) float64 { + return ld.Count +} + +type storeLoadCmp func(ld1, ld2 *storeLoad) int + +func negLoadCmp(cmp storeLoadCmp) storeLoadCmp { + return func(ld1, ld2 *storeLoad) int { + return -cmp(ld1, ld2) + } +} + +func sliceLoadCmp(cmps ...storeLoadCmp) storeLoadCmp { + return func(ld1, ld2 *storeLoad) int { + for _, cmp := range cmps { + if r := cmp(ld1, ld2); r != 0 { + return r + } + } + return 0 + } +} + +func stLdRankCmp(dim func(ld *storeLoad) float64, rank func(value float64) int64) storeLoadCmp { + return func(ld1, ld2 *storeLoad) int { + return rankCmp(dim(ld1), dim(ld2), rank) + } +} + +func rankCmp(a, b float64, rank func(value float64) int64) int { + aRk, bRk := rank(a), rank(b) + if aRk < bRk { + return -1 + } else if aRk > bRk { + return 1 + } + return 0 +} + // store load prediction type storeLoadPred struct { Current storeLoad Future storeLoad } -func (lp *storeLoadPred) min() storeLoad { +func (lp *storeLoadPred) min() *storeLoad { return minLoad(&lp.Current, &lp.Future) } -func (lp *storeLoadPred) max() storeLoad { +func (lp *storeLoadPred) max() *storeLoad { return maxLoad(&lp.Current, &lp.Future) } -func minLoad(a, b *storeLoad) storeLoad { - return storeLoad{ - ByteRate: math.Min(a.ByteRate, b.ByteRate), - Count: minInt(a.Count, b.Count), +func (lp *storeLoadPred) diff() *storeLoad { + mx, mn := lp.max(), lp.min() + return &storeLoad{ + ByteRate: mx.ByteRate - mn.ByteRate, + KeyRate: mx.KeyRate - mn.KeyRate, + Count: mx.Count - mn.Count, } } -func maxLoad(a, b *storeLoad) storeLoad { - return storeLoad{ - ByteRate: math.Max(a.ByteRate, b.ByteRate), - Count: maxInt(a.Count, b.Count), +type storeLPCmp func(lp1, lp2 *storeLoadPred) int + +func sliceLPCmp(cmps ...storeLPCmp) storeLPCmp { + return func(lp1, lp2 *storeLoadPred) int { + for _, cmp := range cmps { + if r := cmp(lp1, lp2); r != 0 { + return r + } + } + return 0 } } -func minInt(a, b int) int { - if a < b { - return a +func minLPCmp(ldCmp storeLoadCmp) storeLPCmp { + return func(lp1, lp2 *storeLoadPred) int { + return ldCmp(lp1.min(), lp2.min()) } - return b } -func maxInt(a, b int) int { - if a < b { - return b +func maxLPCmp(ldCmp storeLoadCmp) storeLPCmp { + return func(lp1, lp2 *storeLoadPred) int { + return ldCmp(lp1.max(), lp2.max()) + } +} + +func diffCmp(ldCmp storeLoadCmp) storeLPCmp { + return func(lp1, lp2 *storeLoadPred) int { + return ldCmp(lp1.diff(), lp2.diff()) + } +} + +func minLoad(a, b *storeLoad) *storeLoad { + return &storeLoad{ + ByteRate: math.Min(a.ByteRate, b.ByteRate), + KeyRate: math.Min(a.KeyRate, b.KeyRate), + Count: math.Min(a.Count, b.Count), + } +} + +func maxLoad(a, b *storeLoad) 
*storeLoad {
+	return &storeLoad{
+		ByteRate: math.Max(a.ByteRate, b.ByteRate),
+		KeyRate:  math.Max(a.KeyRate, b.KeyRate),
+		Count:    math.Max(a.Count, b.Count),
 	}
-	return a
 }
 
 type storeLoadDetail struct {
@@ -268,6 +354,7 @@ func (li *storeLoadDetail) toHotPeersStat() *statistics.HotPeersStat {
 	}
 	return &statistics.HotPeersStat{
 		TotalBytesRate: li.LoadPred.Current.ByteRate,
+		TotalKeysRate:  li.LoadPred.Current.KeyRate,
 		Count:          len(li.HotPeers),
 		Stats:          peers,
 	}
diff --git a/server/statistics/avg_over_time.go b/server/statistics/avg_over_time.go
index eacfba71c5c..ead36715aa7 100644
--- a/server/statistics/avg_over_time.go
+++ b/server/statistics/avg_over_time.go
@@ -19,6 +19,11 @@ import (
 	"github.com/phf/go-queue/queue"
 )
 
+const (
+	// StoreHeartBeatReportInterval is the heartbeat report interval of a store.
+	StoreHeartBeatReportInterval = 10
+)
+
 type deltaWithInterval struct {
 	delta    float64
 	interval time.Duration
@@ -48,38 +53,68 @@ func NewAvgOverTime(interval time.Duration) *AvgOverTime {
 
 // Get returns change rate in the last interval.
 func (aot *AvgOverTime) Get() float64 {
-	if aot.intervalSum.Seconds() < 1 {
-		return 0
-	}
 	return aot.deltaSum / aot.intervalSum.Seconds()
 }
 
+// Clear clears the AvgOverTime.
+func (aot *AvgOverTime) Clear() {
+	aot.que = queue.New()
+	aot.intervalSum = 0
+	aot.deltaSum = 0
+}
+
 // Add adds recent change to AvgOverTime.
 func (aot *AvgOverTime) Add(delta float64, interval time.Duration) {
 	aot.que.PushBack(deltaWithInterval{delta, interval})
 	aot.deltaSum += delta
 	aot.intervalSum += interval
-
-	if aot.intervalSum <= aot.avgInterval {
-		return
-	}
-
-	for aot.que.Len() > 0 {
-		front := aot.que.Front().(deltaWithInterval)
-		if aot.intervalSum-front.interval >= aot.avgInterval {
-			aot.que.PopFront()
-			aot.deltaSum -= front.delta
-			aot.intervalSum -= front.interval
-		} else {
-			break
-		}
-	}
 }
 
 // Set sets AvgOverTime to the given average.
 func (aot *AvgOverTime) Set(avg float64) {
-	for aot.que.Len() > 0 {
-		aot.que.PopFront()
-	}
+	aot.Clear()
 	aot.deltaSum = avg * aot.avgInterval.Seconds()
 	aot.intervalSum = aot.avgInterval
 	aot.que.PushBack(deltaWithInterval{delta: aot.deltaSum, interval: aot.intervalSum})
 }
+
+// TimeMedian is AvgOverTime + MedianFilter.
+// The size of MedianFilter should be larger than double the size of AvgOverTime to denoise.
+// The delay is aotSize * mfSize * StoreHeartBeatReportInterval / 4.
+type TimeMedian struct {
+	aotInterval time.Duration
+	aot         *AvgOverTime
+	mf          *MedianFilter
+}
+
+// NewTimeMedian returns a TimeMedian with the given sizes.
+func NewTimeMedian(aotSize, mfSize int) *TimeMedian {
+	interval := time.Duration(aotSize*StoreHeartBeatReportInterval) * time.Second
+	return &TimeMedian{
+		aotInterval: interval,
+		aot:         NewAvgOverTime(interval),
+		mf:          NewMedianFilter(mfSize),
+	}
+}
+
+// Get returns the median change rate over the recent intervals.
+func (t *TimeMedian) Get() float64 {
+	return t.mf.Get()
+}
+
+// Add adds recent change to TimeMedian.
+func (t *TimeMedian) Add(delta float64, interval time.Duration) {
+	if interval < 1 {
+		return
+	}
+	t.aot.Add(delta, interval)
+	if t.aot.intervalSum >= t.aotInterval {
+		t.mf.Add(t.aot.Get())
+		t.aot.Clear()
+	}
+}
+
+// Set sets TimeMedian to the given average.
+func (t *TimeMedian) Set(avg float64) { + t.mf.Set(avg) +} diff --git a/server/statistics/avg_over_time_test.go b/server/statistics/avg_over_time_test.go index c617e6bdc73..cbf436b1c12 100644 --- a/server/statistics/avg_over_time_test.go +++ b/server/statistics/avg_over_time_test.go @@ -55,7 +55,7 @@ func (t *testAvgOverTimeSuite) TestChange(c *C) { for i := 0; i < 5; i++ { aot.Add(500, time.Second) } - c.Assert(aot.Get(), LessEqual, 505.) + c.Assert(aot.Get(), LessEqual, 900.) c.Assert(aot.Get(), GreaterEqual, 495.) for i := 0; i < 15; i++ { aot.Add(500, time.Second) @@ -65,6 +65,6 @@ func (t *testAvgOverTimeSuite) TestChange(c *C) { for i := 0; i < 5; i++ { aot.Add(100, time.Second) } - c.Assert(aot.Get(), LessEqual, 101.) + c.Assert(aot.Get(), LessEqual, 678.) c.Assert(aot.Get(), GreaterEqual, 99.) } diff --git a/server/statistics/hot_peer.go b/server/statistics/hot_peer.go index aa565dbddcd..76def9b4deb 100644 --- a/server/statistics/hot_peer.go +++ b/server/statistics/hot_peer.go @@ -29,7 +29,7 @@ type HotPeerStat struct { // HotDegree records the hot region update times HotDegree int `json:"hot_degree"` // AntiCount used to eliminate some noise when remove region in cache - AntiCount int + AntiCount int `json:"anti_count"` Kind FlowKind `json:"kind"` ByteRate float64 `json:"flow_bytes"` @@ -42,7 +42,7 @@ type HotPeerStat struct { // LastUpdateTime used to calculate average write LastUpdateTime time.Time `json:"last_update_time"` // Version used to check the region split times - Version uint64 + Version uint64 `json:"version"` needDelete bool isLeader bool diff --git a/server/statistics/hot_peer_cache.go b/server/statistics/hot_peer_cache.go index 8f64d8039e7..5335eb12916 100644 --- a/server/statistics/hot_peer_cache.go +++ b/server/statistics/hot_peer_cache.go @@ -116,9 +116,13 @@ func (f *hotPeerCache) CheckRegionFlow(region *core.RegionInfo, storesStats *Sto byteRate := totalBytes / float64(interval) keyRate := totalKeys / float64(interval) - for storeID := range storeIDs { + var tmpItem *HotPeerStat + for _, storeID := range storeIDs { isExpired := f.isRegionExpired(region, storeID) oldItem := f.getOldHotPeerStat(region.GetID(), storeID) + if isExpired && oldItem != nil { + tmpItem = oldItem + } // This is used for the simulator. Ignore if report too fast. 
if !isExpired && Denoising && interval < hotRegionReportMinInterval { @@ -137,6 +141,11 @@ func (f *hotPeerCache) CheckRegionFlow(region *core.RegionInfo, storesStats *Sto isLeader: region.GetLeader().GetStoreId() == storeID, } + // use the tmpItem cached from other store + if oldItem == nil && tmpItem != nil { + oldItem = tmpItem + } + newItem = f.updateHotPeerStat(newItem, oldItem, storesStats) if newItem != nil { ret = append(ret, newItem) @@ -224,13 +233,15 @@ func (f *hotPeerCache) calcHotThresholds(stats *StoresStats, storeID uint64) [di } // gets the storeIDs, including old region and new region -func (f *hotPeerCache) getAllStoreIDs(region *core.RegionInfo) map[uint64]struct{} { +func (f *hotPeerCache) getAllStoreIDs(region *core.RegionInfo) []uint64 { storeIDs := make(map[uint64]struct{}) + ret := make([]uint64, 0, len(region.GetPeers())) // old stores ids, ok := f.storesOfRegion[region.GetID()] if ok { for storeID := range ids { storeIDs[storeID] = struct{}{} + ret = append(ret, storeID) } } @@ -242,10 +253,11 @@ func (f *hotPeerCache) getAllStoreIDs(region *core.RegionInfo) map[uint64]struct } if _, ok := storeIDs[peer.GetStoreId()]; !ok { storeIDs[peer.GetStoreId()] = struct{}{} + ret = append(ret, peer.GetStoreId()) } } - return storeIDs + return ret } func (f *hotPeerCache) isRegionHotWithAnyPeers(region *core.RegionInfo, hotDegree int) bool { diff --git a/server/statistics/hot_regions_stat.go b/server/statistics/hot_regions_stat.go index f30ceaabb2b..74a54837819 100644 --- a/server/statistics/hot_regions_stat.go +++ b/server/statistics/hot_regions_stat.go @@ -16,6 +16,7 @@ package statistics // HotPeersStat records all hot regions statistics type HotPeersStat struct { TotalBytesRate float64 `json:"total_flow_bytes"` + TotalKeysRate float64 `json:"total_flow_keys"` Count int `json:"regions_count"` Stats []HotPeerStat `json:"statistics"` } diff --git a/server/statistics/store.go b/server/statistics/store.go index 28835eb7f58..02a88490f3b 100644 --- a/server/statistics/store.go +++ b/server/statistics/store.go @@ -18,12 +18,9 @@ import ( "time" "github.com/pingcap/kvproto/pkg/pdpb" + "github.com/pingcap/log" "github.com/pingcap/pd/v3/server/core" -) - -const ( - // StoreHeartBeatReportInterval is the heartbeat report interval of a store. - StoreHeartBeatReportInterval = 10 + "go.uber.org/zap" ) // StoresStats is a cache hold hot regions. @@ -261,25 +258,32 @@ func (s *StoresStats) GetStoresKeysReadStat() map[uint64]float64 { // RollingStoreStats are multiple sets of recent historical records with specified windows size. type RollingStoreStats struct { sync.RWMutex - bytesWriteRate *AvgOverTime - bytesReadRate *AvgOverTime - keysWriteRate *AvgOverTime - keysReadRate *AvgOverTime + bytesWriteRate *TimeMedian + bytesReadRate *TimeMedian + keysWriteRate *TimeMedian + keysReadRate *TimeMedian totalCPUUsage MovingAvg totalBytesDiskReadRate MovingAvg totalBytesDiskWriteRate MovingAvg } -const storeStatsRollingWindows = 3 -const storeAvgInterval time.Duration = 3 * StoreHeartBeatReportInterval * time.Second +const ( + storeStatsRollingWindows = 3 + // DefaultAotSize is default size of average over time. + DefaultAotSize = 2 + // DefaultWriteMfSize is default size of write median filter + DefaultWriteMfSize = 5 + // DefaultReadMfSize is default size of read median filter + DefaultReadMfSize = 3 +) // NewRollingStoreStats creates a RollingStoreStats. 
func newRollingStoreStats() *RollingStoreStats { return &RollingStoreStats{ - bytesWriteRate: NewAvgOverTime(storeAvgInterval), - bytesReadRate: NewAvgOverTime(storeAvgInterval), - keysWriteRate: NewAvgOverTime(storeAvgInterval), - keysReadRate: NewAvgOverTime(storeAvgInterval), + bytesWriteRate: NewTimeMedian(DefaultAotSize, DefaultWriteMfSize), + bytesReadRate: NewTimeMedian(DefaultAotSize, DefaultReadMfSize), + keysWriteRate: NewTimeMedian(DefaultAotSize, DefaultWriteMfSize), + keysReadRate: NewTimeMedian(DefaultAotSize, DefaultReadMfSize), totalCPUUsage: NewMedianFilter(storeStatsRollingWindows), totalBytesDiskReadRate: NewMedianFilter(storeStatsRollingWindows), totalBytesDiskWriteRate: NewMedianFilter(storeStatsRollingWindows), @@ -298,6 +302,7 @@ func collect(records []*pdpb.RecordPair) float64 { func (r *RollingStoreStats) Observe(stats *pdpb.StoreStats) { statInterval := stats.GetInterval() interval := statInterval.GetEndTimestamp() - statInterval.GetStartTimestamp() + log.Debug("update store stats", zap.Uint64("key-write", stats.KeysWritten), zap.Uint64("bytes-write", stats.BytesWritten), zap.Duration("interval", time.Duration(interval)*time.Second), zap.Uint64("store-id", stats.GetStoreId())) r.Lock() defer r.Unlock() r.bytesWriteRate.Add(float64(stats.BytesWritten), time.Duration(interval)*time.Second) diff --git a/tests/pdctl/hot/hot_test.go b/tests/pdctl/hot/hot_test.go index 199243afd5c..06e786b58bb 100644 --- a/tests/pdctl/hot/hot_test.go +++ b/tests/pdctl/hot/hot_test.go @@ -69,7 +69,6 @@ func (s *hotTestSuite) TestHot(c *C) { // test hot store ss := leaderServer.GetStore(1) now := time.Now().Second() - interval := &pdpb.TimeInterval{StartTimestamp: uint64(now - 10), EndTimestamp: uint64(now)} newStats := proto.Clone(ss.GetStoreStats()).(*pdpb.StoreStats) bytesWritten := uint64(8 * 1024 * 1024) bytesRead := uint64(16 * 1024 * 1024) @@ -79,9 +78,12 @@ func (s *hotTestSuite) TestHot(c *C) { newStats.BytesRead = bytesRead newStats.KeysWritten = keysWritten newStats.KeysRead = keysRead - newStats.Interval = interval rc := leaderServer.GetRaftCluster() - rc.GetStoresStats().Observe(ss.GetID(), newStats) + for i := statistics.DefaultWriteMfSize; i > 0; i-- { + newStats.Interval = &pdpb.TimeInterval{StartTimestamp: uint64(now - 10*i), EndTimestamp: uint64(now - 10*i + 10)} + rc.GetStoresStats().Observe(ss.GetID(), newStats) + } + args := []string{"-u", pdAddr, "hot", "store"} _, output, err := pdctl.ExecuteCommandC(cmd, args...) 
 	c.Assert(err, IsNil)
@@ -113,7 +115,7 @@ func (s *hotTestSuite) TestHot(c *C) {
 	hotReadRegionID, hotWriteRegionID, hotStoreId := uint64(3), uint64(2), uint64(1)
 	pdctl.MustPutRegion(c, cluster, hotReadRegionID, hotStoreId, []byte("b"), []byte("c"), core.SetReadBytes(1000000000), core.SetReportInterval(reportInterval))
 	pdctl.MustPutRegion(c, cluster, hotWriteRegionID, hotStoreId, []byte("c"), []byte("d"), core.SetWrittenBytes(1000000000), core.SetReportInterval(reportInterval))
-	time.Sleep(3200 * time.Millisecond)
+	time.Sleep(5000 * time.Millisecond)
 	testHot(hotReadRegionID, hotStoreId, "read")
 	testHot(hotWriteRegionID, hotStoreId, "write")
 }
diff --git a/tools/pd-ctl/pdctl/command/scheduler.go b/tools/pd-ctl/pdctl/command/scheduler.go
index aa7cd68aafc..92975a4924b 100644
--- a/tools/pd-ctl/pdctl/command/scheduler.go
+++ b/tools/pd-ctl/pdctl/command/scheduler.go
@@ -430,11 +430,29 @@ func NewConfigSchedulerCommand() *cobra.Command {
 	c.AddCommand(
 		newConfigEvictLeaderCommand(),
 		newConfigGrantLeaderCommand(),
+		newConfigHotRegionCommand(),
 		newConfigShuffleRegionCommand(),
 	)
 	return c
 }
 
+func newConfigHotRegionCommand() *cobra.Command {
+	c := &cobra.Command{
+		Use:   "balance-hot-region-scheduler",
+		Short: "show balance-hot-region-scheduler config",
+		Run:   listSchedulerConfigCommandFunc,
+	}
+	c.AddCommand(&cobra.Command{
+		Use:   "list",
+		Short: "list the config item",
+		Run:   listSchedulerConfigCommandFunc})
+	c.AddCommand(&cobra.Command{
+		Use:   "set <key> <value>",
+		Short: "set the config item",
+		Run:   func(cmd *cobra.Command, args []string) { postSchedulerConfigCommandFunc(cmd, c.Name(), args) }})
+	return c
+}
+
 func newConfigEvictLeaderCommand() *cobra.Command {
 	c := &cobra.Command{
 		Use:   "evict-leader-scheduler",
@@ -519,6 +537,22 @@ func listSchedulerConfigCommandFunc(cmd *cobra.Command, args []string) {
 	cmd.Println(r)
 }
 
+func postSchedulerConfigCommandFunc(cmd *cobra.Command, schedulerName string, args []string) {
+	if len(args) != 2 {
+		cmd.Println(cmd.UsageString())
+		return
+	}
+	var val interface{}
+	input := make(map[string]interface{})
+	key, value := args[0], args[1]
+	val, err := strconv.ParseFloat(value, 64)
+	if err != nil {
+		val = value
+	}
+	input[key] = val
+	postJSON(cmd, path.Join(schedulerConfigPrefix, schedulerName, "config"), input)
+}
+
 // convertReomveConfigToReomveScheduler make cmd can be used at removeCommandFunc
 func convertReomveConfigToReomveScheduler(cmd *cobra.Command) {
 	setCommandUse(cmd, "remove")
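Not part of the patch: a minimal standalone sketch of what the new "scheduler config balance-hot-region-scheduler set <key> <value>" subcommand posts. It mirrors the value handling in postSchedulerConfigCommandFunc above: a value that parses as a float is sent as a JSON number, anything else falls back to a JSON string. The key names used here are hypothetical examples, not config items defined by this patch.

// payload_sketch.go - illustrative only, assuming the ParseFloat-then-string
// fallback shown in postSchedulerConfigCommandFunc.
package main

import (
	"encoding/json"
	"fmt"
	"strconv"
)

// buildConfigPayload builds the JSON body that would be posted to the
// scheduler config endpoint for a single key/value pair.
func buildConfigPayload(key, value string) ([]byte, error) {
	var val interface{}
	val, err := strconv.ParseFloat(value, 64)
	if err != nil {
		// not a number: keep the raw string, as the pd-ctl command does
		val = value
	}
	return json.Marshal(map[string]interface{}{key: val})
}

func main() {
	for _, args := range [][2]string{
		{"max-peer-number", "64"},       // hypothetical numeric item -> {"max-peer-number":64}
		{"some-string-item", "enabled"}, // non-numeric -> {"some-string-item":"enabled"}
	} {
		payload, _ := buildConfigPayload(args[0], args[1])
		fmt.Println(string(payload))
	}
}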