Technical details
@@ -16660,7 +16660,7 @@ To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100500`
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_code!="OK"}[2m])))) / ((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService"}[2m])))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_code!="OK"}[2m])))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService"}[2m])))))))`
@@ -16681,7 +16681,7 @@ To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100501`
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}",grpc_code!="OK"}[2m])) by (grpc_method))) / ((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}",grpc_code!="OK"}[2m])) by (grpc_method))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
@@ -16702,7 +16702,7 @@ To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100502`
Technical details
-Query: `(sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
+Query: `(sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
@@ -16729,7 +16729,7 @@ To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100510`
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_code!="OK",is_internal_error="true"}[2m])))) / ((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService"}[2m])))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_code!="OK",is_internal_error="true"}[2m])))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService"}[2m])))))))`
@@ -16756,7 +16756,7 @@ To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100511`
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}",grpc_code!="OK",is_internal_error="true"}[2m])) by (grpc_method))) / ((sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}",grpc_code!="OK",is_internal_error="true"}[2m])) by (grpc_method))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
@@ -16783,7 +16783,7 @@ To see this panel, visit `/-/debug/grafana/d/searcher/searcher?viewPanel=100512`
Technical details
-Query: `(sum(rate(src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",is_internal_error="true",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
+Query: `(sum(rate(src_src_grpc_method_status{grpc_service=~"searcher.v1.SearcherService",is_internal_error="true",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
@@ -18353,7 +18353,7 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100600` o
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_code!="OK"}[2m])))) / ((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService"}[2m])))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_code!="OK"}[2m])))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService"}[2m])))))))`
@@ -18374,7 +18374,7 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100601` o
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}",grpc_code!="OK"}[2m])) by (grpc_method))) / ((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}",grpc_code!="OK"}[2m])) by (grpc_method))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
@@ -18395,7 +18395,7 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100602` o
Technical details
-Query: `(sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
+Query: `(sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
@@ -18422,7 +18422,7 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100610` o
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_code!="OK",is_internal_error="true"}[2m])))) / ((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService"}[2m])))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_code!="OK",is_internal_error="true"}[2m])))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService"}[2m])))))))`
@@ -18449,7 +18449,7 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100611` o
Technical details
-Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}",grpc_code!="OK",is_internal_error="true"}[2m])) by (grpc_method))) / ((sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
+Query: `(100.0 * ((((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}",grpc_code!="OK",is_internal_error="true"}[2m])) by (grpc_method))) / ((sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
@@ -18476,7 +18476,7 @@ To see this panel, visit `/-/debug/grafana/d/symbols/symbols?viewPanel=100612` o
Technical details
-Query: `(sum(rate(src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",is_internal_error="true",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
+Query: `(sum(rate(src_src_grpc_method_status{grpc_service=~"symbols.v1.SymbolsService",is_internal_error="true",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
@@ -20714,6 +20714,469 @@ Query: `sum by (container_label_io_kubernetes_pod_name) (rate(container_network_
+### Zoekt: GRPC server metrics
+
+#### zoekt: zoekt-webserver_grpc_request_rate_all_methods
+
+Request rate across all methods over 2m
+
+The number of gRPC requests received per second across all methods, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100800` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `sum(rate(grpc_server_started_total{instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m]))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_request_rate_per_method
+
+Request rate per-method over 2m
+
+The number of gRPC requests received per second broken out per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100801` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `sum(rate(grpc_server_started_total{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])) by (grpc_method)`
+
+
+
+
+
+#### zoekt: zoekt-webserver_error_percentage_all_methods
+
+Error percentage across all methods over 2m
+
+The percentage of gRPC requests that fail across all methods, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100810` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(100.0 * ( (sum(rate(grpc_server_handled_total{grpc_code!="OK",instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m]))) / (sum(rate(grpc_server_handled_total{instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m]))) ))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_error_percentage_per_method
+
+Error percentage per-method over 2m
+
+The percentage of gRPC requests that fail per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100811` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(100.0 * ( (sum(rate(grpc_server_handled_total{grpc_method=~`${method:regex}`,grpc_code!="OK",instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])) by (grpc_method)) / (sum(rate(grpc_server_handled_total{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])) by (grpc_method)) ))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p99_response_time_per_method
+
+99th percentile response time per method over 2m
+
+The 99th percentile response time per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100820` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.99, sum by (le, name, grpc_method)(rate(grpc_server_handling_seconds_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p90_response_time_per_method
+
+90th percentile response time per method over 2m
+
+The 90th percentile response time per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100821` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.90, sum by (le, name, grpc_method)(rate(grpc_server_handling_seconds_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p75_response_time_per_method
+
+75th percentile response time per method over 2m
+
+The 75th percentile response time per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100822` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.75, sum by (le, name, grpc_method)(rate(grpc_server_handling_seconds_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p99_9_response_size_per_method
+
+99.9th percentile total response size per method over 2m
+
+The 99.9th percentile total per-RPC response size per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100830` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.999, sum by (le, name, grpc_method)(rate(src_grpc_server_sent_bytes_per_rpc_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p90_response_size_per_method
+
+90th percentile total response size per method over 2m
+
+The 90th percentile total per-RPC response size per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100831` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.90, sum by (le, name, grpc_method)(rate(src_grpc_server_sent_bytes_per_rpc_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p75_response_size_per_method
+
+75th percentile total response size per method over 2m
+
+The 75th percentile total per-RPC response size per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100832` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.75, sum by (le, name, grpc_method)(rate(src_grpc_server_sent_bytes_per_rpc_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p99_9_invididual_sent_message_size_per_method
+
+99.9th percentile individual sent message size per method over 2m
+
+The 99.9th percentile size of each individual protocol buffer message sent by the service per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100840` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.999, sum by (le, name, grpc_method)(rate(src_grpc_server_sent_individual_message_size_bytes_per_rpc_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p90_invididual_sent_message_size_per_method
+
+90th percentile individual sent message size per method over 2m
+
+The 90th percentile size of each individual protocol buffer message sent by the service per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100841` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.90, sum by (le, name, grpc_method)(rate(src_grpc_server_sent_individual_message_size_bytes_per_rpc_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_p75_invididual_sent_message_size_per_method
+
+75th percentile individual sent message size per method over 2m
+
+The 75th percentile size of each individual protocol buffer message sent by the service per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100842` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `histogram_quantile(0.75, sum by (le, name, grpc_method)(rate(src_grpc_server_sent_individual_message_size_bytes_per_rpc_bucket{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_response_stream_message_count_per_method
+
+Average streaming response message count per-method over 2m
+
+The average number of response messages sent during a streaming RPC method, broken out per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100850` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `((sum(rate(grpc_server_msg_sent_total{grpc_type="server_stream",instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])) by (grpc_method))/(sum(rate(grpc_server_started_total{grpc_type="server_stream",instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])) by (grpc_method)))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_all_codes_per_method
+
+Response codes rate per-method over 2m
+
+The rate of all generated gRPC response codes per method, aggregated across all instances.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100860` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `sum(rate(grpc_server_handled_total{grpc_method=~`${method:regex}`,instance=~`${webserver_instance:regex}`,grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])) by (grpc_method, grpc_code)`
+
+
+
+
+
+### Zoekt: GRPC "internal error" metrics
+
+#### zoekt: zoekt-webserver_grpc_clients_error_percentage_all_methods
+
+Client baseline error percentage across all methods over 2m
+
+The percentage of gRPC requests that fail across all methods (regardless of whether or not there was an internal error), aggregated across all "zoekt-webserver" clients.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100900` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",grpc_code!="OK"}[2m])))) / ((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))))))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_clients_error_percentage_per_method
+
+Client baseline error percentage per-method over 2m
+
+The percentage of gRPC requests that fail per method (regardless of whether or not there was an internal error), aggregated across all "zoekt-webserver" clients.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100901` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",grpc_method=~"${method:regex}",grpc_code!="OK"}[2m])) by (grpc_method))) / ((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_clients_all_codes_per_method
+
+Client baseline response codes rate per-method over 2m
+
+The rate of all generated gRPC response codes per method (regardless of whether or not there was an internal error), aggregated across all "zoekt-webserver" clients.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100902` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_clients_internal_error_percentage_all_methods
+
+Client-observed gRPC internal error percentage across all methods over 2m
+
+The percentage of gRPC requests that appear to fail due to gRPC internal errors across all methods, aggregated across all "zoekt-webserver" clients.
+
+**Note**: Internal errors are ones that appear to originate from the https://github.com/grpc/grpc-go library itself, rather than from any user-written application code. These errors can be caused by a variety of issues, and can originate from either the code-generated "zoekt-webserver" gRPC client or gRPC server. These errors might be solvable by adjusting the gRPC configuration, or they might indicate a bug in Sourcegraph's use of gRPC.
+
+When debugging, knowing that a particular error comes from the grpc-go library itself (an `internal error`) as opposed to `normal` application code can be helpful when trying to fix it.
+
+**Note**: Internal errors are detected via a very coarse heuristic (seeing if the error starts with `grpc:`, etc.). Because of this, it's possible that some gRPC-specific issues might not be categorized as internal errors.
+
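+For illustration, a minimal Go sketch of the kind of coarse, prefix-based check described in the note above (the helper name and the extra prefix in the list are assumptions for this example, not the actual Sourcegraph implementation):
+
+```go
+package main
+
+import "strings"
+
+// isProbableInternalGRPCError treats an error whose message starts with a
+// grpc-go-style prefix (e.g. "grpc:") as an "internal" error originating
+// from the grpc-go library rather than from application code.
+func isProbableInternalGRPCError(err error) bool {
+	if err == nil {
+		return false
+	}
+	msg := err.Error()
+	// "grpc:" is the prefix called out in the note above; "transport:" is an
+	// additional prefix assumed only for this sketch.
+	for _, prefix := range []string{"grpc:", "transport:"} {
+		if strings.HasPrefix(msg, prefix) {
+			return true
+		}
+	}
+	return false
+}
+```
+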
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100910` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",grpc_code!="OK",is_internal_error="true"}[2m])))) / ((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService"}[2m])))))))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_clients_internal_error_percentage_per_method
+
+Client-observed gRPC internal error percentage per-method over 2m
+
+The percentage of gRPC requests that appear to fail due to gRPC internal errors per method, aggregated across all "zoekt-webserver" clients.
+
+**Note**: Internal errors are ones that appear to originate from the https://github.com/grpc/grpc-go library itself, rather than from any user-written application code. These errors can be caused by a variety of issues, and can originate from either the code-generated "zoekt-webserver" gRPC client or gRPC server. These errors might be solvable by adjusting the gRPC configuration, or they might indicate a bug in Sourcegraph's use of gRPC.
+
+When debugging, knowing that a particular error comes from the grpc-go library itself (an `internal error`) as opposed to `normal` application code can be helpful when trying to fix it.
+
+**Note**: Internal errors are detected via a very coarse heuristic (seeing if the error starts with `grpc:`, etc.). Because of this, it's possible that some gRPC-specific issues might not be categorized as internal errors.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100911` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(100.0 * ((((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",grpc_method=~"${method:regex}",grpc_code!="OK",is_internal_error="true"}[2m])) by (grpc_method))) / ((sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",grpc_method=~"${method:regex}"}[2m])) by (grpc_method))))))`
+
+
+
+
+
+#### zoekt: zoekt-webserver_grpc_clients_internal_error_all_codes_per_method
+
+Client-observed gRPC internal error response code rate per-method over 2m
+
+The rate of gRPC internal-error response codes per method, aggregated across all "zoekt-webserver" clients.
+
+**Note**: Internal errors are ones that appear to originate from the https://github.com/grpc/grpc-go library itself, rather than from any user-written application code. These errors can be caused by a variety of issues, and can originate from either the code-generated "zoekt-webserver" gRPC client or gRPC server. These errors might be solvable by adjusting the gRPC configuration, or they might indicate a bug in Sourcegraph's use of gRPC.
+
+When debugging, knowing that a particular error comes from the grpc-go library itself (an `internal error`) as opposed to `normal` application code can be helpful when trying to fix it.
+
+**Note**: Internal errors are detected via a very coarse heuristic (seeing if the error starts with `grpc:`, etc.). Because of this, it's possible that some gRPC-specific issues might not be categorized as internal errors.
+
+This panel has no related alerts.
+
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100912` on your Sourcegraph instance.
+
+*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
+
+
+Technical details
+
+Query: `(sum(rate(src_grpc_method_status{grpc_service=~"zoekt.webserver.v1.WebserverService",is_internal_error="true",grpc_method=~"${method:regex}"}[2m])) by (grpc_method, grpc_code))`
+
+
+
+
+
### Zoekt: Data disk I/O metrics
#### zoekt: data_disk_reads_sec
@@ -20726,7 +21189,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100800` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101000` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20749,7 +21212,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100801` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101001` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20772,7 +21235,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100810` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101010` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20795,7 +21258,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100811` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101011` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20818,7 +21281,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100820` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101020` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20841,7 +21304,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100821` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101021` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20864,7 +21327,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100830` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101030` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20887,7 +21350,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100831` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101031` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20910,7 +21373,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100840` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101040` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20933,7 +21396,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100841` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101041` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20956,7 +21419,7 @@ Note: Disk statistics are per _device_, not per _service_. In certain environmen
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100850` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101050` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -20987,7 +21450,7 @@ value change independent of deployment events (such as an upgrade), it could ind
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100900` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101100` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21006,7 +21469,7 @@ Query: `count by(name) ((time() - container_last_seen{name=~"^zoekt-indexserver.
Refer to the [alerts reference](./alerts.md#zoekt-container-cpu-usage) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100901` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101101` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21025,7 +21488,7 @@ Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-indexserver.
Refer to the [alerts reference](./alerts.md#zoekt-container-memory-usage) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100902` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101102` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21047,7 +21510,7 @@ When extremely high, this can indicate a resource usage problem, or can cause pr
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=100903` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101103` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21078,7 +21541,7 @@ value change independent of deployment events (such as an upgrade), it could ind
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101000` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101200` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21097,7 +21560,7 @@ Query: `count by(name) ((time() - container_last_seen{name=~"^zoekt-webserver.*"
Refer to the [alerts reference](./alerts.md#zoekt-container-cpu-usage) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101001` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101201` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21116,7 +21579,7 @@ Query: `cadvisor_container_cpu_usage_percentage_total{name=~"^zoekt-webserver.*"
Refer to the [alerts reference](./alerts.md#zoekt-container-memory-usage) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101002` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101202` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21138,7 +21601,7 @@ When extremely high, this can indicate a resource usage problem, or can cause pr
This panel has no related alerts.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101003` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101203` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21159,7 +21622,7 @@ Query: `sum by(name) (rate(container_fs_reads_total{name=~"^zoekt-webserver.*"}[
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101100` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101300` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21178,7 +21641,7 @@ Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{na
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-memory-usage-long-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101101` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101301` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21197,7 +21660,7 @@ Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^z
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101110` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101310` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21216,7 +21679,7 @@ Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^zoek
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-memory-usage-short-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101111` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101311` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21238,7 +21701,7 @@ When it occurs frequently, it is an indicator of underprovisioning.
Refer to the [alerts reference](./alerts.md#zoekt-container-oomkill-events-total) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101112` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101312` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21259,7 +21722,7 @@ Query: `max by (name) (container_oom_events_total{name=~"^zoekt-indexserver.*"})
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-cpu-usage-long-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101200` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101400` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21278,7 +21741,7 @@ Query: `quantile_over_time(0.9, cadvisor_container_cpu_usage_percentage_total{na
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-memory-usage-long-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101201` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101401` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21297,7 +21760,7 @@ Query: `max_over_time(cadvisor_container_memory_usage_percentage_total{name=~"^z
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-cpu-usage-short-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101210` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101410` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21316,7 +21779,7 @@ Query: `max_over_time(cadvisor_container_cpu_usage_percentage_total{name=~"^zoek
Refer to the [alerts reference](./alerts.md#zoekt-provisioning-container-memory-usage-short-term) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101211` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101411` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21338,7 +21801,7 @@ When it occurs frequently, it is an indicator of underprovisioning.
Refer to the [alerts reference](./alerts.md#zoekt-container-oomkill-events-total) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101212` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101412` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
@@ -21359,7 +21822,7 @@ Query: `max by (name) (container_oom_events_total{name=~"^zoekt-webserver.*"})`
Refer to the [alerts reference](./alerts.md#zoekt-pods-available-percentage) for 1 alert related to this panel.
-To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101300` on your Sourcegraph instance.
+To see this panel, visit `/-/debug/grafana/d/zoekt/zoekt?viewPanel=101500` on your Sourcegraph instance.
*Managed by the [Sourcegraph Search Core team](https://handbook.sourcegraph.com/departments/engineering/teams/search/core).*
diff --git a/go.mod b/go.mod
index 0638a76ad8d97..9e05c9c5ff5f4 100644
--- a/go.mod
+++ b/go.mod
@@ -538,7 +538,7 @@ require (
github.com/sourcegraph/conc v0.2.0
github.com/sourcegraph/mountinfo v0.0.0-20230106004439-7026e28cef67
github.com/sourcegraph/sourcegraph/monitoring v0.0.0-20230124144931-b2d81b1accb6
- github.com/sourcegraph/zoekt v0.0.0-20230825171831-40a9a23bb04b
+ github.com/sourcegraph/zoekt v0.0.0-20230912152407-3ce1f2b24c80
github.com/spf13/cobra v1.7.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.0 // indirect
diff --git a/go.sum b/go.sum
index 0b39f3d41c9de..d77a32e0cc780 100644
--- a/go.sum
+++ b/go.sum
@@ -2026,8 +2026,8 @@ github.com/sourcegraph/scip v0.3.1-0.20230627154934-45df7f6d33fc h1:o+eq0cjVV3B5
github.com/sourcegraph/scip v0.3.1-0.20230627154934-45df7f6d33fc/go.mod h1:7ZKAtLIUmiMvOIgG5LMcBxdtBXVa0v2GWC4Hm1ASYQ0=
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152 h1:z/MpntplPaW6QW95pzcAR/72Z5TWDyDnSo0EOcyij9o=
github.com/sourcegraph/yaml v1.0.1-0.20200714132230-56936252f152/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I=
-github.com/sourcegraph/zoekt v0.0.0-20230825171831-40a9a23bb04b h1:kqYHOCtMTKwkM2F6AbpelFE+Olt2qqJyxBCNps1oIjQ=
-github.com/sourcegraph/zoekt v0.0.0-20230825171831-40a9a23bb04b/go.mod h1:3rlMtZdLxkc7P1R14qWq20fWDDyRQwL6TmAqH81WQ4M=
+github.com/sourcegraph/zoekt v0.0.0-20230912152407-3ce1f2b24c80 h1:jz6dUd7NKcULoGoC0KcsdWw1dh58S8/cM50rl1K2b00=
+github.com/sourcegraph/zoekt v0.0.0-20230912152407-3ce1f2b24c80/go.mod h1:49Y2VVQuWxaSUXeFphZnZX+/zVy2G/sqd+cg/oqQTBk=
github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v0.0.0-20170901052352-ee1bd8ee15a1/go.mod h1:j4pytiNVoe2o6bmDsKpLACNPDBIoEAkihy7loJ1B0CQ=
diff --git a/internal/search/backend/BUILD.bazel b/internal/search/backend/BUILD.bazel
index fe2b70a617d51..035abcee24ce5 100644
--- a/internal/search/backend/BUILD.bazel
+++ b/internal/search/backend/BUILD.bazel
@@ -38,7 +38,7 @@ go_library(
"@com_github_sourcegraph_log//:log",
"@com_github_sourcegraph_zoekt//:zoekt",
"@com_github_sourcegraph_zoekt//cmd/zoekt-sourcegraph-indexserver/protos/sourcegraph/zoekt/configuration/v1:configuration",
- "@com_github_sourcegraph_zoekt//grpc/v1:grpc",
+ "@com_github_sourcegraph_zoekt//grpc/protos/zoekt/webserver/v1:webserver",
"@com_github_sourcegraph_zoekt//query",
"@com_github_sourcegraph_zoekt//rpc",
"@com_github_sourcegraph_zoekt//stream",
diff --git a/internal/search/backend/grpc.go b/internal/search/backend/grpc.go
index bae7379bc9dfd..a6bee9e522671 100644
--- a/internal/search/backend/grpc.go
+++ b/internal/search/backend/grpc.go
@@ -5,7 +5,7 @@ import (
"io"
"github.com/sourcegraph/zoekt"
- v1 "github.com/sourcegraph/zoekt/grpc/v1"
+ proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1"
"github.com/sourcegraph/zoekt/query"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@@ -55,7 +55,7 @@ func (c *switchableZoektGRPCClient) String() string {
// zoektGRPCClient is a zoekt.Streamer that uses gRPC for its RPC layer
type zoektGRPCClient struct {
endpoint string
- client v1.WebserverServiceClient
+ client proto.WebserverServiceClient
// We capture the dial error to return it lazily.
// This allows us to treat Dial as infallible, which is
@@ -70,9 +70,11 @@ func (z *zoektGRPCClient) StreamSearch(ctx context.Context, q query.Q, opts *zoe
return z.dialErr
}
- req := &v1.SearchRequest{
- Query: query.QToProto(q),
- Opts: opts.ToProto(),
+ req := &proto.StreamSearchRequest{
+ Request: &proto.SearchRequest{
+ Query: query.QToProto(q),
+ Opts: opts.ToProto(),
+ },
}
ss, err := z.client.StreamSearch(ctx, req)
@@ -89,7 +91,7 @@ func (z *zoektGRPCClient) StreamSearch(ctx context.Context, q query.Q, opts *zoe
var repoURLS map[string]string // We don't use repoURLs in Sourcegraph
var lineFragments map[string]string // We don't use lineFragments in Sourcegraph
- sender.Send(zoekt.SearchResultFromProto(msg, repoURLS, lineFragments))
+ sender.Send(zoekt.SearchResultFromProto(msg.GetResponseChunk(), repoURLS, lineFragments))
}
}
@@ -98,7 +100,7 @@ func (z *zoektGRPCClient) Search(ctx context.Context, q query.Q, opts *zoekt.Sea
return nil, z.dialErr
}
- req := &v1.SearchRequest{
+ req := &proto.SearchRequest{
Query: query.QToProto(q),
Opts: opts.ToProto(),
}
@@ -121,7 +123,7 @@ func (z *zoektGRPCClient) List(ctx context.Context, q query.Q, opts *zoekt.ListO
return nil, z.dialErr
}
- req := &v1.ListRequest{
+ req := &proto.ListRequest{
Query: query.QToProto(q),
Opts: opts.ToProto(),
}
diff --git a/internal/search/backend/zoekt.go b/internal/search/backend/zoekt.go
index 056338a2c75bd..f3538e20f2e38 100644
--- a/internal/search/backend/zoekt.go
+++ b/internal/search/backend/zoekt.go
@@ -5,7 +5,7 @@ import (
"github.com/sourcegraph/log"
"github.com/sourcegraph/zoekt"
- "github.com/sourcegraph/zoekt/grpc/v1"
+ proto "github.com/sourcegraph/zoekt/grpc/protos/zoekt/webserver/v1"
"github.com/sourcegraph/zoekt/rpc"
zoektstream "github.com/sourcegraph/zoekt/stream"
"google.golang.org/grpc"
@@ -125,7 +125,7 @@ func ZoektDialGRPC(endpoint string) zoekt.Streamer {
)
return NewMeteredSearcher(endpoint, &zoektGRPCClient{
endpoint: endpoint,
- client: v1.NewWebserverServiceClient(conn),
+ client: proto.NewWebserverServiceClient(conn),
dialErr: err,
})
}
diff --git a/monitoring/definitions/frontend.go b/monitoring/definitions/frontend.go
index 9354fc7696f90..6112d57357a72 100644
--- a/monitoring/definitions/frontend.go
+++ b/monitoring/definitions/frontend.go
@@ -172,7 +172,7 @@ func Frontend() *monitoring.Dashboard {
Query: `histogram_quantile(0.9, sum by(le) (rate(src_http_request_duration_seconds_bucket{route="blob"}[10m])))`,
Critical: monitoring.Alert().GreaterOrEqual(5),
Panel: monitoring.Panel().LegendFormat("latency").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- Confirm that the Sourcegraph frontend has enough CPU/memory using the provisioning panels.
- Trace a request to see what the slowest part is: https://docs.sourcegraph.com/admin/observability/tracing
@@ -418,6 +418,7 @@ func Frontend() *monitoring.Dashboard {
shared.GRPCInternalErrorMetricsOptions{
HumanServiceName: "frontend",
RawGRPCServiceName: grpcZoektConfigurationServiceName,
+ Namespace: "", // intentionally empty
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerSearchCore),
@@ -468,7 +469,7 @@ func Frontend() *monitoring.Dashboard {
Query: `histogram_quantile(0.99, sum by (le,category)(rate(src_gitserver_request_duration_seconds_bucket{job=~"(sourcegraph-)?frontend"}[5m])))`,
Warning: monitoring.Alert().GreaterOrEqual(20),
Panel: monitoring.Panel().LegendFormat("{{category}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "none",
},
{
@@ -477,7 +478,7 @@ func Frontend() *monitoring.Dashboard {
Query: `sum by (category)(increase(src_gitserver_request_duration_seconds_count{job=~"(sourcegraph-)?frontend",code!~"2.."}[5m])) / ignoring(code) group_left sum by (category)(increase(src_gitserver_request_duration_seconds_count{job=~"(sourcegraph-)?frontend"}[5m])) * 100`,
Warning: monitoring.Alert().GreaterOrEqual(5).For(15 * time.Minute),
Panel: monitoring.Panel().LegendFormat("{{category}}").Unit(monitoring.Percentage),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "none",
},
},
@@ -648,7 +649,7 @@ func Frontend() *monitoring.Dashboard {
Warning: monitoring.Alert().GreaterOrEqual(15000).For(5 * time.Minute),
Critical: monitoring.Alert().GreaterOrEqual(30000).For(5 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- Revert recent commits that cause extensive listing from "external_services" and/or "user_external_accounts" tables.
`,
@@ -659,7 +660,7 @@ func Frontend() *monitoring.Dashboard {
Query: `min by (kubernetes_name) (src_encryption_cache_hit_total/(src_encryption_cache_hit_total+src_encryption_cache_miss_total))`,
NoAlert: true,
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
- Encryption cache hit ratio (hits/(hits+misses)) - minimum across all instances of a workload.
`,
@@ -670,7 +671,7 @@ func Frontend() *monitoring.Dashboard {
Query: `sum by (kubernetes_name) (irate(src_encryption_cache_eviction_total[5m]))`,
NoAlert: true,
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
- Rate of encryption cache evictions (caused by cache exceeding its maximum size) - sum across all instances of a workload
`,
@@ -1036,7 +1037,7 @@ func Frontend() *monitoring.Dashboard {
Query: "histogram_quantile(0.95, sum (rate(src_http_request_duration_seconds_bucket{route=~\"webhooks|github.webhooks|gitlab.webhooks|bitbucketServer.webhooks|bitbucketCloud.webhooks\"}[5m])) by (le, route))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("duration").Unit(monitoring.Seconds).With(monitoring.PanelOptions.NoLegend()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
p95 response time to incoming webhook requests from code hosts.
diff --git a/monitoring/definitions/git_server.go b/monitoring/definitions/git_server.go
index 17467ab83b82e..63e1b69b4d5a4 100644
--- a/monitoring/definitions/git_server.go
+++ b/monitoring/definitions/git_server.go
@@ -55,7 +55,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.Bytes).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -66,7 +66,7 @@ func GitServer() *monitoring.Dashboard {
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -80,7 +80,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.Percentage).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -91,7 +91,7 @@ func GitServer() *monitoring.Dashboard {
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -113,7 +113,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{instance}}").
Unit(monitoring.Percentage).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
Indicates disk space remaining for each gitserver instance, which is used to determine when to start evicting least-used repository clones from disk (default 10%, configured by 'SRC_REPOS_DESIRED_PERCENT_FREE').
`,
@@ -132,7 +132,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.ReadsPerSecond).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -144,7 +144,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.WritesPerSecond).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -158,7 +158,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.ReadsPerSecond).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -170,7 +170,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.WritesPerSecond).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -184,7 +184,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.ReadsPerSecond).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -196,7 +196,7 @@ func GitServer() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{container_label_io_kubernetes_pod_name}}").
Unit(monitoring.WritesPerSecond).
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
`,
},
@@ -210,7 +210,7 @@ func GitServer() *monitoring.Dashboard {
Critical: monitoring.Alert().GreaterOrEqual(100).For(5 * time.Minute),
Panel: monitoring.Panel().LegendFormat("{{instance}} {{cmd}}").
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
A high value signals load.
`,
@@ -228,7 +228,7 @@ func GitServer() *monitoring.Dashboard {
Interpretation: "per second rate per command across all instances",
Panel: monitoring.Panel().LegendFormat("{{cmd}}").
With(monitoring.PanelOptions.LegendOnRight()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
},
},
{
@@ -238,7 +238,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(src_gitserver_clone_queue)",
Warning: monitoring.Alert().GreaterOrEqual(25),
Panel: monitoring.Panel().LegendFormat("queue size"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- **If you just added several repositories**, the warning may be expected.
- **Check which repositories need cloning**, by visiting e.g. https://sourcegraph.example.com/site-admin/repositories?filter=not-cloned
@@ -250,7 +250,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(src_gitserver_lsremote_queue)",
Warning: monitoring.Alert().GreaterOrEqual(25),
Panel: monitoring.Panel().LegendFormat("queue size"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- **Check the code host status indicator for errors:** on the Sourcegraph app homepage, when signed in as an admin, click the cloud icon in the top right corner of the page.
- **Check if the issue continues to happen after 30 minutes**, it may be temporary.
@@ -265,7 +265,7 @@ func GitServer() *monitoring.Dashboard {
Query: "max(src_gitserver_echo_duration_seconds)",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("running commands").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
A high value here likely indicates a problem, especially if consistently high.
You can query for individual commands using 'sum by (cmd)(src_gitserver_exec_running)' in Grafana ('/-/debug/grafana') to see if a specific Git Server command might be spiking in frequency.
@@ -276,7 +276,7 @@ func GitServer() *monitoring.Dashboard {
- **Kubernetes and Docker Compose:** Check that you are running a similar number of git server replicas and that their CPU/memory limits are allocated according to what is shown in the [Sourcegraph resource estimator](../deploy/resource_estimator.md).
`,
},
- shared.FrontendInternalAPIErrorResponses("gitserver", monitoring.ObservableOwnerRepoManagement).Observable(),
+ shared.FrontendInternalAPIErrorResponses("gitserver", monitoring.ObservableOwnerSource).Observable(),
},
{
{
@@ -285,7 +285,7 @@ func GitServer() *monitoring.Dashboard {
Query: "src_gitserver_repo_count",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("repo count"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
MultiInstance: true,
Interpretation: `
This metric is only for informational purposes. It indicates the total number of repositories on gitserver.
@@ -309,7 +309,7 @@ func GitServer() *monitoring.Dashboard {
Query: "histogram_quantile(0.95, sum(rate(src_gitserver_gitservice_duration_seconds_bucket{type=`gitserver`, error=`false`}[5m])) by (le))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{le}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `A high value means any internal service trying to clone a repo from gitserver is slowed down.`,
},
{
@@ -318,7 +318,7 @@ func GitServer() *monitoring.Dashboard {
Query: "histogram_quantile(0.95, sum(rate(src_gitserver_gitservice_duration_seconds_bucket{type=`gitserver`, error=`false`, instance=~`${shard:regex}`}[5m])) by (le, instance))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `A high value means any internal service trying to clone a repo from gitserver is slowed down.`,
},
},
@@ -329,7 +329,7 @@ func GitServer() *monitoring.Dashboard {
Query: "histogram_quantile(0.95, sum(rate(src_gitserver_gitservice_duration_seconds_bucket{type=`gitserver`, error=`true`}[5m])) by (le))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{le}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `95th percentile gitservice error request duration aggregate`,
},
{
@@ -338,7 +338,7 @@ func GitServer() *monitoring.Dashboard {
Query: "histogram_quantile(0.95, sum(rate(src_gitserver_gitservice_duration_seconds_bucket{type=`gitserver`, error=`true`, instance=~`${shard:regex}`}[5m])) by (le, instance))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `95th percentile gitservice error request duration per shard`,
},
},
@@ -349,7 +349,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(rate(src_gitserver_gitservice_duration_seconds_count{type=`gitserver`, error=`false`}[5m]))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("gitservers").Unit(monitoring.RequestsPerSecond),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `Aggregate gitservice request rate`,
},
{
@@ -358,7 +358,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(rate(src_gitserver_gitservice_duration_seconds_count{type=`gitserver`, error=`false`, instance=~`${shard:regex}`}[5m]))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.RequestsPerSecond),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `Per shard gitservice request rate`,
},
},
@@ -369,7 +369,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(rate(src_gitserver_gitservice_duration_seconds_count{type=`gitserver`, error=`true`}[5m]))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("gitservers").Unit(monitoring.RequestsPerSecond),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `Aggregate gitservice request error rate`,
},
{
@@ -378,7 +378,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(rate(src_gitserver_gitservice_duration_seconds_count{type=`gitserver`, error=`true`, instance=~`${shard:regex}`}[5m]))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.RequestsPerSecond),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `Per shard gitservice request error rate`,
},
},
@@ -389,7 +389,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(src_gitserver_gitservice_running{type=`gitserver`})",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("gitservers").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `Aggregate gitservice requests running`,
},
{
@@ -398,7 +398,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum(src_gitserver_gitservice_running{type=`gitserver`, instance=~`${shard:regex}`}) by (instance)",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.RequestsPerSecond),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `Per shard gitservice requests running`,
},
},
@@ -415,7 +415,7 @@ func GitServer() *monitoring.Dashboard {
Query: "max by (instance) (src_gitserver_janitor_running)",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("janitor process running").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "1, if the janitor process is currently running",
},
},
@@ -426,7 +426,7 @@ func GitServer() *monitoring.Dashboard {
Query: "histogram_quantile(0.95, sum(rate(src_gitserver_janitor_job_duration_seconds_bucket[5m])) by (le, job_name))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{job_name}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "95th percentile job run duration",
},
},
@@ -437,7 +437,7 @@ func GitServer() *monitoring.Dashboard {
Query: `sum by (job_name) (rate(src_gitserver_janitor_job_duration_seconds_count{success="false"}[5m]))`,
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{job_name}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "the rate of failures over 5m (by job)",
},
},
@@ -448,7 +448,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum by (instance) (rate(src_gitserver_repos_removed_disk_pressure[5m]))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "Repositories removed due to disk pressure",
},
},
@@ -459,7 +459,7 @@ func GitServer() *monitoring.Dashboard {
Query: "sum by (instance) (increase(src_gitserver_non_existing_repos_removed[5m]))",
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{instance}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "Repositoriess removed because they are not defined in the DB",
},
},
@@ -470,7 +470,7 @@ func GitServer() *monitoring.Dashboard {
Query: `sum by (reason) (rate(src_gitserver_maintenance_status{success="true"}[1h]))`,
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{reason}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "the rate of successful sg maintenance jobs and the reason why they were triggered",
},
},
@@ -481,7 +481,7 @@ func GitServer() *monitoring.Dashboard {
Query: `sum by (skipped) (rate(src_gitserver_prune_status{success="true"}[1h]))`,
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("skipped={{skipped}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "the rate of successful git prune jobs over 1h and whether they were skipped",
},
},
@@ -544,7 +544,7 @@ func GitServer() *monitoring.Dashboard {
ServiceName: "gitserver",
InstanceFilterRegex: `${shard:regex}`,
},
- monitoring.ObservableOwnerRepoManagement,
+ monitoring.ObservableOwnerSource,
),
shared.NewGRPCServerMetricsGroup(
@@ -560,6 +560,7 @@ func GitServer() *monitoring.Dashboard {
shared.GRPCInternalErrorMetricsOptions{
HumanServiceName: "gitserver",
RawGRPCServiceName: grpcServiceName,
+ Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerSearchCore),
@@ -569,10 +570,10 @@ func GitServer() *monitoring.Dashboard {
shared.HTTP.NewHandlersGroup(containerName),
shared.NewDatabaseConnectionsMonitoringGroup(containerName),
- shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerRepoManagement, nil),
- shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerRepoManagement, provisioningIndicatorsOptions),
- shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerRepoManagement, nil),
- shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerRepoManagement, nil),
+ shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
+ shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerSource, provisioningIndicatorsOptions),
+ shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
+ shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
},
}
}
diff --git a/monitoring/definitions/github_proxy.go b/monitoring/definitions/github_proxy.go
index 3661ff0f12f18..0c31220fc86ab 100644
--- a/monitoring/definitions/github_proxy.go
+++ b/monitoring/definitions/github_proxy.go
@@ -25,7 +25,7 @@ func GitHubProxy() *monitoring.Dashboard {
Query: `max(github_proxy_waiting_requests)`,
Warning: monitoring.Alert().GreaterOrEqual(100).For(5 * time.Minute),
Panel: monitoring.Panel().LegendFormat("requests waiting"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- **Check github-proxy logs for network connection issues.**
- **Check GitHub status.**`,
diff --git a/monitoring/definitions/repo_updater.go b/monitoring/definitions/repo_updater.go
index 5bfc9c34d0274..c1f845a19fe13 100644
--- a/monitoring/definitions/repo_updater.go
+++ b/monitoring/definitions/repo_updater.go
@@ -54,7 +54,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(timestamp(vector(time()))) - max(src_repoupdater_syncer_sync_last_time)`,
NoAlert: true,
Panel: monitoring.Panel().Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
A high value here indicates issues synchronizing repo metadata.
If the value is persistently high, make sure all external services have valid tokens.
@@ -66,7 +66,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(src_repoupdater_max_sync_backoff)`,
Critical: monitoring.Alert().GreaterOrEqual(syncDurationThreshold.Seconds()).For(10 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: fmt.Sprintf(`
An alert here indicates that no code host connections have synced in at least %v. This indicates that there could be a configuration issue
with your code host connections or networking issues affecting communication with your code hosts.
@@ -84,7 +84,7 @@ func RepoUpdater() *monitoring.Dashboard {
Warning: monitoring.Alert().Greater(0.5).For(10 * time.Minute),
Critical: monitoring.Alert().Greater(1).For(10 * time.Minute),
Panel: monitoring.Panel().LegendFormat("{{family}}").Unit(monitoring.Number).With(monitoring.PanelOptions.ZeroIfNoData()),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
An alert here indicates errors syncing site-level repo metadata with code hosts. This indicates that there could be a configuration issue
with your code host connections or networking issues affecting communication with your code hosts.
@@ -103,7 +103,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: fmt.Sprintf(`max by (family) (rate(src_repoupdater_syncer_start_sync{family="Syncer.SyncExternalService"}[%s]))`, syncDurationThreshold.String()),
Warning: monitoring.Alert().LessOrEqual(0).For(syncDurationThreshold),
Panel: monitoring.Panel().LegendFormat("Family: {{family}} Owner: {{owner}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs for errors.",
},
{
@@ -112,7 +112,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `histogram_quantile(0.95, max by (le, family, success) (rate(src_repoupdater_syncer_sync_duration_seconds_bucket[1m])))`,
Warning: monitoring.Alert().GreaterOrEqual(30).For(5 * time.Minute),
Panel: monitoring.Panel().LegendFormat("{{family}}-{{success}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host",
},
{
@@ -121,7 +121,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `histogram_quantile(0.95, max by (le) (rate(src_repoupdater_source_duration_seconds_bucket[1m])))`,
Warning: monitoring.Alert().GreaterOrEqual(30).For(5 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check the network latency is reasonable (<50ms) between the Sourcegraph and the code host",
},
},
@@ -134,7 +134,7 @@ func RepoUpdater() *monitoring.Dashboard {
AggregateBy(monitoring.AggregatorMax).
For(syncDurationThreshold),
Panel: monitoring.Panel().LegendFormat("{{state}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check network connectivity to code hosts",
},
{
@@ -143,7 +143,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(rate(src_repoupdater_source_repos_total[1m]))`,
Warning: monitoring.Alert().LessOrEqual(0).For(syncDurationThreshold),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check network connectivity to code hosts",
},
},
@@ -154,7 +154,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(rate(src_repoupdater_purge_failed[1m]))`,
Warning: monitoring.Alert().Greater(0).For(5 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater's connectivity with gitserver and gitserver logs",
},
},
@@ -165,7 +165,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(rate(src_repoupdater_sched_auto_fetch[1m]))`,
Warning: monitoring.Alert().LessOrEqual(0).For(syncDurationThreshold),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs.",
},
{
@@ -174,7 +174,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(rate(src_repoupdater_sched_manual_fetch[1m]))`,
NoAlert: true,
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: `
Check repo-updater logs if this value is persistently high.
This does not indicate anything if there are no user added code hosts.
@@ -188,7 +188,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(src_repoupdater_sched_known_repos)`,
Warning: monitoring.Alert().LessOrEqual(0).For(10 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs. This is expected to fire if there are no user added code hosts",
},
{
@@ -198,7 +198,7 @@ func RepoUpdater() *monitoring.Dashboard {
// Alert if the derivative is positive for longer than 30 minutes
Critical: monitoring.Alert().Greater(0).For(120 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs for indications that the queue is not being processed. The queue length should trend downwards over time as items are sent to GitServer",
},
{
@@ -207,7 +207,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(rate(src_repoupdater_sched_loops[1m]))`,
Warning: monitoring.Alert().LessOrEqual(0).For(syncDurationThreshold),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs for errors. This is expected to fire if there are no user added code hosts",
},
},
@@ -218,7 +218,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(src_repoupdater_stale_repos)`,
Warning: monitoring.Alert().GreaterOrEqual(1).For(25 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
Check repo-updater logs for errors.
Check for rows in gitserver_repos where LastError is not an empty string.
@@ -230,7 +230,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(rate(src_repoupdater_sched_error[1m]))`,
Critical: monitoring.Alert().GreaterOrEqual(1).For(25 * time.Minute),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs for errors",
},
},
@@ -423,7 +423,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(src_repoupdater_external_services_total)`,
Critical: monitoring.Alert().GreaterOrEqual(20000).For(1 * time.Hour),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check for spikes in external services, could be abuse",
},
},
@@ -434,7 +434,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(src_repoupdater_queued_sync_jobs_total)`,
Warning: monitoring.Alert().GreaterOrEqual(100).For(1 * time.Hour),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- **Check if jobs are failing to sync:** "SELECT * FROM external_service_sync_jobs WHERE state = 'errored'";
- **Increase the number of workers** using the 'repoConcurrentExternalServiceSyncers' site config.
@@ -446,7 +446,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(src_repoupdater_completed_sync_jobs_total)`,
Warning: monitoring.Alert().GreaterOrEqual(100000).For(1 * time.Hour),
Panel: monitoring.Panel().Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs. Jobs older than 1 day should have been removed.",
},
{
@@ -455,7 +455,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max(src_repoupdater_errored_sync_jobs_percentage)`,
Warning: monitoring.Alert().Greater(10).For(1 * time.Hour),
Panel: monitoring.Panel().Unit(monitoring.Percentage),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: "Check repo-updater logs. Check code host connectivity",
},
},
@@ -467,7 +467,7 @@ func RepoUpdater() *monitoring.Dashboard {
// 5% of initial limit of 5000
Warning: monitoring.Alert().LessOrEqual(250),
Panel: monitoring.Panel().LegendFormat("{{name}}"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- Consider creating a new token for the indicated resource (the 'name' label for series below the threshold in the dashboard) under a dedicated machine user to reduce rate limit pressure.
`,
@@ -479,7 +479,7 @@ func RepoUpdater() *monitoring.Dashboard {
// 5% of initial limit of 5000
Warning: monitoring.Alert().LessOrEqual(250),
Panel: monitoring.Panel().LegendFormat("{{name}}"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- Consider creating a new token for the indicated resource (the 'name' label for series below the threshold in the dashboard) under a dedicated machine user to reduce rate limit pressure.
`,
@@ -490,7 +490,7 @@ func RepoUpdater() *monitoring.Dashboard {
Query: `max by (name) (src_github_rate_limit_remaining_v2{resource="search"})`,
Warning: monitoring.Alert().LessOrEqual(5),
Panel: monitoring.Panel().LegendFormat("{{name}}"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `
- Consider creating a new token for the indicated resource (the 'name' label for series below the threshold in the dashboard) under a dedicated machine user to reduce rate limit pressure.
`,
@@ -502,7 +502,7 @@ func RepoUpdater() *monitoring.Dashboard {
Description: "time spent waiting for the GitHub graphql API rate limiter",
Query: `max by(name) (rate(src_github_rate_limit_wait_duration_seconds{resource="graphql"}[5m]))`,
Panel: monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NoAlert: true,
Interpretation: "Indicates how long we're waiting on the rate limit once it has been exceeded",
},
@@ -511,7 +511,7 @@ func RepoUpdater() *monitoring.Dashboard {
Description: "time spent waiting for the GitHub rest API rate limiter",
Query: `max by(name) (rate(src_github_rate_limit_wait_duration_seconds{resource="rest"}[5m]))`,
Panel: monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NoAlert: true,
Interpretation: "Indicates how long we're waiting on the rate limit once it has been exceeded",
},
@@ -520,7 +520,7 @@ func RepoUpdater() *monitoring.Dashboard {
Description: "time spent waiting for the GitHub search API rate limiter",
Query: `max by(name) (rate(src_github_rate_limit_wait_duration_seconds{resource="search"}[5m]))`,
Panel: monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NoAlert: true,
Interpretation: "Indicates how long we're waiting on the rate limit once it has been exceeded",
},
@@ -533,7 +533,7 @@ func RepoUpdater() *monitoring.Dashboard {
// 5% of initial limit of 600
Critical: monitoring.Alert().LessOrEqual(30),
Panel: monitoring.Panel().LegendFormat("{{name}}"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NextSteps: `Try restarting the pod to get a different public IP.`,
},
{
@@ -541,7 +541,7 @@ func RepoUpdater() *monitoring.Dashboard {
Description: "time spent waiting for the GitLab rest API rate limiter",
Query: `max by (name) (rate(src_gitlab_rate_limit_wait_duration_seconds{resource="rest"}[5m]))`,
Panel: monitoring.Panel().LegendFormat("{{name}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NoAlert: true,
Interpretation: "Indicates how long we're waiting on the rate limit once it has been exceeded",
},
@@ -552,7 +552,7 @@ func RepoUpdater() *monitoring.Dashboard {
Description: "95th percentile time spent successfully waiting on our internal rate limiter",
Query: `histogram_quantile(0.95, sum(rate(src_internal_rate_limit_wait_duration_bucket{failed="false"}[5m])) by (le, urn))`,
Panel: monitoring.Panel().LegendFormat("{{urn}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NoAlert: true,
Interpretation: "Indicates how long we're waiting on our internal rate limiter when communicating with a code host",
},
@@ -561,7 +561,7 @@ func RepoUpdater() *monitoring.Dashboard {
Description: "rate of failures waiting on our internal rate limiter",
Query: `sum by (urn) (rate(src_internal_rate_limit_wait_duration_count{failed="true"}[5m]))`,
Panel: monitoring.Panel().LegendFormat("{{urn}}"),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
NoAlert: true,
Interpretation: "The rate at which we fail our internal rate limiter.",
},
@@ -582,23 +582,24 @@ func RepoUpdater() *monitoring.Dashboard {
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
InstanceFilterRegex: `${instance:regex}`,
- }, monitoring.ObservableOwnerRepoManagement),
+ }, monitoring.ObservableOwnerSource),
shared.NewGRPCInternalErrorMetricsGroup(
shared.GRPCInternalErrorMetricsOptions{
HumanServiceName: "repo_updater",
RawGRPCServiceName: grpcServiceName,
+ Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
- }, monitoring.ObservableOwnerRepoManagement),
+ }, monitoring.ObservableOwnerSource),
shared.HTTP.NewHandlersGroup(containerName),
- shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerRepoManagement, nil),
+ shared.NewFrontendInternalAPIErrorResponseMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
shared.NewDatabaseConnectionsMonitoringGroup(containerName),
- shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerRepoManagement, containerMonitoringOptions),
- shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerRepoManagement, nil),
- shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerRepoManagement, nil),
- shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerRepoManagement, nil),
+ shared.NewContainerMonitoringGroup(containerName, monitoring.ObservableOwnerSource, containerMonitoringOptions),
+ shared.NewProvisioningIndicatorsGroup(containerName, monitoring.ObservableOwnerSource, nil),
+ shared.NewGolangMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
+ shared.NewKubernetesMonitoringGroup(containerName, monitoring.ObservableOwnerSource, nil),
},
}
}
diff --git a/monitoring/definitions/searcher.go b/monitoring/definitions/searcher.go
index 4e0758ccc0c71..fea05aabe70ac 100644
--- a/monitoring/definitions/searcher.go
+++ b/monitoring/definitions/searcher.go
@@ -235,6 +235,7 @@ regularly above 0 it is a sign for further investigation.`,
shared.GRPCInternalErrorMetricsOptions{
HumanServiceName: "searcher",
RawGRPCServiceName: grpcServiceName,
+ Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerSearchCore),
diff --git a/monitoring/definitions/shared/gitserver.go b/monitoring/definitions/shared/gitserver.go
index e2122b34a960c..e786c54a30bc9 100644
--- a/monitoring/definitions/shared/gitserver.go
+++ b/monitoring/definitions/shared/gitserver.go
@@ -14,7 +14,7 @@ type gitServer struct{}
// src_gitserver_api_duration_seconds_bucket
// src_gitserver_api_errors_total
func (gitServer) NewAPIGroup(containerName string) monitoring.Group {
- return Observation.NewGroup(containerName, monitoring.ObservableOwnerRepoManagement, ObservationGroupOptions{
+ return Observation.NewGroup(containerName, monitoring.ObservableOwnerSource, ObservationGroupOptions{
GroupConstructorOptions: GroupConstructorOptions{
Namespace: "gitserver",
DescriptionRoot: "Gitserver API (powered by internal/observation)",
@@ -46,7 +46,7 @@ func (gitServer) NewAPIGroup(containerName string) monitoring.Group {
// src_gitserver_client_duration_seconds_bucket
// src_gitserver_client_errors_total
func (gitServer) NewClientGroup(containerName string) monitoring.Group {
- return Observation.NewGroup(containerName, monitoring.ObservableOwnerRepoManagement, ObservationGroupOptions{
+ return Observation.NewGroup(containerName, monitoring.ObservableOwnerSource, ObservationGroupOptions{
GroupConstructorOptions: GroupConstructorOptions{
Namespace: "gitserver",
DescriptionRoot: "Gitserver Client",
@@ -84,7 +84,7 @@ func (gitServer) NewBatchLogSemaphoreWait(containerName string) monitoring.Group
NoAlertsOption("none")(Observation.Duration(ObservableConstructorOptions{
MetricNameRoot: "batch_log_semaphore_wait",
MetricDescriptionRoot: "batch log semaphore",
- })(containerName, monitoring.ObservableOwnerRepoManagement)).Observable(),
+ })(containerName, monitoring.ObservableOwnerSource)).Observable(),
},
},
}
diff --git a/monitoring/definitions/shared/grpc.go b/monitoring/definitions/shared/grpc.go
index 05bf2597b7717..8cdac4a8bb342 100644
--- a/monitoring/definitions/shared/grpc.go
+++ b/monitoring/definitions/shared/grpc.go
@@ -289,6 +289,9 @@ type GRPCInternalErrorMetricsOptions struct {
//
// Example: (Search | Exec)
MethodFilterRegex string
+
+ // Namespace is the Prometheus metrics namespace for metrics emitted by this service.
+ Namespace string
}
// NewGRPCInternalErrorMetricsGroup creates a Group containing metrics that track "internal" gRPC errors.
@@ -296,6 +299,10 @@ func NewGRPCInternalErrorMetricsGroup(opts GRPCInternalErrorMetricsOptions, owne
metric := func(base string, labelFilters ...string) string {
m := base
+ if opts.Namespace != "" {
+ m = fmt.Sprintf("%s_%s", opts.Namespace, m)
+ }
+
if len(labelFilters) > 0 {
m = fmt.Sprintf("%s{%s}", m, strings.Join(labelFilters, ","))
}
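
To make the effect of the new `Namespace` option concrete, here is a minimal, self-contained sketch of the prefixing behaviour added above. The helper name, the base metric name, and the label filter are illustrative stand-ins, not the actual values used by the shared package.

```go
package main

import (
	"fmt"
	"strings"
)

// metricName mirrors the logic added to the metric helper above: a non-empty
// namespace is joined to the base metric name with an underscore, and any
// label filters are appended afterwards.
func metricName(namespace, base string, labelFilters ...string) string {
	m := base
	if namespace != "" {
		m = fmt.Sprintf("%s_%s", namespace, m)
	}
	if len(labelFilters) > 0 {
		m = fmt.Sprintf("%s{%s}", m, strings.Join(labelFilters, ","))
	}
	return m
}

func main() {
	// With Namespace: "src" (hypothetical base metric and label filter).
	fmt.Println(metricName("src", "grpc_method_status", `grpc_code!="OK"`))
	// src_grpc_method_status{grpc_code!="OK"}

	// With an empty Namespace, as in the zoekt-webserver group further below,
	// the base metric name is left untouched.
	fmt.Println(metricName("", "grpc_method_status"))
	// grpc_method_status
}
```
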
diff --git a/monitoring/definitions/shared/http.go b/monitoring/definitions/shared/http.go
index 095450f71579e..73096da543163 100644
--- a/monitoring/definitions/shared/http.go
+++ b/monitoring/definitions/shared/http.go
@@ -22,7 +22,7 @@ func (http) NewHandlersGroup(name string) monitoring.Group {
Query: fmt.Sprintf("sum by (route) (rate(src_http_request_duration_seconds_count{app=\"%s\",code=~\"2..\"}[5m]))", name),
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{route}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "The number of healthy HTTP requests per second to internal HTTP api",
},
{
@@ -31,7 +31,7 @@ func (http) NewHandlersGroup(name string) monitoring.Group {
Query: fmt.Sprintf("sum by (route) (rate(src_http_request_duration_seconds_count{app=\"%s\",code!~\"2..\"}[5m]))", name),
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{route}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "The number of unhealthy HTTP requests per second to internal HTTP api",
},
{
@@ -40,7 +40,7 @@ func (http) NewHandlersGroup(name string) monitoring.Group {
Query: fmt.Sprintf("sum by (code) (rate(src_http_request_duration_seconds_count{app=\"%s\"}[5m]))", name),
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{code}}").Unit(monitoring.Number),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "The number of HTTP requests per second by code",
},
},
@@ -51,7 +51,7 @@ func (http) NewHandlersGroup(name string) monitoring.Group {
Query: fmt.Sprintf("histogram_quantile(0.95, sum(rate(src_http_request_duration_seconds_bucket{app=\"%s\",code=~\"2..\"}[5m])) by (le, route))", name),
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{route}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "The 95th percentile duration by route when the status code is 200 ",
},
{
@@ -60,7 +60,7 @@ func (http) NewHandlersGroup(name string) monitoring.Group {
Query: fmt.Sprintf("histogram_quantile(0.95, sum(rate(src_http_request_duration_seconds_bucket{app=\"%s\",code!~\"2..\"}[5m])) by (le, route))", name),
NoAlert: true,
Panel: monitoring.Panel().LegendFormat("{{route}}").Unit(monitoring.Seconds),
- Owner: monitoring.ObservableOwnerRepoManagement,
+ Owner: monitoring.ObservableOwnerSource,
Interpretation: "The 95th percentile duration by route when the status code is not 200 ",
},
},
diff --git a/monitoring/definitions/symbols.go b/monitoring/definitions/symbols.go
index 85100483ec788..e4c9a029025c7 100644
--- a/monitoring/definitions/symbols.go
+++ b/monitoring/definitions/symbols.go
@@ -52,6 +52,7 @@ func Symbols() *monitoring.Dashboard {
shared.GRPCInternalErrorMetricsOptions{
HumanServiceName: containerName,
RawGRPCServiceName: grpcServiceName,
+ Namespace: "src",
MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
}, monitoring.ObservableOwnerCodeIntel),
diff --git a/monitoring/definitions/worker.go b/monitoring/definitions/worker.go
index b025a051f3cd1..78dc6aca59279 100644
--- a/monitoring/definitions/worker.go
+++ b/monitoring/definitions/worker.go
@@ -93,7 +93,7 @@ func Worker() *monitoring.Dashboard {
Panel: monitoring.Panel().LegendFormat("{{tableName}}").Unit(monitoring.Percentage).Min(0).Max(100),
Owner: owner,
}
- }(monitoring.ObservableOwnerRepoManagement).WithNoAlerts(`
+ }(monitoring.ObservableOwnerSource).WithNoAlerts(`
Percentage of encrypted database records
`).Observable(),
@@ -101,7 +101,7 @@ func Worker() *monitoring.Dashboard {
MetricNameRoot: "records_encrypted",
MetricDescriptionRoot: "database",
By: []string{"tableName"},
- })(containerName, monitoring.ObservableOwnerRepoManagement).WithNoAlerts(`
+ })(containerName, monitoring.ObservableOwnerSource).WithNoAlerts(`
Number of encrypted database records every 5m
`).Observable(),
@@ -109,14 +109,14 @@ func Worker() *monitoring.Dashboard {
MetricNameRoot: "records_decrypted",
MetricDescriptionRoot: "database",
By: []string{"tableName"},
- })(containerName, monitoring.ObservableOwnerRepoManagement).WithNoAlerts(`
+ })(containerName, monitoring.ObservableOwnerSource).WithNoAlerts(`
Number of decrypted database records every 5m
`).Observable(),
shared.Observation.Errors(shared.ObservableConstructorOptions{
MetricNameRoot: "record_encryption",
MetricDescriptionRoot: "encryption",
- })(containerName, monitoring.ObservableOwnerRepoManagement).WithNoAlerts(`
+ })(containerName, monitoring.ObservableOwnerSource).WithNoAlerts(`
Number of database record encryption/decryption errors every 5m
`).Observable(),
},
diff --git a/monitoring/definitions/zoekt.go b/monitoring/definitions/zoekt.go
index 5d7b2037d418b..a8c82f28fd12d 100644
--- a/monitoring/definitions/zoekt.go
+++ b/monitoring/definitions/zoekt.go
@@ -15,8 +15,11 @@ func Zoekt() *monitoring.Dashboard {
indexServerContainerName = "zoekt-indexserver"
webserverContainerName = "zoekt-webserver"
bundledContainerName = "indexed-search"
+ grpcServiceName = "zoekt.webserver.v1.WebserverService"
)
+ grpcMethodVariable := shared.GRPCMethodVariable(grpcServiceName)
+
return &monitoring.Dashboard{
Name: "zoekt",
Title: "Zoekt",
@@ -33,6 +36,17 @@ func Zoekt() *monitoring.Dashboard {
},
Multi: true,
},
+ {
+ Label: "Webserver Instance",
+ Name: "webserver_instance",
+ OptionsLabelValues: monitoring.ContainerVariableOptionsLabelValues{
+ Query: "zoekt_webserver_watchdog_errors",
+ LabelName: "instance",
+ ExampleOption: "zoekt-webserver-0:6072",
+ },
+ Multi: true,
+ },
+ grpcMethodVariable,
},
Groups: []monitoring.Group{
{
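
As a usage sketch of the new dashboard variable declared above: the field names below mirror the hunk, but the local types are hypothetical stand-ins rather than the real monitoring package API. In practice the variable's `Name` is what filter expressions interpolate, such as the `InstanceFilterRegex` passed to the gRPC server metrics group added below.

```go
package main

import "fmt"

// Hypothetical stand-ins for the dashboard variable types; field names mirror
// the hunk above, but these types are illustrative only.
type variableOptionsLabelValues struct {
	Query         string
	LabelName     string
	ExampleOption string
}

type containerVariable struct {
	Label              string
	Name               string
	OptionsLabelValues variableOptionsLabelValues
	Multi              bool
}

func main() {
	webserverInstance := containerVariable{
		Label: "Webserver Instance",
		Name:  "webserver_instance",
		OptionsLabelValues: variableOptionsLabelValues{
			Query:         "zoekt_webserver_watchdog_errors",
			LabelName:     "instance",
			ExampleOption: "zoekt-webserver-0:6072",
		},
		Multi: true,
	}

	// The variable's Name is interpolated into per-instance filters, e.g. the
	// InstanceFilterRegex used by the gRPC server metrics group added below.
	fmt.Printf("${%s:regex}\n", webserverInstance.Name) // ${webserver_instance:regex}
}
```
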
@@ -1055,6 +1069,25 @@ func Zoekt() *monitoring.Dashboard {
},
},
},
+
+ shared.NewGRPCServerMetricsGroup(
+ shared.GRPCServerMetricsOptions{
+ HumanServiceName: "zoekt-webserver",
+ RawGRPCServiceName: grpcServiceName,
+
+ MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
+ InstanceFilterRegex: `${webserver_instance:regex}`,
+ }, monitoring.ObservableOwnerSearchCore),
+
+ shared.NewGRPCInternalErrorMetricsGroup(
+ shared.GRPCInternalErrorMetricsOptions{
+ HumanServiceName: "zoekt-webserver",
+ RawGRPCServiceName: grpcServiceName,
+ Namespace: "", // deliberately empty
+
+ MethodFilterRegex: fmt.Sprintf("${%s:regex}", grpcMethodVariable.Name),
+ }, monitoring.ObservableOwnerSearchCore),
+
shared.NewDiskMetricsGroup(
shared.DiskMetricsGroupOptions{
DiskTitle: "data",
diff --git a/monitoring/monitoring/monitoring.go b/monitoring/monitoring/monitoring.go
index f3125716219d1..54f0241c9be2a 100644
--- a/monitoring/monitoring/monitoring.go
+++ b/monitoring/monitoring/monitoring.go
@@ -501,10 +501,10 @@ var (
handbookSlug: "security",
teamName: "Security",
}
- ObservableOwnerRepoManagement = ObservableOwner{
- identifier: "repo-management",
- handbookSlug: "repo-management",
- teamName: "Repo Management",
+ ObservableOwnerSource = ObservableOwner{
+ identifier: "source",
+ handbookSlug: "source",
+ teamName: "Source",
}
ObservableOwnerCodeInsights = ObservableOwner{
identifier: "code-insights",