diff --git a/antarest/core/config.py b/antarest/core/config.py
index 86acdd8bfd..84e144dfe6 100644
--- a/antarest/core/config.py
+++ b/antarest/core/config.py
@@ -365,6 +365,36 @@ def from_dict(data: JSON) -> "ServerConfig":
         )
 
 
+@dataclass(frozen=True)
+class PrometheusConfig:
+    """
+    Sub config object dedicated to prometheus metrics
+    """
+
+    multiprocess: bool = False
+
+    @classmethod
+    def from_dict(cls, data: JSON) -> "PrometheusConfig":
+        return cls(multiprocess=bool(data["multiprocess"]))
+
+
+@dataclass(frozen=True)
+class MetricsConfig:
+    """
+    Sub config object dedicated to metrics
+    """
+
+    prometheus: Optional[PrometheusConfig] = None
+
+    @classmethod
+    def from_dict(cls, data: JSON) -> "MetricsConfig":
+        return cls(
+            prometheus=PrometheusConfig.from_dict(data["prometheus"])
+            if "prometheus" in data
+            else None
+        )
+
+
 @dataclass(frozen=True)
 class Config:
     """
@@ -383,6 +413,7 @@ class Config:
     eventbus: EventBusConfig = EventBusConfig()
     cache: CacheConfig = CacheConfig()
     tasks: TaskConfig = TaskConfig()
+    metrics: MetricsConfig = MetricsConfig()
     root_path: str = ""
 
     @staticmethod
@@ -421,6 +452,9 @@ def from_dict(data: JSON, res: Optional[Path] = None) -> "Config":
             server=ServerConfig.from_dict(data["server"])
             if "server" in data
             else ServerConfig(),
+            metrics=MetricsConfig.from_dict(data["metrics"])
+            if "metrics" in data
+            else MetricsConfig(),
         )
 
     @staticmethod
diff --git a/antarest/core/exceptions.py b/antarest/core/exceptions.py
index 5a0476751f..c04ddd38b6 100644
--- a/antarest/core/exceptions.py
+++ b/antarest/core/exceptions.py
@@ -224,3 +224,9 @@ def __init__(self) -> None:
             HTTPStatus.BAD_REQUEST,
             "You cannot scan the default internal workspace",
         )
+
+
+class ConfigurationError(RuntimeError):
+    """
+    Raised when some configuration is invalid.
+    """
diff --git a/antarest/core/metrics.py b/antarest/core/metrics.py
new file mode 100644
index 0000000000..b88ab760ad
--- /dev/null
+++ b/antarest/core/metrics.py
@@ -0,0 +1,91 @@
+import logging
+import os
+import time
+
+import prometheus_client
+from fastapi import FastAPI
+from prometheus_client import (
+    CollectorRegistry,
+    Counter,
+    Histogram,
+    make_asgi_app,
+)
+from prometheus_client import multiprocess
+from starlette.requests import Request
+
+from antarest.core.config import Config
+from antarest.core.exceptions import ConfigurationError
+
+logger = logging.getLogger(__name__)
+
+
+_PROMETHEUS_MULTIPROCESS_ENV_VAR = "PROMETHEUS_MULTIPROC_DIR"
+
+
+def _add_metrics_middleware(
+    application: FastAPI, registry: CollectorRegistry, worker_id: str
+) -> None:
+    """
+    Registers an HTTP middleware to report metrics about requests count and duration
+    """
+
+    request_counter = Counter(
+        "request_count",
+        "App Request Count",
+        ["worker_id", "method", "endpoint", "http_status"],
+        registry=registry,
+    )
+    request_duration_histo = Histogram(
+        "request_duration_seconds",
+        "Request duration",
+        ["worker_id", "method", "endpoint", "http_status"],
+        registry=registry,
+    )
+
+    @application.middleware("http")
+    async def add_metrics(request: Request, call_next):
+        start_time = time.time()
+        response = await call_next(request)
+        process_time = time.time() - start_time
+
+        if "route" in request.scope:
+            request_path = (
+                request.scope["root_path"] + request.scope["route"].path
+            )
+        else:
+            request_path = request.url.path
+
+        request_counter.labels(
+            worker_id, request.method, request_path, response.status_code
+        ).inc()
+        request_duration_histo.labels(
+            worker_id, request.method, request_path, response.status_code
+        ).observe(process_time)
+        return response
+
+
+def add_metrics(application: FastAPI, config: Config) -> None:
+    """
+    If configured, adds "/metrics" endpoint to report metrics to prometheus.
+    Also registers metrics for HTTP requests.
+    """
+    prometheus_config = config.metrics.prometheus
+    if not prometheus_config:
+        return
+
+    if prometheus_config.multiprocess:
+        if _PROMETHEUS_MULTIPROCESS_ENV_VAR not in os.environ:
+            raise ConfigurationError(
+                f"Environment variable {_PROMETHEUS_MULTIPROCESS_ENV_VAR} must be defined for use of prometheus in a multiprocess environment"
+            )
+        registry = CollectorRegistry()
+        multiprocess.MultiProcessCollector(registry)
+        worker_id = os.environ["APP_WORKER_ID"]
+    else:
+        registry = prometheus_client.REGISTRY
+        worker_id = "0"
+
+    metrics_app = make_asgi_app(registry=registry)
+    application.mount("/metrics", metrics_app)
+
+    _add_metrics_middleware(application, registry, worker_id)
diff --git a/antarest/main.py b/antarest/main.py
index 217022f162..90dfa711af 100644
--- a/antarest/main.py
+++ b/antarest/main.py
@@ -21,6 +21,7 @@
 from antarest.core.config import Config
 from antarest.core.core_blueprint import create_utils_routes
 from antarest.core.logging.utils import configure_logger, LoggingMiddleware
+from antarest.core.metrics import add_metrics
 from antarest.core.requests import RATE_LIMIT_CONFIG
 from antarest.core.swagger import customize_openapi
 from antarest.core.utils.utils import get_local_path
@@ -135,6 +136,8 @@ def fastapi_app(
 
     application.add_middleware(LoggingMiddleware)
 
+    add_metrics(application, config)
+
     if mount_front:
         application.mount(
             "/static",
@@ -273,7 +276,7 @@ def handle_all_exception(request: Request, exc: Exception) -> Any:
     return application, services
 
 
-if __name__ == "__main__":
+def main() -> None:
     (
         config_file,
         display_version,
@@ -297,3 +300,7 @@ def handle_all_exception(request: Request, exc: Exception) -> Any:
     else:
         services = SingletonServices(config_file, [module])
     services.start()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/conf/gunicorn.py b/conf/gunicorn.py
index 49d0020160..8dc222700b 100644
--- a/conf/gunicorn.py
+++ b/conf/gunicorn.py
@@ -2,6 +2,7 @@
 import multiprocessing
 import os
 
+from prometheus_client import multiprocess
 
 bind = "0.0.0.0:5000"
 
@@ -28,3 +29,17 @@
 errorlog = "-"
 accesslog = "-"
 preload_app = False
+
+
+def post_fork(server, worker):
+    """
+    Put the worker_id into an env variable for further use within the app.
+    """
+    os.environ["APP_WORKER_ID"] = str(worker.age)
+
+
+def child_exit(server, worker):
+    """
+    Notify prometheus that this worker stops
+    """
+    multiprocess.mark_process_dead(worker.pid)
diff --git a/docker-compose.yml b/docker-compose.yml
index a18e3bc817..9fcd6a8ff8 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -49,9 +49,13 @@ services:
     volumes:
       - ./resources/deploy/db:/var/lib/postgresql/data
     command: [ "postgres", "-c", "log_statement=all", "-c", "log_destination=stderr" ]
+    ports:
+      - 5432:5432
   redis:
     image: redis:latest
     container_name : redis
+    ports:
+      - 6379:6379
   nginx:
     image: nginx:latest
     container_name: nginx
@@ -62,4 +66,4 @@
     volumes:
       - ./resources/deploy/nginx.conf:/etc/nginx/conf.d/default.conf:ro
       - ./webapp/build:/www
-      - ./resources/deploy/web.config.json:/www/config.json:ro
\ No newline at end of file
+      - ./resources/deploy/web.config.json:/www/config.json:ro
diff --git a/requirements.txt b/requirements.txt
index 06f220802c..baf452224d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,7 +8,7 @@ checksumdir~=1.2.0
 click~=8.0.3
 contextvars~=2.4
 fastapi-jwt-auth~=0.5.0
-fastapi[all]~=0.73.0
+fastapi[all]~=0.74.0
 filelock~=3.4.2
 gunicorn~=20.1.0
 Jinja2~=3.0.3
@@ -18,6 +18,7 @@ MarkupSafe~=2.0.1
 numpy~=1.22.1
 pandas~=1.4.0
 plyer~=2.0.0
+prometheus-client~=0.16.0
 psycopg2-binary==2.9.4
 pydantic~=1.9.0
 PyQt5~=5.15.6
diff --git a/scripts/start.sh b/scripts/start.sh
index 966fa187e0..0d1c87d8da 100755
--- a/scripts/start.sh
+++ b/scripts/start.sh
@@ -5,6 +5,12 @@ set -e
 CURDIR=$(cd `dirname $0` && pwd)
 BASEDIR=`dirname $CURDIR`
 
+if [[ -v PROMETHEUS_MULTIPROC_DIR ]]; then
+  mkdir -p "${PROMETHEUS_MULTIPROC_DIR}"
+  rm -f "${PROMETHEUS_MULTIPROC_DIR}"/*.db
+  echo "Cleaned stale prometheus metrics in ${PROMETHEUS_MULTIPROC_DIR}"
+fi
+
 if [ -z "$1" ] ; then
     sh $CURDIR/pre-start.sh
     gunicorn --config $BASEDIR/conf/gunicorn.py --worker-class=uvicorn.workers.UvicornWorker antarest.wsgi:app