From b206c2b7ceb9fa334bff7ff046ca4b0f705034a5 Mon Sep 17 00:00:00 2001 From: Daniel Dietzler Date: Wed, 23 Oct 2024 22:07:49 +0200 Subject: [PATCH] refactor(server)!: telemetry env variables Co-authored-by: Mert <101130780+mertalev@users.noreply.github.com> --- docs/docs/features/monitoring.md | 4 +- docs/docs/install/environment-variables.md | 13 ++---- server/src/enum.ts | 8 ++++ server/src/interfaces/config.interface.ts | 8 +--- .../repositories/config.repository.spec.ts | 45 ++++++++----------- server/src/repositories/config.repository.ts | 43 +++++++++--------- .../src/repositories/telemetry.repository.ts | 15 ++++--- server/src/workers/api.ts | 2 +- server/src/workers/microservices.ts | 2 +- .../repositories/config.repository.mock.ts | 6 +-- 10 files changed, 67 insertions(+), 79 deletions(-) diff --git a/docs/docs/features/monitoring.md b/docs/docs/features/monitoring.md index 9de3feb7f6d79..184394abd047e 100644 --- a/docs/docs/features/monitoring.md +++ b/docs/docs/features/monitoring.md @@ -25,10 +25,10 @@ The metrics in immich are grouped into API (endpoint calls and response times), ### Configuration -Immich will not expose an endpoint for metrics by default. To enable this endpoint, you can add the `IMMICH_METRICS=true` environmental variable to your `.env` file. Note that only the server and microservices containers currently use this variable. +Immich will not expose an endpoint for metrics by default. To enable this endpoint, you can add the `IMMICH_TELEMETRY_INCLUDE=all` environmental variable to your `.env` file. Note that only the server container currently uses this variable. :::tip -`IMMICH_METRICS` enables all metrics, but there are also [environmental variables](/docs/install/environment-variables.md#prometheus) to toggle specific metric groups. If you'd like to only expose certain kinds of metrics, you can set only those environmental variables to `true`.
Explicitly setting the environmental variable for a metric group overrides `IMMICH_METRICS` for that group. For example, setting `IMMICH_METRICS=true` and `IMMICH_API_METRICS=false` will enable all metrics except API metrics. +`IMMICH_TELEMETRY_INCLUDE=all` enables all metrics. For a more granular configuration you can enumerate the telemetry metrics that should be included as a comma separated list (e.g. `IMMICH_TELEMETRY_INCLUDE=repo,api`). Alternatively, you can also exclude specific metrics with `IMMICH_TELEMETRY_EXCLUDE`. For more information refer to the [environment section](/docs/install/environment-variables.md#prometheus). ::: The next step is to configure a new or existing Prometheus instance to scrape this endpoint. The following steps assume that you do not have an existing Prometheus instance, but the steps will be similar either way. diff --git a/docs/docs/install/environment-variables.md b/docs/docs/install/environment-variables.md index e86199dc74d99..1f34b5c6d00a4 100644 --- a/docs/docs/install/environment-variables.md +++ b/docs/docs/install/environment-variables.md @@ -183,15 +183,10 @@ Other machine learning parameters can be tuned from the admin UI. ## Prometheus -| Variable | Description | Default | Containers | Workers | -| :----------------------------- | :-------------------------------------------------------------------------------------------- | :-----: | :--------- | :----------------- | -| `IMMICH_METRICS`\*1 | Toggle all metrics (one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_API_METRICS` | Toggle metrics for endpoints and response times (one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_HOST_METRICS` | Toggle metrics for CPU and memory utilization for host and process (one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_IO_METRICS` | Toggle metrics for database queries, image processing, etc. 
(one of [`true`, `false`]) | | server | api, microservices | -| `IMMICH_JOB_METRICS` | Toggle metrics for jobs and queues (one of [`true`, `false`]) | | server | api, microservices | - -\*1: Overridden for a metric group when its corresponding environmental variable is set. +| Variable | Description | Default | Containers | Workers | +| :------------------------- | :-------------------------------------------------------------------------------------------------------------------- | :-----: | :--------- | :----------------- | +| `IMMICH_TELEMETRY_INCLUDE` | Collect these telemetries. List of `host`, `api`, `io`, `repo`, `job`. Note: You can also specify `all` to enable all | | server | api, microservices | +| `IMMICH_TELEMETRY_EXCLUDE` | Do not collect these telemetries. List of `host`, `api`, `io`, `repo`, `job` | | server | api, microservices | ## Docker Secrets diff --git a/server/src/enum.ts b/server/src/enum.ts index 902d6635e7f52..1212f41ab07b2 100644 --- a/server/src/enum.ts +++ b/server/src/enum.ts @@ -363,3 +363,11 @@ export enum ImmichWorker { API = 'api', MICROSERVICES = 'microservices', } + +export enum ImmichTelemetry { + HOST = 'host', + API = 'api', + IO = 'io', + REPO = 'repo', + JOB = 'job', +} diff --git a/server/src/interfaces/config.interface.ts b/server/src/interfaces/config.interface.ts index 4391909df7131..e201241e82e3f 100644 --- a/server/src/interfaces/config.interface.ts +++ b/server/src/interfaces/config.interface.ts @@ -2,7 +2,7 @@ import { RegisterQueueOptions } from '@nestjs/bullmq'; import { QueueOptions } from 'bullmq'; import { RedisOptions } from 'ioredis'; import { OpenTelemetryModuleOptions } from 'nestjs-otel/lib/interfaces'; -import { ImmichEnvironment, ImmichWorker, LogLevel } from 'src/enum'; +import { ImmichEnvironment, ImmichTelemetry, ImmichWorker, LogLevel } from 'src/enum'; import { VectorExtension } from 'src/interfaces/database.interface'; export const IConfigRepository = 'IConfigRepository'; @@ -77,11 +77,7 @@ export 
interface EnvData { telemetry: { apiPort: number; microservicesPort: number; - enabled: boolean; - apiMetrics: boolean; - hostMetrics: boolean; - repoMetrics: boolean; - jobMetrics: boolean; + metrics: Set; }; storage: { diff --git a/server/src/repositories/config.repository.spec.ts b/server/src/repositories/config.repository.spec.ts index 84da211182793..516ceaaf8241a 100644 --- a/server/src/repositories/config.repository.spec.ts +++ b/server/src/repositories/config.repository.spec.ts @@ -1,3 +1,4 @@ +import { ImmichTelemetry } from 'src/enum'; import { clearEnvCache, ConfigRepository } from 'src/repositories/config.repository'; const getEnv = () => { @@ -12,11 +13,8 @@ const resetEnv = () => { 'IMMICH_TRUSTED_PROXIES', 'IMMICH_API_METRICS_PORT', 'IMMICH_MICROSERVICES_METRICS_PORT', - 'IMMICH_METRICS', - 'IMMICH_API_METRICS', - 'IMMICH_HOST_METRICS', - 'IMMICH_IO_METRICS', - 'IMMICH_JOB_METRICS', + 'IMMICH_TELEMETRY_INCLUDE', + 'IMMICH_TELEMETRY_EXCLUDE', 'DB_URL', 'DB_HOSTNAME', @@ -210,11 +208,7 @@ describe('getEnv', () => { expect(telemetry).toEqual({ apiPort: 8081, microservicesPort: 8082, - enabled: false, - apiMetrics: false, - hostMetrics: false, - jobMetrics: false, - repoMetrics: false, + metrics: new Set([]), }); }); @@ -225,32 +219,29 @@ describe('getEnv', () => { expect(telemetry).toMatchObject({ apiPort: 2001, microservicesPort: 2002, + metrics: expect.any(Set), }); }); it('should run with telemetry enabled', () => { - process.env.IMMICH_METRICS = 'true'; + process.env.IMMICH_TELEMETRY_INCLUDE = 'all'; const { telemetry } = getEnv(); - expect(telemetry).toMatchObject({ - enabled: true, - apiMetrics: true, - hostMetrics: true, - jobMetrics: true, - repoMetrics: true, - }); + expect(telemetry.metrics).toEqual(new Set(Object.values(ImmichTelemetry))); }); it('should run with telemetry enabled and jobs disabled', () => { - process.env.IMMICH_METRICS = 'true'; - process.env.IMMICH_JOB_METRICS = 'false'; + process.env.IMMICH_TELEMETRY_INCLUDE = 'all'; + 
process.env.IMMICH_TELEMETRY_EXCLUDE = 'job'; const { telemetry } = getEnv(); - expect(telemetry).toMatchObject({ - enabled: true, - apiMetrics: true, - hostMetrics: true, - jobMetrics: false, - repoMetrics: true, - }); + expect(telemetry.metrics).toEqual( + new Set([ImmichTelemetry.API, ImmichTelemetry.HOST, ImmichTelemetry.IO, ImmichTelemetry.REPO]), + ); + }); + + it('should run with specific telemetry metrics', () => { + process.env.IMMICH_TELEMETRY_INCLUDE = 'io, host, api'; + const { telemetry } = getEnv(); + expect(telemetry.metrics).toEqual(new Set([ImmichTelemetry.API, ImmichTelemetry.HOST, ImmichTelemetry.IO])); }); }); }); diff --git a/server/src/repositories/config.repository.ts b/server/src/repositories/config.repository.ts index fabccd78464d4..0abee0f603534 100644 --- a/server/src/repositories/config.repository.ts +++ b/server/src/repositories/config.repository.ts @@ -2,7 +2,7 @@ import { Injectable } from '@nestjs/common'; import { join } from 'node:path'; import { citiesFile, excludePaths } from 'src/constants'; import { Telemetry } from 'src/decorators'; -import { ImmichEnvironment, ImmichWorker, LogLevel } from 'src/enum'; +import { ImmichEnvironment, ImmichTelemetry, ImmichWorker, LogLevel } from 'src/enum'; import { EnvData, IConfigRepository } from 'src/interfaces/config.interface'; import { DatabaseExtension } from 'src/interfaces/database.interface'; import { QueueName } from 'src/interfaces/job.interface'; @@ -25,18 +25,17 @@ const stagingKeys = { }; const WORKER_TYPES = new Set(Object.values(ImmichWorker)); +const TELEMETRY_TYPES = new Set(Object.values(ImmichTelemetry)); -const asSet = (value: string | undefined, defaults: ImmichWorker[]) => { +const asSet = (value: string | undefined, defaults: T[]) => { const values = (value || '').replaceAll(/\s/g, '').split(',').filter(Boolean); - return new Set(values.length === 0 ? defaults : (values as ImmichWorker[])); + return new Set(values.length === 0 ? 
defaults : (values as T[])); }; -const parseBoolean = (value: string | undefined, defaultValue: boolean) => (value ? value === 'true' : defaultValue); - const getEnv = (): EnvData => { - const included = asSet(process.env.IMMICH_WORKERS_INCLUDE, [ImmichWorker.API, ImmichWorker.MICROSERVICES]); - const excluded = asSet(process.env.IMMICH_WORKERS_EXCLUDE, []); - const workers = [...setDifference(included, excluded)]; + const includedWorkers = asSet(process.env.IMMICH_WORKERS_INCLUDE, [ImmichWorker.API, ImmichWorker.MICROSERVICES]); + const excludedWorkers = asSet(process.env.IMMICH_WORKERS_EXCLUDE, []); + const workers = [...setDifference(includedWorkers, excludedWorkers)]; for (const worker of workers) { if (!WORKER_TYPES.has(worker)) { throw new Error(`Invalid worker(s) found: ${workers.join(',')}`); @@ -69,12 +68,18 @@ const getEnv = (): EnvData => { } } - const globalEnabled = parseBoolean(process.env.IMMICH_METRICS, false); - const hostMetrics = parseBoolean(process.env.IMMICH_HOST_METRICS, globalEnabled); - const apiMetrics = parseBoolean(process.env.IMMICH_API_METRICS, globalEnabled); - const repoMetrics = parseBoolean(process.env.IMMICH_IO_METRICS, globalEnabled); - const jobMetrics = parseBoolean(process.env.IMMICH_JOB_METRICS, globalEnabled); - const telemetryEnabled = globalEnabled || hostMetrics || apiMetrics || repoMetrics || jobMetrics; + const includedTelemetries = + process.env.IMMICH_TELEMETRY_INCLUDE === 'all' + ? 
new Set(Object.values(ImmichTelemetry)) + : asSet(process.env.IMMICH_TELEMETRY_INCLUDE, []); + + const excludedTelemetries = asSet(process.env.IMMICH_TELEMETRY_EXCLUDE, []); + const telemetries = setDifference(includedTelemetries, excludedTelemetries); + for (const telemetry of telemetries) { + if (!TELEMETRY_TYPES.has(telemetry)) { + throw new Error(`Invalid telemetry found: ${telemetry}`); + } + } return { host: process.env.IMMICH_HOST, @@ -136,9 +141,9 @@ const getEnv = (): EnvData => { otel: { metrics: { - hostMetrics, + hostMetrics: telemetries.has(ImmichTelemetry.HOST), apiMetrics: { - enable: apiMetrics, + enable: telemetries.has(ImmichTelemetry.API), ignoreRoutes: excludePaths, }, }, @@ -168,11 +173,7 @@ const getEnv = (): EnvData => { telemetry: { apiPort: Number(process.env.IMMICH_API_METRICS_PORT || '') || 8081, microservicesPort: Number(process.env.IMMICH_MICROSERVICES_METRICS_PORT || '') || 8082, - enabled: telemetryEnabled, - hostMetrics, - apiMetrics, - repoMetrics, - jobMetrics, + metrics: telemetries, }, workers, diff --git a/server/src/repositories/telemetry.repository.ts b/server/src/repositories/telemetry.repository.ts index f450c162dcdd2..25104609671c8 100644 --- a/server/src/repositories/telemetry.repository.ts +++ b/server/src/repositories/telemetry.repository.ts @@ -14,7 +14,7 @@ import { snakeCase, startCase } from 'lodash'; import { MetricService } from 'nestjs-otel'; import { copyMetadataFromFunctionToFunction } from 'nestjs-otel/lib/opentelemetry.utils'; import { serverVersion } from 'src/constants'; -import { MetadataKey } from 'src/enum'; +import { ImmichTelemetry, MetadataKey } from 'src/enum'; import { IConfigRepository } from 'src/interfaces/config.interface'; import { ILoggerRepository } from 'src/interfaces/logger.interface'; import { IMetricGroupRepository, ITelemetryRepository, MetricGroupOptions } from 'src/interfaces/telemetry.interface'; @@ -99,17 +99,18 @@ export class TelemetryRepository implements ITelemetryRepository { 
@Inject(ILoggerRepository) private logger: ILoggerRepository, ) { const { telemetry } = this.configRepository.getEnv(); - const { apiMetrics, hostMetrics, jobMetrics, repoMetrics } = telemetry; + const { metrics } = telemetry; - this.api = new MetricGroupRepository(metricService).configure({ enabled: apiMetrics }); - this.host = new MetricGroupRepository(metricService).configure({ enabled: hostMetrics }); - this.jobs = new MetricGroupRepository(metricService).configure({ enabled: jobMetrics }); - this.repo = new MetricGroupRepository(metricService).configure({ enabled: repoMetrics }); + this.api = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.API) }); + this.host = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.HOST) }); + this.jobs = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.JOB) }); + this.repo = new MetricGroupRepository(metricService).configure({ enabled: metrics.has(ImmichTelemetry.REPO) }); } setup({ repositories }: { repositories: ClassConstructor[] }) { const { telemetry } = this.configRepository.getEnv(); - if (!telemetry.enabled || !telemetry.repoMetrics) { + const { metrics } = telemetry; + if (!metrics.has(ImmichTelemetry.REPO)) { return; } diff --git a/server/src/workers/api.ts b/server/src/workers/api.ts index 6451f1b79293e..bc8eb22b20589 100644 --- a/server/src/workers/api.ts +++ b/server/src/workers/api.ts @@ -20,7 +20,7 @@ async function bootstrap() { process.title = 'immich-api'; const { telemetry, network } = new ConfigRepository().getEnv(); - if (telemetry.enabled) { + if (telemetry.metrics.size > 0) { bootstrapTelemetry(telemetry.apiPort); } diff --git a/server/src/workers/microservices.ts b/server/src/workers/microservices.ts index df4abb01da8ab..bd1e65d6ccf48 100644 --- a/server/src/workers/microservices.ts +++ b/server/src/workers/microservices.ts @@ -11,7 +11,7 @@ import { isStartUpError } from 
'src/services/storage.service'; export async function bootstrap() { const { telemetry } = new ConfigRepository().getEnv(); - if (telemetry.enabled) { + if (telemetry.metrics.size > 0) { bootstrapTelemetry(telemetry.microservicesPort); } diff --git a/server/test/repositories/config.repository.mock.ts b/server/test/repositories/config.repository.mock.ts index bb3cfcebb956c..462e9f832719b 100644 --- a/server/test/repositories/config.repository.mock.ts +++ b/server/test/repositories/config.repository.mock.ts @@ -73,11 +73,7 @@ const envData: EnvData = { telemetry: { apiPort: 8081, microservicesPort: 8082, - enabled: false, - hostMetrics: false, - apiMetrics: false, - jobMetrics: false, - repoMetrics: false, + metrics: new Set(), }, workers: [ImmichWorker.API, ImmichWorker.MICROSERVICES],