Skip to content

Commit

Permalink
add a monitoring probe
Browse files Browse the repository at this point in the history
  • Loading branch information
Sean Reilly committed Dec 9, 2024
1 parent e2ac89e commit 1bb1efe
Show file tree
Hide file tree
Showing 9 changed files with 533 additions and 197 deletions.
2 changes: 2 additions & 0 deletions api-contracts/openapi/components/schemas/_index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -292,3 +292,5 @@ WebhookWorkerCreateResponse:
$ref: "./webhook_worker.yaml#/WebhookWorkerCreateResponse"
WebhookWorkerListResponse:
$ref: "./webhook_worker.yaml#/WebhookWorkerListResponse"
MonitoringRunProbe:
$ref: "./monitoring.yaml#/RunProbe"
2 changes: 2 additions & 0 deletions api-contracts/openapi/openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -178,3 +178,5 @@ paths:
$ref: "./paths/webhook-worker/webhook-worker.yaml#/webhookworkerRequests"
/api/v1/tenants/{tenant}/workflow-runs/{workflow-run}/input:
$ref: "./paths/workflow-run/workflow-run.yaml#/getWorkflowRunInput"
/api/v1/monitoring/{tenant}/probe:
$ref: "./paths/monitoring/monitoring.yaml#/probe"
498 changes: 303 additions & 195 deletions api/v1/server/oas/gen/openapi.gen.go

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions api/v1/server/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/hatchet-dev/hatchet/api/v1/server/handlers/ingestors"
"github.com/hatchet-dev/hatchet/api/v1/server/handlers/logs"
"github.com/hatchet-dev/hatchet/api/v1/server/handlers/metadata"
"github.com/hatchet-dev/hatchet/api/v1/server/handlers/monitoring"

Check failure on line 21 in api/v1/server/run/run.go

View workflow job for this annotation

GitHub Actions / e2e

no required module provides package github.com/hatchet-dev/hatchet/api/v1/server/handlers/monitoring; to add it:

Check failure on line 21 in api/v1/server/run/run.go

View workflow job for this annotation

GitHub Actions / e2e

no required module provides package github.com/hatchet-dev/hatchet/api/v1/server/handlers/monitoring; to add it:

Check failure on line 21 in api/v1/server/run/run.go

View workflow job for this annotation

GitHub Actions / integration

no required module provides package github.com/hatchet-dev/hatchet/api/v1/server/handlers/monitoring; to add it:

Check failure on line 21 in api/v1/server/run/run.go

View workflow job for this annotation

GitHub Actions / load

no required module provides package github.com/hatchet-dev/hatchet/api/v1/server/handlers/monitoring; to add it:

Check failure on line 21 in api/v1/server/run/run.go

View workflow job for this annotation

GitHub Actions / unit

no required module provides package github.com/hatchet-dev/hatchet/api/v1/server/handlers/monitoring; to add it:
rate_limits "github.com/hatchet-dev/hatchet/api/v1/server/handlers/rate-limits"
slackapp "github.com/hatchet-dev/hatchet/api/v1/server/handlers/slack-app"
stepruns "github.com/hatchet-dev/hatchet/api/v1/server/handlers/step-runs"
Expand Down Expand Up @@ -49,6 +50,7 @@ type apiService struct {
*slackapp.SlackAppService
*webhookworker.WebhookWorkersService
*workflowruns.WorkflowRunsService
*monitoring.MonitoringService
}

func newAPIService(config *server.ServerConfig) *apiService {
Expand All @@ -67,6 +69,7 @@ func newAPIService(config *server.ServerConfig) *apiService {
IngestorsService: ingestors.NewIngestorsService(config),
SlackAppService: slackapp.NewSlackAppService(config),
WebhookWorkersService: webhookworker.NewWebhookWorkersService(config),
MonitoringService: monitoring.NewMonitoringService(config),
}
}

Expand Down
17 changes: 17 additions & 0 deletions frontend/app/src/lib/api/generated/Api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import {
ReplayWorkflowRunsRequest,
ReplayWorkflowRunsResponse,
RerunStepRunRequest,
RunProbe,
SNSIntegration,
ScheduleWorkflowRunRequest,
ScheduledRunStatus,
Expand Down Expand Up @@ -2095,4 +2096,20 @@ export class Api<SecurityDataType = unknown> extends HttpClient<SecurityDataType
format: 'json',
...params,
});
/**
* @description Triggers a workflow to check the status of the instance
*
* @name MonitoringPostRunProbe
* @summary Detailed Health Probe For the Instance
* @request POST:/api/v1/monitoring/{tenant}/probe
*/
monitoringPostRunProbe = (tenant: string, data: RunProbe, params: RequestParams = {}) =>
this.request<RunProbe, APIErrors>({
path: `/api/v1/monitoring/${tenant}/probe`,
method: 'POST',
body: data,
type: ContentType.Json,
format: 'json',
...params,
});
}
6 changes: 6 additions & 0 deletions frontend/app/src/lib/api/generated/data-contracts.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1374,6 +1374,10 @@ export interface WebhookWorkerRequestListResponse {
requests?: WebhookWorkerRequest[];
}

export interface RunProbe {
tenantId?: string;
}

export interface TenantList {
pagination?: PaginationResponse;
rows?: Tenant[];
Expand Down Expand Up @@ -1423,3 +1427,5 @@ export interface WebhookWorkerCreateResponse {
}

export type BulkCreateEventResponse = Events;

export type MonitoringRunProbe = RunProbe;
178 changes: 178 additions & 0 deletions pkg/client/rest/gen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 21 additions & 1 deletion pkg/config/server/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ type ServerConfigFile struct {
TenantAlerting ConfigFileTenantAlerting `mapstructure:"tenantAlerting" json:"tenantAlerting,omitempty"`

Email ConfigFileEmail `mapstructure:"email" json:"email,omitempty"`

Monitoring ConfigFileMonitoring `mapstructure:"monitoring" json:"monitoring,omitempty"`
}

type ConfigFileAdditionalLoggers struct {
Expand Down Expand Up @@ -143,7 +145,7 @@ type ConfigFileRuntime struct {
MaxInternalRetryCount int32 `mapstructure:"maxInternalRetryCount" json:"maxInternalRetryCount,omitempty" default:"3"`

// WaitForFlush is the time to wait for the buffer to flush used for exerting some back pressure on writers
WaitForFlush time.Duration `mapstructure:"waitForFlush" json:"waitForFlush,omitempty" default:"1ms"`
WaitForFlush time.Duration `mapstructure:"waitForFlush" json:"waitForFlush,omitempty" default:"1"`

// MaxConcurrent is the maximum number of concurrent flushes
MaxConcurrent int `mapstructure:"maxConcurrent" json:"maxConcurrent,omitempty" default:"50"`
Expand All @@ -168,6 +170,8 @@ type ConfigFileRuntime struct {

// QueueStepRunBuffer represents the buffer settings for inserting step runs into the queue
QueueStepRunBuffer buffer.ConfigFileBuffer `mapstructure:"queueStepRunBuffer" json:"queueStepRunBuffer,omitempty"`

Monitoring ConfigFileMonitoring `mapstructure:"monitoring" json:"monitoring,omitempty"`
}

type SecurityCheckConfigFile struct {
Expand Down Expand Up @@ -348,6 +352,17 @@ type ConfigFileEmail struct {
Postmark PostmarkConfigFile `mapstructure:"postmark" json:"postmark,omitempty"`
}

type ConfigFileMonitoring struct {
// Enabled controls whether the monitoring service is enabled for this Hatchet instance.
Enabled bool `mapstructure:"enabled" json:"enabled,omitempty" default:"true"`

// PermittedTenants is a list of tenant IDs that are allowed to use the monitoring service.
PermittedTenants []string `mapstructure:"permittedTenants" json:"permittedTenants"`

// ProbeTimeout is the time to wait for the probe to complete
ProbeTimeout time.Duration `mapstructure:"probeTimeout" json:"probeTimeout,omitempty" default:"30s"`
}

type PostmarkConfigFile struct {
Enabled bool `mapstructure:"enabled" json:"enabled,omitempty"`

Expand Down Expand Up @@ -619,4 +634,9 @@ func BindAllEnv(v *viper.Viper) {
_ = v.BindEnv("email.postmark.fromName", "SERVER_EMAIL_POSTMARK_FROM_NAME")
_ = v.BindEnv("email.postmark.supportEmail", "SERVER_EMAIL_POSTMARK_SUPPORT_EMAIL")

// monitoring options
_ = v.BindEnv("runtime.monitoring.enabled", "SERVER_MONITORING_ENABLED")
_ = v.BindEnv("runtime.monitoring.permittedTenants", "SERVER_MONITORING_PERMITTED_TENANTS")
_ = v.BindEnv("runtime.monitoring.probeTimeout", "SERVER_MONITORING_PROBE_TIMEOUT")

}
2 changes: 1 addition & 1 deletion pkg/repository/buffer/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import "time"
type ConfigFileBuffer struct {

// WaitForFlush is the time to wait for the buffer to flush used for backpressure on writers
WaitForFlush time.Duration `mapstructure:"waitForFlush" json:"waitForFlush,omitempty" default:"1ms"`
WaitForFlush time.Duration `mapstructure:"waitForFlush" json:"waitForFlush,omitempty" default:"1"`

// MaxConcurrent is the maximum number of concurrent flushes
MaxConcurrent int `mapstructure:"maxConcurrent" json:"maxConcurrent,omitempty" default:"50"`
Expand Down

0 comments on commit 1bb1efe

Please sign in to comment.