Skip to content

Commit

Permalink
Docker s3 data lake app monitoring
Browse files Browse the repository at this point in the history
Signed-off-by: Brandon Shien <[email protected]>
  • Loading branch information
bshien committed Dec 2, 2024
1 parent 7d0c969 commit 3087d1e
Show file tree
Hide file tree
Showing 18 changed files with 864 additions and 9 deletions.
8 changes: 8 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ dependencies {

implementation 'org.json:json:20240303'

implementation 'io.jsonwebtoken:jjwt-api:0.12.6'
runtimeOnly 'io.jsonwebtoken:jjwt-impl:0.12.6'
runtimeOnly 'io.jsonwebtoken:jjwt-jackson:0.12.6'

implementation 'org.bouncycastle:bcprov-jdk18on:1.79'

implementation 'org.kohsuke:github-api:1.326'

testImplementation 'org.junit.jupiter:junit-jupiter-api:5.8.1'
testRuntimeOnly 'org.junit.jupiter:junit-jupiter-engine:5.8.1'

Expand Down
62 changes: 62 additions & 0 deletions infrastructure/lib/constructs/eventDataLakeSns.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import {Alarm, ComparisonOperator, MathExpression, Metric, TreatMissingData} from "aws-cdk-lib/aws-cloudwatch";
import { Construct } from "constructs";
import { SnsMonitors, SnsMonitorsProps } from "./snsMonitor";
import {Duration} from "aws-cdk-lib";

/**
 * Construction properties for the Event Data Lake SNS monitor: everything the
 * base SNS monitor needs, plus the list of alarms to create.
 */
interface eventDataLakeSnsProps extends SnsMonitorsProps {
  /** One entry per alarm to create; `alertName` becomes the CloudWatch alarm name. */
  readonly eventDataLakeSnsAlarms: { alertName: string }[];
}

/**
 * SNS monitor for the GitHub Event Data Lake App.
 *
 * For every entry in `eventDataLakeSnsAlarms` a CloudWatch alarm is created
 * and stored in the inherited `map` under its alert name; afterwards the
 * inherited `createTopic()` is invoked.
 */
export class EventDataLakeSns extends SnsMonitors {
  private readonly eventDataLakeSnsAlarms: Array<{ alertName: string }>;

  constructor(scope: Construct, id: string, props: eventDataLakeSnsProps) {
    super(scope, id, props);
    this.eventDataLakeSnsAlarms = props.eventDataLakeSnsAlarms;

    // Register each alarm under its alert name before the topic is created.
    for (const entry of this.eventDataLakeSnsAlarms) {
      const [alarm, alarmKey] = this.eventDataLakeAppFailed(entry.alertName);
      this.map[alarmKey] = alarm;
    }

    this.createTopic();
  }

  /**
   * Builds the alarm that fires when the sum of `LabelCanaryEvent` over a
   * 10-minute period is below 1.
   *
   * @param alertName used both as the alarm name and as part of the construct id
   * @returns the created alarm together with the name it was created under
   */
  private eventDataLakeAppFailed(alertName: string): [Alarm, string] {
    const window = Duration.minutes(10);

    const canaryEventCount = new Metric({
      namespace: this.alarmNameSpace,
      metricName: "LabelCanaryEvent",
      statistic: "Sum",
      period: window,
    });

    // Periods without any datapoint are filled with 0 so they evaluate as "no events".
    const canaryEventCountOrZero = new MathExpression({
      expression: "FILL(metric, 0)",
      usingMetrics: { metric: canaryEventCount },
      period: window,
    });

    const alarm = new Alarm(this, `error_alarm_${alertName}`, {
      alarmName: alertName,
      alarmDescription: "Detect GitHub Event Data Lake App failure",
      metric: canaryEventCountOrZero,
      comparisonOperator: ComparisonOperator.LESS_THAN_THRESHOLD,
      threshold: 1,
      evaluationPeriods: 1,
      datapointsToAlarm: 1,
      treatMissingData: TreatMissingData.BREACHING,
    });

    return [alarm, alertName];
  }
}

4 changes: 3 additions & 1 deletion infrastructure/lib/enums/project.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ enum Project {
RESTRICTED_PREFIX = '',
LAMBDA_PACKAGE = 'opensearch-metrics-1.0.zip',
EC2_AMI_SSM = '',
SNS_ALERT_EMAIL = '[email protected]'
SNS_ALERT_EMAIL = '[email protected]',
EVENT_CANARY_OWNER_TARGET = '',
EVENT_CANARY_REPO_TARGET = '',
}
export default Project;
11 changes: 11 additions & 0 deletions infrastructure/lib/infrastructure-stack.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import { OpenSearchWAF } from "./stacks/waf";
import { GitHubWorkflowMonitorAlarms } from "./stacks/gitHubWorkflowMonitorAlarms";
import { OpenSearchS3EventIndexWorkflowStack } from "./stacks/s3EventIndexWorkflow";
import { OpenSearchMaintainerInactivityWorkflowStack } from "./stacks/maintainerInactivityWorkflow";
import {OpenSearchEventCanaryWorkflowStack} from "./stacks/eventCanaryWorkflow";

export class InfrastructureStack extends Stack {
constructor(scope: Construct, id: string, props?: StackProps) {
Expand Down Expand Up @@ -113,6 +114,16 @@ export class InfrastructureStack extends Stack {
secretName: 'metrics-creds'
});

// Create OpenSearch Event Canary Lambda setup
const openSearchEventCanaryWorkflowStack = new OpenSearchEventCanaryWorkflowStack(app, 'OpenSearchEventCanary-Workflow', {
vpcStack: vpcStack,
lambdaPackage: Project.LAMBDA_PACKAGE,
gitHubOwnerTarget: Project.EVENT_CANARY_OWNER_TARGET,
gitHubRepoTarget: Project.EVENT_CANARY_REPO_TARGET,
gitHubAppSecret: openSearchMetricsSecretsStack.secret,
})
openSearchEventCanaryWorkflowStack.node.addDependency(vpcStack);

// Create Monitoring Dashboard

const openSearchMetricsMonitoringStack = new OpenSearchMetricsMonitoringStack(app, "OpenSearchMetrics-Monitoring", {
Expand Down
103 changes: 103 additions & 0 deletions infrastructure/lib/stacks/eventCanaryWorkflow.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import { Duration, Stack, StackProps } from "aws-cdk-lib";
import { Rule, RuleTargetInput, Schedule } from "aws-cdk-lib/aws-events";
import { SfnStateMachine } from "aws-cdk-lib/aws-events-targets";
import { Bucket } from "aws-cdk-lib/aws-s3";
import { JsonPath, StateMachine, TaskInput } from "aws-cdk-lib/aws-stepfunctions";
import { LambdaInvoke } from "aws-cdk-lib/aws-stepfunctions-tasks";
import { Construct } from 'constructs';
import { OpenSearchLambda } from "../constructs/lambda";
import { OpenSearchDomainStack } from "./opensearch";
import { VpcStack } from "./vpc";
import {Effect, ManagedPolicy, PolicyDocument, PolicyStatement, Role, ServicePrincipal} from "aws-cdk-lib/aws-iam";
import {Secret} from "aws-cdk-lib/aws-secretsmanager";

/**
 * Construction properties for the Event Canary workflow stack.
 */
export interface OpenSearchEventCanaryWorkflowStackProps extends StackProps {
  /** Supplies the VPC and security group the canary Lambda is attached to. */
  readonly vpcStack: VpcStack;
  /** File name of the Lambda package under `build/distributions`. */
  readonly lambdaPackage: string;
  /** Passed to the Lambda as the `GITHUB_OWNER_TARGET` environment variable. */
  readonly gitHubOwnerTarget: string;
  /** Passed to the Lambda as the `GITHUB_REPO_TARGET` environment variable. */
  readonly gitHubRepoTarget: string;
  /** Secret the Lambda role may read; its name and region are passed to the Lambda. */
  readonly gitHubAppSecret: Secret;
}

/**
 * Values the Event Canary workflow stack exposes to other stacks.
 */
export interface WorkflowComponent {
  /** Name of the state machine that runs the event canary. */
  opensearchEventCanaryWorkflowStateMachineName: string;
}

/**
 * Stack that runs the Event Canary Lambda on a schedule.
 *
 * It creates the Lambda (with its execution role), wraps it in a Step
 * Functions state machine, and adds an EventBridge rule that starts the state
 * machine on the cron expression `0/10 * * * ? *`.
 */
export class OpenSearchEventCanaryWorkflowStack extends Stack {
  public readonly workflowComponent: WorkflowComponent;

  constructor(scope: Construct, id: string, props: OpenSearchEventCanaryWorkflowStackProps) {
    super(scope, id, props);

    const { vpcStack, lambdaPackage, gitHubOwnerTarget, gitHubRepoTarget, gitHubAppSecret } = props;
    const canaryInvocation = this.createEventCanaryTask(
      this,
      vpcStack,
      lambdaPackage,
      gitHubOwnerTarget,
      gitHubRepoTarget,
      gitHubAppSecret,
    );

    const stateMachine = new StateMachine(this, 'OpenSearchEventCanaryWorkflow', {
      stateMachineName: 'OpenSearchEventCanaryWorkflow',
      definition: canaryInvocation,
      timeout: Duration.minutes(15),
    });

    const everyTenMinutes = Schedule.expression('cron(0/10 * * * ? *)');
    new Rule(this, 'OpenSearchEventCanaryWorkflow-Every-10mins', {
      schedule: everyTenMinutes,
      targets: [new SfnStateMachine(stateMachine)],
    });

    this.workflowComponent = {
      opensearchEventCanaryWorkflowStateMachineName: stateMachine.stateMachineName,
    };
  }

  /**
   * Creates the canary Lambda, its execution role, and the Step Functions task
   * that invokes it.
   *
   * @param scope construct scope the `LambdaInvoke` task is created in
   * @param vpcStack provides the VPC and security group for the Lambda
   * @param lambdaPackage file name of the Lambda zip under `build/distributions`
   * @param gitHubOwnerTarget value of the `GITHUB_OWNER_TARGET` environment variable
   * @param gitHubRepoTarget value of the `GITHUB_REPO_TARGET` environment variable
   * @param gitHubAppSecret secret the Lambda role is allowed to read
   * @returns the invoke task with `addRetry()` applied
   */
  private createEventCanaryTask(scope: Construct, vpcStack: VpcStack, lambdaPackage: string, gitHubOwnerTarget: string, gitHubRepoTarget: string, gitHubAppSecret: Secret) {
    const lambdaRole = new Role(this, 'OpenSearchEventCanaryLambdaRole', {
      roleName: "OpenSearchEventCanaryLambdaRole",
      description: "OpenSearch Metrics Event Canary Lambda Execution Role",
      assumedBy: new ServicePrincipal('lambda.amazonaws.com'),
      managedPolicies: [
        ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaBasicExecutionRole'),
        ManagedPolicy.fromAwsManagedPolicyName('service-role/AWSLambdaVPCAccessExecutionRole'),
      ],
    });

    // The role may read exactly one secret: the one handed in by the caller.
    const readAppSecret = new PolicyStatement({
      effect: Effect.ALLOW,
      actions: ["secretsmanager:GetSecretValue"],
      resources: [`${gitHubAppSecret.secretFullArn}`],
    });
    lambdaRole.addToPolicy(readAppSecret);

    const canaryLambdaConstruct = new OpenSearchLambda(this, "OpenSearchMetricsEventCanaryLambdaFunction", {
      lambdaNameBase: "OpenSearchMetricsEventCanary",
      handler: "org.opensearchmetrics.lambda.EventCanaryLambda",
      lambdaZipPath: `../../../build/distributions/${lambdaPackage}`,
      vpc: vpcStack.vpc,
      securityGroup: vpcStack.securityGroup,
      role: lambdaRole,
      environment: {
        GITHUB_OWNER_TARGET: gitHubOwnerTarget,
        GITHUB_REPO_TARGET: gitHubRepoTarget,
        API_CREDENTIALS_SECRETS: gitHubAppSecret.secretName,
        SECRETS_MANAGER_REGION: gitHubAppSecret.env.region,
      },
    });
    const canaryFunction = canaryLambdaConstruct.lambda;

    // The Lambda's result is discarded; only success or failure of the task matters.
    const invokeCanary = new LambdaInvoke(scope, 'Event Canary Lambda', {
      lambdaFunction: canaryFunction,
      resultPath: JsonPath.DISCARD,
      timeout: Duration.minutes(15),
    });
    return invokeCanary.addRetry();
  }
}
22 changes: 21 additions & 1 deletion infrastructure/lib/stacks/monitoringDashboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { OpenSearchLambda } from "../constructs/lambda";
import { StepFunctionSns } from "../constructs/stepFunctionSns";
import Project from "../enums/project";
import { VpcStack } from "./vpc";
import {EventDataLakeSns} from "../constructs/eventDataLakeSns";


interface OpenSearchMetricsMonitoringStackProps extends StackProps {
Expand Down Expand Up @@ -56,12 +57,13 @@ export class OpenSearchMetricsMonitoringStack extends Stack {
lambdaZipPath: `../../../build/distributions/${props.lambdaPackage}`,
role: slackLambdaRole,
environment: {
SLACK_CREDENTIALS_SECRETS: props.secrets.secretName,
API_CREDENTIALS_SECRETS: props.secrets.secretName,
SECRETS_MANAGER_REGION: props.secrets.env.region
}
});
this.snsMonitorStepFunctionExecutionsFailed();
this.snsMonitorCanaryFailed('metrics_heartbeat', `https://${Project.METRICS_HOSTED_ZONE}`, props.vpcStack);
this.snsMonitorEventDataLakeAppFailed();
}

/**
Expand Down Expand Up @@ -117,5 +119,23 @@ export class OpenSearchMetricsMonitoringStack extends Stack {
slackLambda: this.slackLambda
});
}

/**
* Create the SNS alarms that fire when the GitHub Event Data Lake App goes down.
*/
private snsMonitorEventDataLakeAppFailed(): void {
  // A single alarm is configured; the monitor construct creates one alarm per entry.
  new EventDataLakeSns(this, "SnsMonitors-EventDataLakeAppFailed", {
    region: this.props.region,
    accountId: this.props.account,
    alarmNameSpace: "GitHubCanary",
    snsTopicName: "EventDataLakeAppFailed",
    slackLambda: this.slackLambda,
    eventDataLakeSnsAlarms: [{ alertName: 'Event_data_lake_app_failed' }],
  });
}
}

90 changes: 90 additions & 0 deletions infrastructure/test/event-canary-workflow-stack.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

import { App } from "aws-cdk-lib";
import { Template } from "aws-cdk-lib/assertions";
import Project from "../lib/enums/project";
import { VpcStack } from "../lib/stacks/vpc";
import {OpenSearchEventCanaryWorkflowStack} from "../lib/stacks/eventCanaryWorkflow";
import {OpenSearchMetricsSecretsStack} from "../lib/stacks/secrets";

// Synthesizes the Event Canary workflow stack and checks the generated
// Lambda function, state machine, and schedule rule.
test('Event Canary Workflow Stack Test', () => {
  const app = new App();
  const vpcStack = new VpcStack(app, 'Test-OpenSearchHealth-VPC', {});

  // Secrets stack that provides the secret handed to the canary workflow.
  const secretsStack = new OpenSearchMetricsSecretsStack(app, "OpenSearchMetrics-Secrets", {
    secretName: 'metrics-creds',
  });

  const canaryStack = new OpenSearchEventCanaryWorkflowStack(app, 'OpenSearchEventCanary-Workflow', {
    vpcStack,
    lambdaPackage: Project.LAMBDA_PACKAGE,
    gitHubOwnerTarget: Project.EVENT_CANARY_OWNER_TARGET,
    gitHubRepoTarget: Project.EVENT_CANARY_REPO_TARGET,
    gitHubAppSecret: secretsStack.secret,
  });
  canaryStack.node.addDependency(vpcStack);

  const template = Template.fromStack(canaryStack);

  // Expected state machine definition: a single Lambda invoke task with retries.
  const expectedDefinition = {
    "Fn::Join": [
      "",
      [
        "{\"StartAt\":\"Event Canary Lambda\",\"States\":{\"Event Canary Lambda\":{\"End\":true,\"Retry\":[{\"ErrorEquals\":[\"Lambda.ClientExecutionTimeoutException\",\"Lambda.ServiceException\",\"Lambda.AWSLambdaException\",\"Lambda.SdkClientException\"],\"IntervalSeconds\":2,\"MaxAttempts\":6,\"BackoffRate\":2},{\"ErrorEquals\":[\"States.ALL\"]}],\"Type\":\"Task\",\"TimeoutSeconds\":900,\"ResultPath\":null,\"Resource\":\"arn:",
        { Ref: "AWS::Partition" },
        ":states:::lambda:invoke\",\"Parameters\":{\"FunctionName\":\"",
        { "Fn::GetAtt": ["OpenSearchMetricsEventCanaryLambda358BAA07", "Arn"] },
        "\",\"Payload.$\":\"$\"}}},\"TimeoutSeconds\":900}",
      ],
    ],
  };

  // Expected single target of the schedule rule: the state machine, started via the events role.
  const expectedRuleTarget = {
    Arn: { Ref: "OpenSearchEventCanaryWorkflowEB1017B7" },
    Id: "Target0",
    RoleArn: { "Fn::GetAtt": ["OpenSearchEventCanaryWorkflowEventsRoleA5644829", "Arn"] },
  };

  template.resourceCountIs('AWS::IAM::Role', 3);

  template.resourceCountIs('AWS::Lambda::Function', 1);
  template.hasResourceProperties('AWS::Lambda::Function', {
    FunctionName: "OpenSearchMetricsEventCanaryLambda",
    Handler: "org.opensearchmetrics.lambda.EventCanaryLambda",
  });

  template.resourceCountIs('AWS::StepFunctions::StateMachine', 1);
  template.hasResourceProperties('AWS::StepFunctions::StateMachine', {
    DefinitionString: expectedDefinition,
    RoleArn: { "Fn::GetAtt": ["OpenSearchEventCanaryWorkflowRoleDC920D0E", "Arn"] },
    StateMachineName: "OpenSearchEventCanaryWorkflow",
  });

  template.resourceCountIs('AWS::Events::Rule', 1);
  template.hasResourceProperties('AWS::Events::Rule', {
    ScheduleExpression: "cron(0/10 * * * ? *)",
    State: "ENABLED",
    Targets: [expectedRuleTarget],
  });
});
Loading

0 comments on commit 3087d1e

Please sign in to comment.