Skip to content

Commit

Permalink
feat(perf): Add perf testing and monitoring framework (datahub-projec…
Browse files Browse the repository at this point in the history
  • Loading branch information
Dexter Lee authored Sep 8, 2021
1 parent e30d723 commit 8747fbe
Show file tree
Hide file tree
Showing 61 changed files with 6,684 additions and 129 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ venv.bak/
.dmypy.json
dmypy.json
MANIFEST
*.pyc

# Added by mp-maker
**/build
Expand Down
4 changes: 4 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ project.ext.externalDependency = [
'commonsLang': 'commons-lang:commons-lang:2.6',
'commonsCollections': 'commons-collections:commons-collections:3.2.2',
'data' : 'com.linkedin.pegasus:data:' + pegasusVersion,
'dropwizardMetricsCore': 'io.dropwizard.metrics:metrics-core:4.2.3',
'dropwizardMetricsJmx': 'io.dropwizard.metrics:metrics-jmx:4.2.3',
'ebean': 'io.ebean:ebean:11.33.3',
'ebeanAgent': 'io.ebean:ebean-agent:11.27.1',
'elasticSearchRest': 'org.elasticsearch.client:elasticsearch-rest-high-level-client:7.9.3',
Expand Down Expand Up @@ -90,6 +92,8 @@ project.ext.externalDependency = [
'mysqlConnector': 'mysql:mysql-connector-java:8.0.20',
'neo4jHarness': 'org.neo4j.test:neo4j-harness:3.4.11',
'neo4jJavaDriver': 'org.neo4j.driver:neo4j-java-driver:4.0.1',
'opentelemetryApi': 'io.opentelemetry:opentelemetry-api:1.0.0',
'opentelemetryAnnotations': 'io.opentelemetry:opentelemetry-extension-annotations:1.0.0',
'parseqTest': 'com.linkedin.parseq:parseq:3.0.7:test',
'parquet': 'org.apache.parquet:parquet-avro:1.12.0',
'picocli': 'info.picocli:picocli:4.5.0',
Expand Down
5 changes: 4 additions & 1 deletion docker/datahub-gms/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@ ARG APP_ENV=prod

FROM adoptopenjdk/openjdk8:alpine-jre as base
ENV DOCKERIZE_VERSION v0.6.1
# Install download tools, then fetch into the build's working directory:
#   - the Jetty runner, JMX and util jars used to launch the GMS war,
#   - the OpenTelemetry and Prometheus JMX java agents that start.sh attaches
#     when ENABLE_OTEL / ENABLE_PROMETHEUS are "true",
#   - the dockerize wait helper (unpacked into /usr/local/bin).
# start.sh launches /jetty-runner.jar, so the working directory here is
# expected to be /.
# curl runs with -f so an HTTP error (e.g. 404) fails the build instead of
# silently saving the error page as a .jar; -L follows redirects and -sS keeps
# the log quiet while still printing errors. Both agent downloads name their
# output file explicitly because start.sh refers to the jars by those names.
RUN apk --no-cache add curl tar wget \
    && curl -fsSL https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-runner/9.4.20.v20190813/jetty-runner-9.4.20.v20190813.jar --output jetty-runner.jar \
    && curl -fsSL https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-jmx/9.4.20.v20190813/jetty-jmx-9.4.20.v20190813.jar --output jetty-jmx.jar \
    && curl -fsSL https://repo1.maven.org/maven2/org/eclipse/jetty/jetty-util/9.4.20.v20190813/jetty-util-9.4.20.v20190813.jar --output jetty-util.jar \
    && wget https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar -O opentelemetry-javaagent-all.jar \
    && wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
    && curl -fsSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv

FROM adoptopenjdk/openjdk8:alpine-slim as prod-build
Expand All @@ -19,6 +21,7 @@ FROM base as prod-install
COPY --from=prod-build /war.war /datahub/datahub-gms/bin/war.war
COPY --from=prod-build /datahub-src/metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-gms/resources/entity-registry.yml
COPY --from=prod-build /datahub-src/docker/datahub-gms/start.sh /datahub/datahub-gms/scripts/start.sh
COPY --from=prod-build /datahub-src/docker/monitoring/client-prometheus-config.yaml /datahub/datahub-gms/scripts/prometheus-config.yaml
RUN chmod +x /datahub/datahub-gms/scripts/start.sh

FROM base as dev-install
Expand Down
1 change: 0 additions & 1 deletion docker/datahub-gms/env/docker.env
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ NEO4J_PASSWORD=datahub
JAVA_OPTS=-Xms1g -Xmx1g
GRAPH_SERVICE_IMPL=neo4j
ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml

MAE_CONSUMER_ENABLED=true
MCE_CONSUMER_ENABLED=true

Expand Down
18 changes: 15 additions & 3 deletions docker/datahub-gms/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ fi

# Add default header if needed
if [[ -z $ELASTICSEARCH_AUTH_HEADER ]]; then
ELASTICSEARCH_AUTH_HEADER="Accept: */*"
ELASTICSEARCH_AUTH_HEADER="Accept: */*"
fi

if [[ $ELASTICSEARCH_USE_SSL == true ]]; then
ELASTICSEARCH_PROTOCOL=https
ELASTICSEARCH_PROTOCOL=https
else
ELASTICSEARCH_PROTOCOL=http
ELASTICSEARCH_PROTOCOL=http
fi

WAIT_FOR_NEO4J=""
Expand All @@ -32,13 +32,25 @@ if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]]; then
WAIT_FOR_NEO4J=" -wait $NEO4J_HOST "
fi

# Optional JVM agents, each enabled only when its flag is exactly "true".
# The jars are referenced by absolute path: the image build downloads them into
# the same directory as jetty-runner.jar, which this script launches as
# /jetty-runner.jar. A relative path would only resolve while the container's
# working directory is /.
# NOTE(review): assumes the agent jars live in / like jetty-runner.jar — confirm
# against the Dockerfile if a WORKDIR is ever introduced.

# OpenTelemetry java agent (ENABLE_OTEL=true).
OTEL_AGENT=""
if [[ $ENABLE_OTEL == true ]]; then
  OTEL_AGENT="-javaagent:/opentelemetry-javaagent-all.jar "
fi

# Prometheus JMX exporter agent (ENABLE_PROMETHEUS=true); the argument after
# "=" is <port>:<config file>.
PROMETHEUS_AGENT=""
if [[ $ENABLE_PROMETHEUS == true ]]; then
  PROMETHEUS_AGENT="-javaagent:/jmx_prometheus_javaagent.jar=4318:/datahub/datahub-gms/scripts/prometheus-config.yaml "
fi

dockerize \
-wait tcp://$EBEAN_DATASOURCE_HOST \
-wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') \
-wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT -wait-http-header "$ELASTICSEARCH_AUTH_HEADER" \
$WAIT_FOR_NEO4J \
-timeout 240s \
java $JAVA_OPTS $JMX_OPTS \
$OTEL_AGENT \
$PROMETHEUS_AGENT \
-jar /jetty-runner.jar \
--jar jetty-util.jar \
--jar jetty-jmx.jar \
Expand Down
5 changes: 4 additions & 1 deletion docker/datahub-mae-consumer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ ARG APP_ENV=prod

FROM adoptopenjdk/openjdk8:alpine-jre as base
ENV DOCKERIZE_VERSION v0.6.1
# Install download tools, then fetch the OpenTelemetry and Prometheus JMX java
# agents (attached by start.sh when ENABLE_OTEL / ENABLE_PROMETHEUS are "true")
# and the dockerize wait helper, which is unpacked into /usr/local/bin.
# Both agent downloads name their output file explicitly because start.sh
# refers to the jars by those exact names. curl uses -f so an HTTP error is
# reported by curl itself rather than surfacing only as a tar failure.
RUN apk --no-cache add curl tar wget \
    && wget https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar -O opentelemetry-javaagent-all.jar \
    && wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
    && curl -fsSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv

FROM adoptopenjdk/openjdk8:alpine-slim as prod-build
Expand All @@ -16,6 +18,7 @@ FROM base as prod-install
COPY --from=prod-build /mae-consumer-job.jar /datahub/datahub-mae-consumer/bin/
COPY --from=prod-build /datahub-src/metadata-models/src/main/resources/entity-registry.yml /datahub/datahub-mae-consumer/resources/entity-registry.yml
COPY --from=prod-build /datahub-src/docker/datahub-mae-consumer/start.sh /datahub/datahub-mae-consumer/scripts/
COPY --from=prod-build /datahub-src/docker/monitoring/client-prometheus-config.yaml /datahub/datahub-mae-consumer/scripts/prometheus-config.yaml
RUN chmod +x /datahub/datahub-mae-consumer/scripts/start.sh

FROM base as dev-install
Expand Down
22 changes: 16 additions & 6 deletions docker/datahub-mae-consumer/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,19 @@ if [[ $GRAPH_SERVICE_IMPL != elasticsearch ]]; then
WAIT_FOR_NEO4J=" -wait $NEO4J_HOST "
fi

dockerize \
-wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') \
-wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT -wait-http-header "$ELASTICSEARCH_AUTH_HEADER" \
$WAIT_FOR_NEO4J \
-timeout 240s \
java $JAVA_OPTS $JMX_OPTS -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar
# JVM agent flags for the MAE consumer. Both start out empty and are filled in
# only when the matching feature flag is exactly "true".
OTEL_AGENT=""
PROMETHEUS_AGENT=""

# OpenTelemetry java agent.
case $ENABLE_OTEL in
  true) OTEL_AGENT="-javaagent:opentelemetry-javaagent-all.jar " ;;
esac

# Prometheus JMX exporter agent; the argument after "=" is <port>:<config file>.
case $ENABLE_PROMETHEUS in
  true) PROMETHEUS_AGENT="-javaagent:jmx_prometheus_javaagent.jar=4318:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml " ;;
esac

# Wait for the consumer's dependencies before starting the JVM: every Kafka
# bootstrap server, the Elasticsearch endpoint (sending the configured auth
# header), and Neo4j when WAIT_FOR_NEO4J was populated above. The wait is
# bounded by a 240s timeout.
# The sed call rewrites a comma-separated KAFKA_BOOTSTRAP_SERVER list into one
# "-wait tcp://<host>" argument per broker, which relies on the unquoted
# command substitution being word-split.
# OTEL_AGENT and PROMETHEUS_AGENT are empty unless the matching ENABLE_* flag
# is "true", so they add nothing to the java command line by default.
dockerize \
-wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') \
-wait $ELASTICSEARCH_PROTOCOL://$ELASTICSEARCH_HOST_URL:$ELASTICSEARCH_PORT -wait-http-header "$ELASTICSEARCH_AUTH_HEADER" \
$WAIT_FOR_NEO4J \
-timeout 240s \
java $JAVA_OPTS $JMX_OPTS $OTEL_AGENT $PROMETHEUS_AGENT -jar /datahub/datahub-mae-consumer/bin/mae-consumer-job.jar
5 changes: 4 additions & 1 deletion docker/datahub-mce-consumer/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ ARG APP_ENV=prod

FROM adoptopenjdk/openjdk8:alpine-jre as base
ENV DOCKERIZE_VERSION v0.6.1
# Install download tools, then fetch the OpenTelemetry and Prometheus JMX java
# agents (attached by start.sh when ENABLE_OTEL / ENABLE_PROMETHEUS are "true")
# and the dockerize wait helper, which is unpacked into /usr/local/bin.
# Both agent downloads name their output file explicitly because start.sh
# refers to the jars by those exact names. curl uses -f so an HTTP error is
# reported by curl itself rather than surfacing only as a tar failure.
RUN apk --no-cache add curl tar wget \
    && wget https://github.com/open-telemetry/opentelemetry-java-instrumentation/releases/download/v1.4.1/opentelemetry-javaagent-all.jar -O opentelemetry-javaagent-all.jar \
    && wget https://repo1.maven.org/maven2/io/prometheus/jmx/jmx_prometheus_javaagent/0.16.1/jmx_prometheus_javaagent-0.16.1.jar -O jmx_prometheus_javaagent.jar \
    && curl -fsSL https://github.com/jwilder/dockerize/releases/download/$DOCKERIZE_VERSION/dockerize-linux-amd64-$DOCKERIZE_VERSION.tar.gz | tar -C /usr/local/bin -xzv

FROM openjdk:8 as prod-build
Expand All @@ -14,6 +16,7 @@ RUN cd datahub-src && cp metadata-jobs/mce-consumer-job/build/libs/mce-consumer-
FROM base as prod-install
COPY --from=prod-build /mce-consumer-job.jar /datahub/datahub-mce-consumer/bin/
COPY --from=prod-build /datahub-src/docker/datahub-mce-consumer/start.sh /datahub/datahub-mce-consumer/scripts/
COPY --from=prod-build /datahub-src/docker/monitoring/client-prometheus-config.yaml /datahub/datahub-mce-consumer/scripts/prometheus-config.yaml
RUN chmod +x /datahub/datahub-mce-consumer/scripts/start.sh

FROM base as dev-install
Expand Down
12 changes: 11 additions & 1 deletion docker/datahub-mce-consumer/start.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
#!/bin/sh

# Optional JVM agents, each enabled only when its flag is exactly "true".
# This script runs under /bin/sh, so the checks use the POSIX "[ ... = ... ]"
# form with quoted operands; "[[ ... == ... ]]" is a bash extension and an
# unset flag must still compare cleanly as an empty string.

# OpenTelemetry java agent (ENABLE_OTEL=true).
OTEL_AGENT=""
if [ "$ENABLE_OTEL" = "true" ]; then
  OTEL_AGENT="-javaagent:opentelemetry-javaagent-all.jar "
fi

# Prometheus JMX exporter agent (ENABLE_PROMETHEUS=true); the argument after
# "=" is <port>:<config file>.
PROMETHEUS_AGENT=""
if [ "$ENABLE_PROMETHEUS" = "true" ]; then
  PROMETHEUS_AGENT="-javaagent:jmx_prometheus_javaagent.jar=4318:/datahub/datahub-mce-consumer/scripts/prometheus-config.yaml "
fi

dockerize \
-wait tcp://$(echo $KAFKA_BOOTSTRAP_SERVER | sed 's/,/ -wait tcp:\/\//g') \
-timeout 240s \
java $JAVA_OPTS $JMX_OPTS -jar /datahub/datahub-mce-consumer/bin/mce-consumer-job.jar
java $JAVA_OPTS $JMX_OPTS $OTEL_AGENT $PROMETHEUS_AGENT -jar /datahub/datahub-mce-consumer/bin/mce-consumer-job.jar
17 changes: 15 additions & 2 deletions docker/dev-without-neo4j.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
#!/bin/bash

# Extra "-f <file>" arguments for docker-compose, chosen from two opt-in flags:
#   SEPARATE_CONSUMERS=true  adds the standalone consumer compose files
#   MONITORING=true          adds the monitoring stack, plus the consumers'
#                            monitoring overrides when consumers are separate
CONSUMERS_COMPOSE=""
MONITORING_COMPOSE=""

if [[ $SEPARATE_CONSUMERS == true ]]; then
  CONSUMERS_COMPOSE="-f docker-compose.consumers-without-neo4j.yml -f docker-compose.consumers.dev.yml"
fi

if [[ $MONITORING == true ]]; then
  MONITORING_COMPOSE="-f monitoring/docker-compose.monitoring.yml"
  if [[ $SEPARATE_CONSUMERS == true ]]; then
    MONITORING_COMPOSE="$MONITORING_COMPOSE -f monitoring/docker-compose.consumers.monitoring.yml"
  fi
fi

# Launches dev instances of DataHub images. See documentation for more details.
# YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
Expand All @@ -8,10 +21,10 @@ cd $DIR && \
-f docker-compose-without-neo4j.yml \
-f docker-compose-without-neo4j.override.yml \
-f docker-compose.dev.yml \
pull \
$CONSUMERS_COMPOSE $MONITORING_COMPOSE pull \
&& \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
-f docker-compose-without-neo4j.yml \
-f docker-compose-without-neo4j.override.yml \
-f docker-compose.dev.yml \
up --build $@
$CONSUMERS_COMPOSE $MONITORING_COMPOSE up --build $@
17 changes: 15 additions & 2 deletions docker/dev.sh
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
#!/bin/bash

# Work out which optional compose files to layer on top of the base dev stack.
# Both variables stay empty unless the corresponding flag is exactly "true".
CONSUMERS_COMPOSE=""
MONITORING_COMPOSE=""

# Standalone Kafka consumer containers.
if [[ $SEPARATE_CONSUMERS == true ]]; then
  CONSUMERS_COMPOSE="-f docker-compose.consumers.yml -f docker-compose.consumers.dev.yml"
fi

# Monitoring stack; the consumers' monitoring overrides are appended only when
# the consumers run as separate containers.
if [[ $MONITORING == true ]]; then
  MONITORING_COMPOSE="-f monitoring/docker-compose.monitoring.yml"
  if [[ $SEPARATE_CONSUMERS == true ]]; then
    MONITORING_COMPOSE="$MONITORING_COMPOSE -f monitoring/docker-compose.consumers.monitoring.yml"
  fi
fi

# Launches dev instances of DataHub images. See documentation for more details.
# YOU MUST BUILD VIA GRADLE BEFORE RUNNING THIS.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
Expand All @@ -8,10 +21,10 @@ cd $DIR && \
-f docker-compose.yml \
-f docker-compose.override.yml \
-f docker-compose.dev.yml \
pull \
$CONSUMERS_COMPOSE $MONITORING_COMPOSE pull \
&& \
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose -p datahub \
-f docker-compose.yml \
-f docker-compose.override.yml \
-f docker-compose.dev.yml \
up --build $@
$CONSUMERS_COMPOSE $MONITORING_COMPOSE up --build $@
5 changes: 5 additions & 0 deletions docker/docker-compose.consumers-without-neo4j.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Service definitions for standalone Kafka consumer containers.
version: '3.8'
services:
datahub-gms:
environment:
- MAE_CONSUMER_ENABLED=false
- MCE_CONSUMER_ENABLED=false

datahub-mae-consumer:
build:
context: ../
Expand Down
4 changes: 3 additions & 1 deletion docker/docker-compose.consumers.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ services:
- ./datahub-mae-consumer/start.sh:/datahub/datahub-mae-consumer/scripts/start.sh
- ../metadata-models/src/main/resources/:/datahub/datahub-mae-consumer/resources
- ../metadata-jobs/mae-consumer-job/build/libs/:/datahub/datahub-mae-consumer/bin/
- ./monitoring/client-prometheus-config.yaml:/datahub/datahub-mae-consumer/scripts/prometheus-config.yaml

datahub-mce-consumer:
image: linkedin/datahub-mce-consumer:debug
Expand All @@ -21,4 +22,5 @@ services:
APP_ENV: dev
volumes:
- ./datahub-mce-consumer/start.sh:/datahub/datahub-mce-consumer/scripts/start.sh
- ../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin
- ../metadata-jobs/mce-consumer-job/build/libs/:/datahub/datahub-mce-consumer/bin
- ./monitoring/client-prometheus-config.yaml:/datahub/datahub-mce-consumer/scripts/prometheus-config.yaml
10 changes: 5 additions & 5 deletions docker/docker-compose.consumers.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# Service definitions for standalone Kafka consumer containers.
version: '3.8'
services:
datahub-gms:
environment:
- MAE_CONSUMER_ENABLED=false
- MCE_CONSUMER_ENABLED=false

datahub-mae-consumer:
build:
context: ../
Expand Down Expand Up @@ -29,8 +34,3 @@ services:
depends_on:
- kafka-setup
- datahub-gms

datahub-gms:
environment:
- MAE_CONSUMER_ENABLED=false
- MCE_CONSUMER_ENABLED=false
1 change: 1 addition & 0 deletions docker/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ services:
APP_ENV: dev
volumes:
- ./datahub-gms/start.sh:/datahub/datahub-gms/scripts/start.sh
- ./monitoring/client-prometheus-config.yaml:/datahub/datahub-gms/scripts/prometheus-config.yaml
- ../metadata-models/src/main/resources/:/datahub/datahub-gms/resources
- ../metadata-service/war/build/libs/:/datahub/datahub-gms/bin

Expand Down
4 changes: 4 additions & 0 deletions docker/monitoring/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# DataHub Monitoring Quickstart

This directory contains the setup for monitoring DataHub performance and enabling distributed tracing. More details can be
found in the [monitoring guide](../../docs/advanced/monitoring.md).
4 changes: 4 additions & 0 deletions docker/monitoring/client-prometheus-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Configuration file for the Prometheus JMX exporter java agent
# (jmx_prometheus_javaagent). The service images copy it to
# <service>/scripts/prometheus-config.yaml, and each start.sh passes that path
# to the agent when ENABLE_PROMETHEUS=true.
# No "rules" section is defined here.
# NOTE(review): presumably the exporter's default collection behaviour applies
# without rules — confirm against the jmx_exporter documentation for 0.16.1.
startDelaySeconds: 0
ssl: false
lowercaseOutputName: false
lowercaseOutputLabelNames: false
24 changes: 24 additions & 0 deletions docker/monitoring/docker-compose.consumers.monitoring.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
---
# Compose override that switches on metrics and tracing for the standalone
# MAE / MCE consumer containers. The dev scripts add this file only when both
# MONITORING and SEPARATE_CONSUMERS are "true", alongside
# docker-compose.monitoring.yml (which defines the jaeger-all-in-one service
# referenced below).
version: '3.5'
services:
datahub-mae-consumer:
environment:
# ENABLE_* flags are read by the consumer's start.sh to attach the java agents.
- ENABLE_PROMETHEUS=true
- ENABLE_OTEL=true
# OTEL_* variables configure the OpenTelemetry agent: traces are exported to
# the jaeger-all-in-one service and metrics export is turned off.
- OTEL_TRACES_EXPORTER=jaeger
- OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250
- OTEL_METRICS_EXPORTER=none
- OTEL_SERVICE_NAME=datahub-mae-consumer
ports:
# Port the Prometheus JMX exporter agent is started on in start.sh; only the
# container port is listed, so no fixed host port is requested.
- "4318"

datahub-mce-consumer:
environment:
# Same settings as the MAE consumer above, with its own service name.
- ENABLE_PROMETHEUS=true
- ENABLE_OTEL=true
- OTEL_TRACES_EXPORTER=jaeger
- OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250
- OTEL_METRICS_EXPORTER=none
- OTEL_SERVICE_NAME=datahub-mce-consumer
ports:
- "4318"
43 changes: 43 additions & 0 deletions docker/monitoring/docker-compose.monitoring.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
---
# Compose override that adds the monitoring stack (Jaeger, Prometheus, Grafana)
# and enables the metrics / tracing java agents on datahub-gms. The dev scripts
# include it when MONITORING=true.
# NOTE(review): the three monitoring images below use the floating "latest"
# tag, so rebuilds are not reproducible — consider pinning versions.
version: '3.5'
services:
datahub-gms:
environment:
# ENABLE_* flags are read by the GMS start.sh to attach the java agents.
- ENABLE_PROMETHEUS=true
- ENABLE_OTEL=true
# OTEL_* variables configure the OpenTelemetry agent: traces are exported to
# the jaeger-all-in-one service defined below; metrics export is turned off.
- OTEL_TRACES_EXPORTER=jaeger
- OTEL_EXPORTER_JAEGER_ENDPOINT=http://jaeger-all-in-one:14250
- OTEL_METRICS_EXPORTER=none
- OTEL_SERVICE_NAME=datahub-gms
ports:
# Port the Prometheus JMX exporter agent is started on in start.sh; only the
# container port is listed, so no fixed host port is requested.
- "4318"

# Jaeger
jaeger-all-in-one:
image: jaegertracing/all-in-one:latest
ports:
# 16686 is the only port pinned to the same number on the host.
# 14250 is the port targeted by OTEL_EXPORTER_JAEGER_ENDPOINT above.
- "16686:16686"
- "14268"
- "14250"

prometheus:
container_name: prometheus
image: prom/prometheus:latest
volumes:
# NOTE(review): host paths in this file start with ./monitoring, so they appear
# to be resolved from the docker/ directory the dev scripts run docker-compose
# in, not from this file's own directory — confirm.
- ./monitoring/prometheus.yaml:/etc/prometheus/prometheus.yml
ports:
# Container port 9090 is published on host port 9089.
- "9089:9090"

grafana:
image: grafana/grafana:latest
ports:
# Container port 3000 is published on host port 3001.
- "3001:3000"
volumes:
# Named volume for Grafana's data directory, plus the provisioning
# directories for datasources and dashboards.
- grafana-storage:/var/lib/grafana
- ./monitoring/grafana/datasources:/etc/grafana/provisioning/datasources
- ./monitoring/grafana/dashboards:/etc/grafana/provisioning/dashboards
depends_on:
- prometheus

volumes:
grafana-storage:
9 changes: 9 additions & 0 deletions docker/monitoring/grafana/dashboards/dashboard.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Grafana dashboard provisioning config. docker-compose.monitoring.yml mounts
# the directory containing this file into the grafana container at
# /etc/grafana/provisioning/dashboards, which is also the path configured below.
apiVersion: 1

providers:
- name: Default # A uniquely identifiable name for the provider
folder: Services # The folder where to place the dashboards
type: file
# NOTE(review): presumably lets provisioned dashboards be edited from the
# Grafana UI — confirm against the Grafana provisioning documentation.
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards
Loading

0 comments on commit 8747fbe

Please sign in to comment.