From 28545fd618b874fef19c2f50d4c3a3cf21b3a7c3 Mon Sep 17 00:00:00 2001 From: Ricardo Campos Date: Tue, 9 Jan 2024 14:36:33 -0300 Subject: [PATCH] feat: add files for saving service api logs into files (#169) * feat: add files for saving service api logs into files issue 269 on jira * feat: add missing values for init files * feat: add fluentbit deployment, logging at console only * feat: fix openshift deployment file paths * feat: file mapping with wrong values --- .github/workflows/merge.yml | 34 +- .github/workflows/pr-open.yml | 12 +- backend/openshift.deploy.yml | 13 + backend/pom.xml | 3 + .../src/main/resources/application.properties | 7 +- backend/src/main/resources/logback-spring.xml | 70 ++++ common/openshift.fluentbit.yml | 344 ++++++++++++++++++ common/openshift.init.yml | 23 ++ 8 files changed, 499 insertions(+), 7 deletions(-) create mode 100644 backend/src/main/resources/logback-spring.xml create mode 100644 common/openshift.fluentbit.yml diff --git a/.github/workflows/merge.yml b/.github/workflows/merge.yml index a35deff3..e42e284e 100644 --- a/.github/workflows/merge.yml +++ b/.github/workflows/merge.yml @@ -49,6 +49,11 @@ jobs: overwrite: false parameters: -p ZONE=test -p NAME=${{ github.event.repository.name }} + -p AWS_KINESIS_STREAM='${{ secrets.AWS_KINESIS_STREAM }}' + -p AWS_KINESIS_ROLE_ARN='${{ secrets.AWS_KINESIS_ROLE_ARN }}' + -p AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} + -p AWS_ACCESS_KEY_SECRET='${{ secrets.AWS_ACCESS_KEY_SECRET }}' + triggers: ('common/' 'database/' 'backend/' 'frontend/') deploys-test: name: TEST Deployments @@ -59,19 +64,27 @@ jobs: issues: write strategy: matrix: - name: [database, backend, frontend] + name: [database, backend, frontend, fluentbit] include: - name: database overwrite: false + file: database/openshift.deploy.yml - name: backend verification_path: actuator/health + file: backend/openshift.deploy.yml + parameters: + -p RESULTS_ENV_OPENSEARCH=test - name: frontend + file: frontend/openshift.deploy.yml parameters: -p VITE_USER_POOLS_WEB_CLIENT_ID=${{ vars.VITE_USER_POOLS_WEB_CLIENT_ID }} + - name: fluentbit + file: common/openshift.fluentbit.yml + overwrite: true steps: - uses: bcgov-nr/action-deployer-openshift@v2.0.0 with: - file: ${{ matrix.name }}/openshift.deploy.yml + file: ${{ matrix.file }} oc_namespace: ${{ vars.OC_NAMESPACE }} oc_server: ${{ vars.OC_SERVER }} oc_token: ${{ secrets.OC_TOKEN }} @@ -98,6 +111,11 @@ jobs: overwrite: false parameters: -p ZONE=prod -p NAME=${{ github.event.repository.name }} + -p AWS_KINESIS_STREAM='${{ secrets.AWS_KINESIS_STREAM }}' + -p AWS_KINESIS_ROLE_ARN='${{ secrets.AWS_KINESIS_ROLE_ARN }}' + -p AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} + -p AWS_ACCESS_KEY_SECRET='${{ secrets.AWS_ACCESS_KEY_SECRET }}' + triggers: ('common/' 'database/' 'backend/' 'frontend/') image-promotions: name: Promote images to PROD @@ -123,19 +141,27 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - name: [database, backend, frontend] + name: [database, backend, frontend, fluentbit] include: - name: database overwrite: false + file: database/openshift.deploy.yml - name: backend verification_path: actuator/health + file: backend/openshift.deploy.yml + parameters: + -p RESULTS_ENV_OPENSEARCH=production - name: frontend + file: frontend/openshift.deploy.yml parameters: -p VITE_USER_POOLS_WEB_CLIENT_ID=${{ vars.VITE_USER_POOLS_WEB_CLIENT_ID }} + - name: fluentbit + file: common/openshift.fluentbit.yml + overwrite: true steps: - uses: bcgov-nr/action-deployer-openshift@v2.0.0 with: - file: ${{ 
matrix.name }}/openshift.deploy.yml + file: ${{ matrix.file }} oc_namespace: ${{ vars.OC_NAMESPACE }} oc_server: ${{ vars.OC_SERVER }} oc_token: ${{ secrets.OC_TOKEN }} diff --git a/.github/workflows/pr-open.yml b/.github/workflows/pr-open.yml index f2147e2a..753da2ca 100644 --- a/.github/workflows/pr-open.yml +++ b/.github/workflows/pr-open.yml @@ -54,6 +54,10 @@ jobs: parameters: -p ZONE=${{ steps.tag.outputs.tag }} -p NAME=${{ github.event.repository.name }} + -p AWS_KINESIS_STREAM='${{ secrets.AWS_KINESIS_STREAM }}' + -p AWS_KINESIS_ROLE_ARN='${{ secrets.AWS_KINESIS_ROLE_ARN }}' + -p AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} + -p AWS_ACCESS_KEY_SECRET='${{ secrets.AWS_ACCESS_KEY_SECRET }}' triggers: ('common/' 'database/' 'backend/' 'frontend/') builds: @@ -90,7 +94,7 @@ jobs: runs-on: ubuntu-22.04 strategy: matrix: - name: [database, backend, frontend] + name: [database, backend, frontend, fluentbit] include: - name: database file: database/openshift.deploy.yml @@ -112,10 +116,14 @@ jobs: -p VITE_USER_POOLS_WEB_CLIENT_ID=${{ vars.VITE_USER_POOLS_WEB_CLIENT_ID }} -p MIN_REPLICAS=1 -p MAX_REPLICAS=2 + - name: fluentbit + file: common/openshift.fluentbit.yml + overwrite: true + triggers: ('common/' 'database/' 'backend/' 'frontend/') steps: - uses: bcgov-nr/action-deployer-openshift@v2.0.0 with: - file: ${{ matrix.name }}/openshift.deploy.yml + file: ${{ matrix.file }} oc_namespace: ${{ vars.OC_NAMESPACE }} oc_server: ${{ vars.OC_SERVER }} oc_token: ${{ secrets.OC_TOKEN }} diff --git a/backend/openshift.deploy.yml b/backend/openshift.deploy.yml index fa45a1cc..7db7830d 100644 --- a/backend/openshift.deploy.yml +++ b/backend/openshift.deploy.yml @@ -44,6 +44,10 @@ parameters: - name: DB_POOL_MAX_LIFETIME description: Maximum lifetime of a connection in the pool. value: "1800000" + - name: RESULTS_ENV_OPENSEARCH + description: Environment name for OpenSearch. 
One of: development, test, production
+    required: true
+    value: development
 objects:
   - apiVersion: v1
     kind: ImageStream
@@ -93,6 +97,9 @@ objects:
           - image: "${NAME}-${ZONE}-${COMPONENT}:${IMAGE_TAG}"
             imagePullPolicy: Always
             name: "${NAME}"
+            volumeMounts:
+              - name: ${NAME}-${ZONE}-fluentbit-logs
+                mountPath: /logs
             env:
               - name: POSTGRES_HOST
                 value: ${NAME}-${ZONE}-database
@@ -117,6 +124,8 @@
                 value: ${DB_POOL_IDLE_TIMEOUT}
               - name: DB_POOL_MAX_LIFETIME
                 value: ${DB_POOL_MAX_LIFETIME}
+              - name: RESULTS_ENV_OPENSEARCH
+                value: ${RESULTS_ENV_OPENSEARCH}
             ports:
               - containerPort: 8080
                 protocol: TCP
@@ -147,6 +156,10 @@
               initialDelaySeconds: 60
               periodSeconds: 30
               timeoutSeconds: 5
+          volumes:
+            - name: ${NAME}-${ZONE}-fluentbit-logs
+              persistentVolumeClaim:
+                claimName: ${NAME}-${ZONE}-fluentbit-logs
   - apiVersion: v1
     kind: Service
     metadata:
diff --git a/backend/pom.xml b/backend/pom.xml
index 43a0e0a3..27dedfe8 100644
--- a/backend/pom.xml
+++ b/backend/pom.xml
@@ -212,6 +212,9 @@
               <artifactId>lombok</artifactId>
             </exclude>
           </excludes>
+          <profiles>
+            <profile>dev</profile>
+          </profiles>
         </configuration>
       </plugin>
     </plugins>
diff --git a/backend/src/main/resources/application.properties b/backend/src/main/resources/application.properties
index 12a2b9b0..8ed81ba7 100644
--- a/backend/src/main/resources/application.properties
+++ b/backend/src/main/resources/application.properties
@@ -1,6 +1,6 @@
 # General application
 logging.level.ca.bc.gov.restapi.results = ${LOGGING_LEVEL:INFO}
-spring.application.name = results-backend-api
+spring.application.name = results-api
 server.error.include-message=always
 server.port = ${SERVER_PORT:8080}
@@ -31,3 +31,8 @@
 spring.jpa.show-sql = true
 spring.jpa.hibernate.ddl-auto = update
 spring.jpa.defer-datasource-initialization=true
 spring.sql.init.mode=always
+
+# OpenSearch settings
+nr-results-ecs-version = 8.9
+nr-results-backend-env-opensearch = ${RESULTS_ENV_OPENSEARCH:development}
+nr-results-team-email-address = Team.Silva@gov.bc.ca
diff --git a/backend/src/main/resources/logback-spring.xml b/backend/src/main/resources/logback-spring.xml
new file mode 100644
index 00000000..384463e6
--- /dev/null
+++ b/backend/src/main/resources/logback-spring.xml
@@ -0,0 +1,70 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+
+  <!-- Values sourced from application.properties -->
+  <springProperty scope="context" name="applicationName" source="spring.application.name"/>
+  <springProperty scope="context" name="ecsVersion" source="nr-results-ecs-version"/>
+  <springProperty scope="context" name="serviceEnv" source="nr-results-backend-env-opensearch"/>
+  <springProperty scope="context" name="teamEmail" source="nr-results-team-email-address"/>
+
+  <property name="LOGS" value="/logs"/>
+
+  <appender name="Console" class="ch.qos.logback.core.ConsoleAppender">
+    <layout class="ch.qos.logback.classic.PatternLayout">
+      <pattern>%date{yyyy-MM-dd HH:mm:ss.SSS} %highlight(%-5level) [%blue(%t)] %yellow(%c): %msg%n%throwable</pattern>
+    </layout>
+  </appender>
+
+  <!-- ECS-formatted JSON, one event per line, for Fluent Bit to tail -->
+  <appender name="RollingFile" class="ch.qos.logback.core.rolling.RollingFileAppender">
+    <file>${LOGS}/results-api.log</file>
+    <encoder class="ch.qos.logback.classic.encoder.PatternLayoutEncoder">
+      <pattern>{%nopex"labels.project":"${applicationName}","service.environment":"${serviceEnv}","@timestamp":"%date{yyyy-MM-dd HH:mm:ss.SSS}","log.level":"%p","log.logger":"%logger{36}","message":"%replace(%msg){'\"','\\"'}","ecs.version":"${ecsVersion}","event.category":"web","event.dataset":"application.log.utc","event.ingested":"diagnostic","event.kind":"event","organization.id":"${teamEmail}","organization.name":"TeamSILVA"}%n</pattern>
+    </encoder>
+    <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
+      <fileNamePattern>${LOGS}/archived/results-api-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
+      <maxFileSize>10MB</maxFileSize>
+    </rollingPolicy>
+  </appender>
+
+  <!-- dev profile: log to console only -->
+  <springProfile name="dev">
+    <root level="info">
+      <appender-ref ref="Console"/>
+    </root>
+  </springProfile>
+
+  <!-- all other profiles: console plus rolling file -->
+  <springProfile name="!dev">
+    <root level="info">
+      <appender-ref ref="Console"/>
+      <appender-ref ref="RollingFile"/>
+    </root>
+  </springProfile>
+
+</configuration>
diff --git a/common/openshift.fluentbit.yml b/common/openshift.fluentbit.yml
new file mode 100644
index 00000000..9d6a851b
--- /dev/null
+++ b/common/openshift.fluentbit.yml
@@ -0,0 +1,344 @@
apiVersion: template.openshift.io/v1
kind: Template
labels:
  app: ${NAME}-${ZONE}
parameters:
  - name: NAME
    description: Product name
    value: nr-silva
  - name: COMPONENT
    description: Component name
    value: fluentbit
  - name: ZONE
    description: Deployment zone, e.g. 
pr-### or prod + required: true + - name: AWS_KINESIS_STREAM + description: AWS Kinesis Stream identifier + required: false + - name: AWS_KINESIS_ROLE_ARN + description: AWS OpenSearch/Kinesis Resource Name + required: false + - name: FLUENT_CONF_HOME + description: FluentBit configuration home + value: "/fluent-bit/etc" + - name: FLUENT_VERSION + description: FluentBit version (docker tag). You may include a debug version. + value: "2.1" + - name: LOGGING_CPU_LIMIT + description: Limit Peak CPU per pod (in millicores ex. 1000m) + displayName: CPU Limit + value: 100m + - name: LOGGING_CPU_REQUEST + description: Requested CPU per pod (in millicores ex. 500m) + displayName: CPU Request + value: 10m + - name: LOGGING_MEMORY_LIMIT + description: Limit Peak Memory per pod (in gigabytes Gi or megabytes Mi ex. 2Gi) + displayName: Memory Limit + value: 64Mi + - name: LOGGING_MEMORY_REQUEST + description: Requested Memory per pod (in gigabytes Gi or megabytes Mi ex. 500Mi) + displayName: Memory Request + value: 16Mi +objects: + - kind: DeploymentConfig + apiVersion: v1 + metadata: + labels: + app: ${NAME}-${ZONE} + name: ${NAME}-${ZONE}-${COMPONENT} + spec: + replicas: 1 + selector: + deploymentconfig: ${NAME}-${ZONE}-${COMPONENT} + strategy: + type: Rolling + template: + metadata: + labels: + app: ${NAME}-${ZONE} + deploymentconfig: ${NAME}-${ZONE}-${COMPONENT} + spec: + containers: + - name: ${NAME} + image: docker.io/fluent/fluent-bit:${FLUENT_VERSION} + imagePullPolicy: Always + ports: + - containerPort: 2020 + name: metrics + protocol: TCP + - containerPort: 80 + name: http-plugin + protocol: TCP + resources: + requests: + cpu: "${LOGGING_CPU_REQUEST}" + memory: "${LOGGING_MEMORY_REQUEST}" + limits: + cpu: "${LOGGING_CPU_LIMIT}" + memory: "${LOGGING_MEMORY_LIMIT}" + env: + - name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: ${NAME}-${ZONE}-fluentbit + key: aws-access-key-id + - name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: ${NAME}-${ZONE}-fluentbit + key: aws-access-key-secret + - name: STREAM_NAME + valueFrom: + secretKeyRef: + name: ${NAME}-${ZONE}-fluentbit + key: aws-kinesis-stream + - name: ROLE_ARN + valueFrom: + secretKeyRef: + name: ${NAME}-${ZONE}-fluentbit + key: aws-kinesis-role-arn + - name: FLUENT_CONF_HOME + value: ${FLUENT_CONF_HOME} + - name: FLUENT_VERSION + value: ${FLUENT_VERSION} + - name: AGENT_NAME + value: ${NAME}-${ZONE} + volumeMounts: + - name: ${NAME}-${ZONE}-${COMPONENT}-logs + mountPath: /logs + - name: ${NAME}-${ZONE}-${COMPONENT}-configs + mountPath: ${FLUENT_CONF_HOME} + # TLS cert for connecting to fluentd (enable for TLS) + # - mountPath: /fluent-bit/ssl + # name: fluent-bit-tls + volumes: + - name: ${NAME}-${ZONE}-${COMPONENT}-logs + persistentVolumeClaim: + claimName: ${NAME}-${ZONE}-${COMPONENT}-logs + - name: ${NAME}-${ZONE}-${COMPONENT}-configs + configMap: + name: ${NAME}-${ZONE}-${COMPONENT}-configs + items: + - key: filters.conf + path: filters.conf + - key: fluent-bit.conf + path: fluent-bit.conf + - key: generic_json_parsers.conf + path: generic_json_parsers.conf + - key: host_metadata.lua + path: host_metadata.lua + - key: outputs.conf + path: outputs.conf + - key: parsers.conf + path: parsers.conf + - key: results_filter_filters.conf + path: results/filter/filters.conf + - key: results_input_inputs.conf + path: results/input/inputs.conf + - key: results_parser_parsers.conf + path: results/parser/parsers.conf + - key: timestamp.lua + path: timestamp.lua + defaultMode: 0644 + - kind: PersistentVolumeClaim + apiVersion: 
v1 + metadata: + labels: + app: ${NAME}-${ZONE} + name: ${NAME}-${ZONE}-${COMPONENT}-logs + spec: + accessModes: + - ReadWriteMany + resources: + requests: + storage: "50Mi" + storageClassName: netapp-file-standard + - kind: ConfigMap + apiVersion: v1 + metadata: + name: ${NAME}-${ZONE}-${COMPONENT}-configs + data: + filters.conf: | + [FILTER] + Name modify + Match * + Add agent.type fluentbit + Add agent.version ${FLUENT_VERSION} + Add agent.name ${AGENT_NAME} + Add ecs.version 8.9 + Rename event_sequence event.sequence + Rename log_file_path log.file.path + + [FILTER] + Name lua + Match results + script ${FLUENT_CONF_HOME}/timestamp.lua + time_as_table True + call append_event_created + + + # There is a bug when resolving environment variables with spaces in the value :( + # So, we have to use Lua script for now + # Reference: https://github.com/fluent/fluent-bit/issues/1225 + + [FILTER] + Name lua + Match * + script ${FLUENT_CONF_HOME}/host_metadata.lua + time_as_table True + call add_host_metadata + + fluent-bit.conf: | + [SERVICE] + Log_Level info + Parsers_File parsers.conf + + @INCLUDE results/input/inputs.conf + @INCLUDE results/filter/filters.conf + @INCLUDE filters.conf + @INCLUDE outputs.conf + + generic_json_parsers.conf: | + [PARSER] + Name generic_json + Format json + + host_metadata.lua: | + -- Space delimited values to array + function sdv2array(s) + delimiter = "%S+" + result = {}; + for match in string.gmatch(s, delimiter) do + table.insert(result, match); + end + return result; + end + + function isempty(s) + return s == nil or s == '' + end + + function copy(obj) + if type(obj) ~= 'table' then return obj end + local res = {} + for k, v in pairs(obj) do res[copy(k)] = copy(v) end + return res + end + + function remove_nil_fields(tag, timestamp, record) + return 2, timestamp, record + end + + + function add_host_metadata(tag, timestamp, record) + new_record = record + if isempty(new_record["host"]) then + new_record["host"] = {} + end + local host = new_record["host"] + if isempty(host["os"]) then + host["os"] = {} + end + host["os"]["name"] = os.getenv("HOST_OS_NAME") + host["os"]["type"] = os.getenv("HOST_OS_TYPE") + host["os"]["family"] = os.getenv("HOST_OS_FAMILY") + host["os"]["kernel"] = os.getenv("HOST_OS_KERNEL") + host["os"]["full"] = os.getenv("HOST_OS_FULL") + host["os"]["version"] = os.getenv("HOST_OS_VERSION") + host["ip"] = os.getenv("HOST_IP") + host["mac"] = os.getenv("HOST_MAC") + if os.getenv("HOSTNAME") ~= nil then + host["name"] = string.lower(os.getenv("HOSTNAME")) + end + if os.getenv("HOST_HOSTNAME") ~= nil then + host["hostname"] = string.lower(os.getenv("HOST_HOSTNAME")) + end + host["domain"] = os.getenv("HOST_DOMAIN") + host["architecture"] = os.getenv("HOST_ARCH") + + if not(isempty(host["ip"])) then + host["ip"] = sdv2array(host["ip"]) + else + host["ip"] = nil + end + + if not(isempty(host["mac"])) then + host["mac"] = sdv2array(host["mac"]) + else + host["mac"] = nil + end + + if not(isempty(host["name"])) then + host["name"] = sdv2array(host["name"]) + else + host["name"] = nil + end + + if not(isempty(host["domain"])) then + host["domain"] = sdv2array(host["domain"]) + else + host["domain"] = nil + end + + return 2, timestamp, new_record + end + + outputs.conf: | + [OUTPUT] + Name stdout + Match * + Retry_Limit 3 + + parsers.conf: | + @INCLUDE generic_json_parsers.conf + @INCLUDE results/parser/parsers.conf + + results_filter_filters.conf: | + [FILTER] + Name lua + Match results.* + script ${FLUENT_CONF_HOME}/timestamp.lua + time_as_table 
True + call append_timestamp + + [FILTER] + Name modify + Match results.* + Add service.name results + Add service.type results_api + Add @metadata.keyAsPath true + + results_input_inputs.conf: | + [INPUT] + Name tail + Tag results.log + Buffer_Max_Size 1024k + Parser results.json + Path /logs/results-api.log + Path_Key log_file_path + Offset_Key event_sequence + DB /logs/fluent-bit-logs.db + Read_from_Head True + Refresh_Interval 15 + + results_parser_parsers.conf: | + [PARSER] + Name results.json + Match * + Format json + Time_Key @timestamp + Time_Format %Y-%m-%d %H:%M:%S.%L + + timestamp.lua: | + function append_event_created(tag, timestamp, record) + new_record = record + new_record["event.created"] = (os.date("!%Y-%m-%dT%H:%M:%S", timestamp["sec"]) .. '.' .. math.floor(timestamp["nsec"] / 1000000) .. 'Z') + return 2, timestamp, new_record + end + + function append_timestamp(tag, timestamp, record) + new_record = record + new_record["@timestamp"] = (os.date("!%Y-%m-%dT%H:%M:%S", timestamp["sec"]) .. '.' .. math.floor(timestamp["nsec"] / 1000000) .. 'Z') + return 2, timestamp, new_record + end diff --git a/common/openshift.init.yml b/common/openshift.init.yml index 1168f410..49e4155b 100644 --- a/common/openshift.init.yml +++ b/common/openshift.init.yml @@ -14,6 +14,18 @@ parameters: description: Password for the PostgreSQL connection user. from: "[a-zA-Z0-9]{16}" generate: expression + - name: AWS_ACCESS_KEY_ID + description: AWS Access Key ID + value: default-value-for-now + - name: AWS_ACCESS_KEY_SECRET + description: AWS Access Key Secret + value: default-value-for-now + - name: AWS_KINESIS_STREAM + description: AWS Kinesis stream name + value: default-value-for-now + - name: AWS_KINESIS_ROLE_ARN + description: AWS Kinesis Role ARN + value: default-value-for-now objects: - apiVersion: v1 kind: Secret @@ -25,6 +37,17 @@ objects: database-name: ${NAME} database-password: ${DB_PASSWORD} database-user: ${NAME} + - apiVersion: v1 + kind: Secret + metadata: + name: ${NAME}-${ZONE}-fluentbit + labels: + app: ${NAME}-${ZONE} + stringData: + aws-access-key-id: ${AWS_ACCESS_KEY_ID} + aws-access-key-secret: ${AWS_ACCESS_KEY_SECRET} + aws-kinesis-stream: ${AWS_KINESIS_STREAM} + aws-kinesis-role-arn: ${AWS_KINESIS_ROLE_ARN} - apiVersion: networking.k8s.io/v1 kind: NetworkPolicy metadata:
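
For a local smoke test of the new templates, outside of the GitHub Actions pipeline, oc process can render and apply them the same way bcgov-nr/action-deployer-openshift does. A minimal sketch, assuming an active oc login session against the target namespace; the -p values below are illustrative placeholders, not real stream names, role ARNs, or credentials:

    # Render the Fluent Bit template locally and inspect the objects it would create.
    # ZONE, the stream name, and the role ARN are placeholder values.
    oc process -f common/openshift.fluentbit.yml \
      -p ZONE=test \
      -p AWS_KINESIS_STREAM=example-stream \
      -p AWS_KINESIS_ROLE_ARN=arn:aws:iam::123456789012:role/example-role \
      --local -o yaml

    # Apply the init template first so the ${NAME}-${ZONE}-fluentbit secret exists,
    # then deploy Fluent Bit, which tails /logs/results-api.log from the shared PVC.
    oc process -f common/openshift.init.yml -p ZONE=test -p NAME=nr-silva | oc apply -f -
    oc process -f common/openshift.fluentbit.yml -p ZONE=test | oc apply -f -

In the workflows above, the same parameters are supplied from repository secrets, so no AWS values are committed to the templates; at runtime the Fluent Bit container reads them back from the ${NAME}-${ZONE}-fluentbit secret created by common/openshift.init.yml.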