Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add post prod deploy smoke test and alert #7057

Merged
merged 24 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions .github/actions/post-deploy-smoke-test/action.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
name: Smoke test post deploy
description: Invoke a script that visits a deploy smoke check page that displays whether the backend / db are healthy.
inputs:
base_domain_name:
description: The domain where the application is deployed (e.g. "simplereport.gov" or "test.simplereport.gov")
required: true
runs:
using: composite
steps:
- name: create env file
shell: bash
working-directory: frontend
run: |
touch .env
echo REACT_APP_BASE_URL=${{ inputs.base_domain_name }}>> .env.production.local
- name: Run smoke test script
shell: bash
working-directory: frontend
run: yarn smoke:deploy:ci
2 changes: 2 additions & 0 deletions .github/workflows/deployProd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,5 @@ jobs:
:siren-gif: Deploy to ${{ env.DEPLOY_ENV }} failed. ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} :siren-gif:
webhook_url: ${{ secrets.SR_ALERTS_SLACK_WEBHOOK_URL }}
user_map: $${{ secrets.SR_ALERTS_GITHUB_SLACK_MAP }}

# a post-prod health check workflow is defined in smokeTestDeployProd. See the Alert response wiki page for more details
48 changes: 48 additions & 0 deletions .github/workflows/smokeTestDeployProd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
name: Smoke test deploy Prod
run-name: Smoke test the deploy for prod by @${{ github.actor }}

on:
workflow_run:
workflows: [ "Deploy Prod" ]
types:
- completed

env:
NODE_VERSION: 18

jobs:
smoke-test-front-and-back-end:
runs-on: ubuntu-latest
environment: Production
steps:
- uses: actions/checkout@v4
- name: Use Node.js
uses: actions/setup-node@v3
with:
node-version: ${{env.NODE_VERSION}}
- name: Cache yarn
uses: actions/[email protected]
with:
path: ~/.cache/yarn
key: ${{ runner.os }}-yarn-${{ hashFiles('**/yarn.lock') }}
- name: Set up dependencies
working-directory: frontend
run: yarn install --prefer-offline
- name: Smoke test the env
uses: ./.github/actions/post-deploy-smoke-test
with:
base_domain_name: ${{ vars.BASE_DOMAIN_NAME }}
slack_alert:
runs-on: ubuntu-latest
if: failure()
needs: [ smoke-test-front-and-back-end ]
steps:
- uses: actions/checkout@v4
- name: Send alert to Slack
uses: ./.github/actions/slack-message
with:
username: ${{ github.actor }}
description: |
:siren-gif: Post-deploy smoke test couldn't verify that the frontend is talking to the backend. ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }} :siren-gif:
webhook_url: ${{ secrets.SR_ALERTS_SLACK_WEBHOOK_URL }}
user_map: $${{ secrets.SR_ALERTS_GITHUB_SLACK_MAP }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package gov.cdc.usds.simplereport.api.heathcheck;

import com.okta.sdk.resource.client.ApiException;
import gov.cdc.usds.simplereport.db.repository.FeatureFlagRepository;
import gov.cdc.usds.simplereport.idp.repository.OktaRepository;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.hibernate.exception.JDBCConnectionException;
import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.actuate.health.HealthIndicator;
import org.springframework.stereotype.Component;

@Component("backend-and-db-smoke-test")
@Slf4j
@RequiredArgsConstructor
public class BackendAndDatabaseHealthIndicator implements HealthIndicator {
private final FeatureFlagRepository _ffRepo;
private final OktaRepository _oktaRepo;
public static final String ACTIVE_LITERAL = "ACTIVE";

@Override
public Health health() {
try {
_ffRepo.findAll();
String oktaStatus = _oktaRepo.getApplicationStatusForHealthCheck();

if (!ACTIVE_LITERAL.equals(oktaStatus)) {
log.info("Okta status didn't return ACTIVE, instead returned " + oktaStatus);
return Health.down().build();
}

return Health.up().build();
} catch (JDBCConnectionException e) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wasn't sure if there was a better error to throw for "database didn't connect". If folks have suggestions happy to change this!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forget if I mentioned this or not while we were pairing, but my thought was to grab the most recent entry from the databasechangelog table and report something from that table (maybe the md5checksum?) back to the frontend that might be useful to know. Ideally some value that is useful but also definitely not sensitive information.

cc @alismx for the idea of having the checksum be visible.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Happy to change the db call to the checksum table, but if we want to pass that info back to the frontend, we'll need to configure the show-details flag on the health check config to always since we'd be hitting it unauthed. Those details include info about the filepath of the application, state of DB, etc that someone would theoretically be able to see if they found that endpoint.

Don't think there's anything too sensitive here, but since passing data back to the frontend was more of a nice to have, I decided to lean against flipping the flag. If we feel strongly that the checksum is worth having on the frontend / we get a security consult that the extra info isn't an issue, happy to make the relevant changes.

Let me know what you think!

return Health.down().build();
// Okta API call errored
} catch (ApiException e) {
log.info(e.getMessage());
return Health.down().build();
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package gov.cdc.usds.simplereport.config;

import com.okta.spring.boot.oauth.Okta;
import gov.cdc.usds.simplereport.api.heathcheck.BackendAndDatabaseHealthIndicator;
import gov.cdc.usds.simplereport.service.model.IdentityAttributes;
import gov.cdc.usds.simplereport.service.model.IdentitySupplier;
import lombok.extern.slf4j.Slf4j;
Expand Down Expand Up @@ -57,6 +58,8 @@ public SecurityFilterChain filterChain(HttpSecurity http) throws Exception {
.permitAll()
.requestMatchers(EndpointRequest.to(InfoEndpoint.class))
.permitAll()
.requestMatchers(EndpointRequest.to(BackendAndDatabaseHealthIndicator.class))
.permitAll()
// Patient experience authorization is handled in PatientExperienceController
// If this configuration changes, please update the documentation on both sides
.requestMatchers(HttpMethod.POST, WebConfiguration.PATIENT_EXPERIENCE)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package gov.cdc.usds.simplereport.idp.repository;

import static gov.cdc.usds.simplereport.api.heathcheck.BackendAndDatabaseHealthIndicator.ACTIVE_LITERAL;

import com.okta.sdk.resource.model.UserStatus;
import gov.cdc.usds.simplereport.api.CurrentTenantDataAccessContextHolder;
import gov.cdc.usds.simplereport.api.model.errors.ConflictingUserException;
Expand Down Expand Up @@ -431,4 +433,9 @@ public Integer getUsersInSingleFacility(Facility facility) {

return accessCount;
}

@Override
public String getApplicationStatusForHealthCheck() {
return ACTIVE_LITERAL;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,11 @@ public PartialOktaUser findUser(String username) {
.build();
}

@Override
public String getApplicationStatusForHealthCheck() {
return app.getStatus().toString();
}

private Optional<OrganizationRoleClaims> getOrganizationRoleClaimsFromAuthorities(
Collection<String> authorities) {
List<OrganizationRoleClaims> claims = extractor.convertClaims(authorities);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,6 @@ List<String> updateUserPrivilegesAndGroupAccess(
Integer getUsersInSingleFacility(Facility facility);

PartialOktaUser findUser(String username);

String getApplicationStatusForHealthCheck();
}
1 change: 1 addition & 0 deletions backend/src/main/resources/application.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ management:
endpoint.health.probes.enabled: true
endpoint.info.enabled: true
endpoints.web.exposure.include: health, info
endpoint.health.show-components: always
Copy link
Contributor Author

@fzhao99 fzhao99 Dec 12, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Need this tweak since the new actuator endpoint is a component of the health actuator. Without it, the API endpoint (very frustratingly) doesn't load

https://www.baeldung.com/spring-boot-health-indicators#customhealthindicators

okta:
oauth2:
issuer: https://hhs-prime.okta.com/oauth2/default
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package gov.cdc.usds.simplereport.api.healthcheck;

import static gov.cdc.usds.simplereport.api.heathcheck.BackendAndDatabaseHealthIndicator.ACTIVE_LITERAL;
import static org.assertj.core.api.Assertions.assertThat;
import static org.mockito.Mockito.when;

import gov.cdc.usds.simplereport.api.heathcheck.BackendAndDatabaseHealthIndicator;
import gov.cdc.usds.simplereport.db.repository.BaseRepositoryTest;
import gov.cdc.usds.simplereport.db.repository.FeatureFlagRepository;
import gov.cdc.usds.simplereport.idp.repository.OktaRepository;
import java.sql.SQLException;
import java.util.List;
import lombok.RequiredArgsConstructor;
import org.hibernate.exception.JDBCConnectionException;
import org.junit.jupiter.api.Test;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.actuate.health.Health;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.boot.test.mock.mockito.SpyBean;

@RequiredArgsConstructor
@EnableConfigurationProperties
class BackendAndDatabaseHealthIndicatorTest extends BaseRepositoryTest {

@SpyBean private FeatureFlagRepository mockFeatureFlagRepo;
@SpyBean private OktaRepository mockOktaRepo;

@Autowired private BackendAndDatabaseHealthIndicator indicator;

@Test
void health_succeedsWhenReposDoesntThrow() {
when(mockFeatureFlagRepo.findAll()).thenReturn(List.of());
when(mockOktaRepo.getApplicationStatusForHealthCheck()).thenReturn(ACTIVE_LITERAL);

assertThat(indicator.health()).isEqualTo(Health.up().build());
}

@Test
void health_failsWhenFeatureFlagRepoDoesntThrow() {
JDBCConnectionException dbConnectionException =
new JDBCConnectionException(
"connection issue", new SQLException("some reason", "some state"));
when(mockFeatureFlagRepo.findAll()).thenThrow(dbConnectionException);
assertThat(indicator.health()).isEqualTo(Health.down().build());
}

@Test
void health_failsWhenOktaRepoDoesntReturnActive() {
when(mockOktaRepo.getApplicationStatusForHealthCheck()).thenReturn("INACTIVE");
assertThat(indicator.health()).isEqualTo(Health.down().build());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import gov.cdc.usds.simplereport.api.CurrentOrganizationRolesContextHolder;
import gov.cdc.usds.simplereport.api.CurrentTenantDataAccessContextHolder;
import gov.cdc.usds.simplereport.api.WebhookContextHolder;
import gov.cdc.usds.simplereport.api.heathcheck.BackendAndDatabaseHealthIndicator;
import gov.cdc.usds.simplereport.api.pxp.CurrentPatientContextHolder;
import gov.cdc.usds.simplereport.config.AuditingConfig;
import gov.cdc.usds.simplereport.config.AuthorizationProperties;
Expand Down Expand Up @@ -100,7 +101,8 @@
CurrentTenantDataAccessContextHolder.class,
WebhookContextHolder.class,
TenantDataAccessService.class,
PatientSelfRegistrationLinkService.class
PatientSelfRegistrationLinkService.class,
BackendAndDatabaseHealthIndicator.class
})
@EnableConfigurationProperties({InitialSetupProperties.class, AuthorizationProperties.class})
public class SliceTestConfiguration {
Expand Down
35 changes: 35 additions & 0 deletions frontend/deploy-smoke.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Script that does a simple Selenium scrape of
// - A frontend page with a simple status message that hits a health check backend
// endpoint which does a simple ping to a non-sensitive DB table to verify
// all the connections are good.
// https://github.com/CDCgov/prime-simplereport/pull/7057

require("dotenv").config();
let { Builder } = require("selenium-webdriver");
const Chrome = require("selenium-webdriver/chrome");

const appUrl = process.env.REACT_APP_BASE_URL.includes("localhost")
? `${process.env.REACT_APP_BASE_URL}/health/deploy-smoke-test`
: `${process.env.REACT_APP_BASE_URL}/app/health/deploy-smoke-test`;

console.log(`Running smoke test for ${appUrl}`);
const options = new Chrome.Options();
const driver = new Builder()
.forBrowser("chrome")
.setChromeOptions(options.addArguments("--headless=new"))
.build();
driver
.navigate()
.to(`${appUrl}`)
.then(() => {
let value = driver.findElement({ id: "root" }).getText();
return value;
})
.then((value) => {
driver.quit();
return value;
})
.then((value) => {
if (value.includes("success")) process.exit(0);
process.exit(1);
});
6 changes: 5 additions & 1 deletion frontend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@
"storybook": "yarn create-storybook-public && SASS_PATH=$(cd ./node_modules && pwd):$(cd ./node_modules/@uswds && pwd):$(cd ./node_modules/@uswds/uswds/packages && pwd):$(cd ./src/scss && pwd) storybook dev -p 6006 -s ../storybook_public",
"build-storybook": "yarn create-storybook-public && REACT_APP_BACKEND_URL=http://localhost:8080 SASS_PATH=$(cd ./node_modules && pwd):$(cd ./node_modules/@uswds && pwd):$(cd ./node_modules/@uswds/uswds/packages && pwd):$(cd ./src/scss && pwd) storybook build -s storybook_public",
"maintenance:start": "[ -z \"$MAINTENANCE_MESSAGE\" ] && echo \"MAINTENANCE_MESSAGE must be set!\" || (echo $MAINTENANCE_MESSAGE > maintenance.json && yarn maintenance:deploy && rm maintenance.json)",
"maintenance:deploy": "[ -z \"$MAINTENANCE_ENV\" ] && echo \"MAINTENANCE_ENV must be set!\" || az storage blob upload -f maintenance.json -n maintenance.json -c '$web' --account-name simplereport${MAINTENANCE_ENV}app --overwrite"
"maintenance:deploy": "[ -z \"$MAINTENANCE_ENV\" ] && echo \"MAINTENANCE_ENV must be set!\" || az storage blob upload -f maintenance.json -n maintenance.json -c '$web' --account-name simplereport${MAINTENANCE_ENV}app --overwrite",
"smoke:deploy:local": "node -r dotenv/config deploy-smoke.js dotenv_config_path=.env.local",
"smoke:deploy:ci": "node -r dotenv/config deploy-smoke.js dotenv_config_path=.env.production.local"
},
"prettier": {
"singleQuote": false
Expand Down Expand Up @@ -205,6 +207,7 @@
"chromatic": "^6.10.2",
"dayjs": "^1.10.7",
"depcheck": "^1.4.3",
"dotenv": "^16.3.1",
"eslint-config-prettier": "^8.8.0",
"eslint-plugin-graphql": "^4.0.0",
"eslint-plugin-import": "^2.29.0",
Expand All @@ -224,6 +227,7 @@
"prettier": "^2.8.4",
"redux-mock-store": "^1.5.4",
"sass": "^1.63.6",
"selenium-webdriver": "^4.16.0",
"storybook": "^7.5.2",
"storybook-addon-apollo-client": "^5.0.0",
"stylelint": "^13.13.1",
Expand Down
30 changes: 30 additions & 0 deletions frontend/src/app/DeploySmokeTest.test.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import { render, screen, waitFor } from "@testing-library/react";
import { FetchMock } from "jest-fetch-mock";

import DeploySmokeTest from "./DeploySmokeTest";

describe("DeploySmokeTest", () => {
beforeEach(() => {
(fetch as FetchMock).resetMocks();
});

it("renders success when returned from the API endpoint", async () => {
(fetch as FetchMock).mockResponseOnce(JSON.stringify({ status: "UP" }));

render(<DeploySmokeTest />);
await waitFor(() =>
expect(screen.queryByText("Status loading...")).not.toBeInTheDocument()
);
expect(screen.getByText("Status returned success :)"));
});

it("renders failure when returned from the API endpoint", async () => {
(fetch as FetchMock).mockResponseOnce(JSON.stringify({ status: "DOWN" }));

render(<DeploySmokeTest />);
await waitFor(() =>
expect(screen.queryByText("Status loading...")).not.toBeInTheDocument()
);
expect(screen.getByText("Status returned failure :("));
});
});
27 changes: 27 additions & 0 deletions frontend/src/app/DeploySmokeTest.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import { useEffect, useState } from "react";

import FetchClient from "./utils/api";

const api = new FetchClient(undefined, { mode: "cors" });
const DeploySmokeTest = (): JSX.Element => {
const [success, setSuccess] = useState<boolean>();
useEffect(() => {
api
.getRequest("/actuator/health/backend-and-db-smoke-test")
.then((response) => {
const status = JSON.parse(response);
if (status.status === "UP") return setSuccess(true);
setSuccess(false);
})
.catch((e) => {
console.error(e);
setSuccess(false);
});
}, []);

if (success === undefined) return <>Status loading...</>;
if (success) return <> Status returned success :) </>;
return <> Status returned failure :( </>;
};

export default DeploySmokeTest;
3 changes: 3 additions & 0 deletions frontend/src/app/HealthChecks.tsx
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import { Route, Routes } from "react-router-dom";

import DeploySmokeTest from "./DeploySmokeTest";

const HealthChecks = () => (
<Routes>
<Route path="ping" element={<div>pong</div>} />
<Route
path="commit"
element={<div>{process.env.REACT_APP_CURRENT_COMMIT}</div>}
/>
<Route path="deploy-smoke-test" element={<DeploySmokeTest />} />
</Routes>
);

Expand Down
Loading
Loading