Skip to content

Commit

Permalink
Merge pull request #1 from nationalarchives/TDR-3600_create_draft_met…
Browse files Browse the repository at this point in the history
…adata_validator_lambda

Tdr 3600 create draft metadata validator lambda
  • Loading branch information
vimleshtna authored Feb 23, 2024
2 parents 6af1051 + 442ce6b commit 0aa9634
Show file tree
Hide file tree
Showing 25 changed files with 1,010 additions and 5 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Workflow run on every push to main: calls the shared TDR lambda build workflow, then dispatches deploy.yml for intg.
name: TDR Tag and pre deploy
on:
push:
branches:
- main
jobs:
# Delegates to the reusable lambda_build workflow in tdr-github-actions, passing `sbt assembly` as the build command.
pre-deploy:
uses: nationalarchives/tdr-github-actions/.github/workflows/lambda_build.yml@main
with:
repo-name: tdr-draft-metadata-validator
artifact-name: draft-metadata-validator
build-command: |
sbt assembly
secrets:
MANAGEMENT_ACCOUNT: ${{ secrets.MANAGEMENT_ACCOUNT }}
WORKFLOW_PAT: ${{ secrets.WORKFLOW_PAT }}
# Runs only after pre-deploy has finished and starts deploy.yml through the GitHub CLI.
deploy:
needs: pre-deploy
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
# Requests a deployment to intg of the version exposed by pre-deploy's `next-version` output.
- run: gh workflow run deploy.yml -f environment=intg -f to-deploy=${{ needs.pre-deploy.outputs.next-version }}
env:
GITHUB_TOKEN: ${{ secrets.WORKFLOW_PAT }}
33 changes: 33 additions & 0 deletions .github/workflows/deploy.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Manually dispatched workflow (also started by build.yml via `gh workflow run`) that deploys a chosen version of the Lambda.
name: TDR Deploy Draft Metadata Validator Lambda
on:
workflow_dispatch:
inputs:
# Target environment; restricted to the three options below and defaulting to intg.
environment:
type: choice
description: 'Environment'
required: true
options:
- intg
- staging
- prod
default: 'intg'
# Version identifier of the build to deploy; mandatory.
to-deploy:
description: 'Version to deploy'
required: true

permissions:
id-token: write
contents: write
jobs:
# Delegates to the reusable lambda_deploy workflow in tdr-github-actions, forwarding both dispatch inputs.
deploy:
uses: nationalarchives/tdr-github-actions/.github/workflows/lambda_deploy.yml@main
with:
lambda-name: draft-metadata-validator
# Must match the `assemblyJarName` configured in build.sbt.
deployment-package: draft-metadata-validator.jar
environment: ${{ github.event.inputs.environment }}
to-deploy: ${{ github.event.inputs.to-deploy }}
secrets:
ACCOUNT_NUMBER: ${{ secrets.ACCOUNT_NUMBER }}
MANAGEMENT_ACCOUNT: ${{ secrets.MANAGEMENT_ACCOUNT }}
WORKFLOW_PAT: ${{ secrets.WORKFLOW_PAT }}
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
18 changes: 18 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Runs the formatting check and the test suite on pushes to every branch except main and release-*.
name: TDR Run Lambda Tests
on:
push:
branches-ignore:
- main
- release-*
permissions:
id-token: write
contents: read
jobs:
# Delegates to the reusable tdr_test workflow in tdr-github-actions with the sbt command below.
test:
uses: nationalarchives/tdr-github-actions/.github/workflows/tdr_test.yml@main
with:
repo-name: tdr-draft-metadata-validator
test-command: |
sbt scalafmtCheckAll test
secrets:
SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK }}
9 changes: 4 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
*.class
*.log

# virtual machine crash logs, see http://www.java.com/en/download/help/error_hotspot.xml
hs_err_pid*
.idea
target
project/target
project/project
4 changes: 4 additions & 0 deletions .scalafmt.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# scalafmt configuration used by `sbt scalafmtCheckAll` (see the test workflow).
# Version of the scalafmt formatter to run.
version = 3.7.17
preset = default
# Parse sources as Scala 2.13, matching scalaVersion in build.sbt.
runner.dialect = scala213
# Lines may be up to 180 characters wide before the formatter wraps them.
maxColumn = 180
35 changes: 35 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import Dependencies._

// Settings applied build-wide through the ThisBuild scope.
ThisBuild / scalaVersion := "2.13.10"
ThisBuild / version := "0.1.0-SNAPSHOT"
ThisBuild / organization := "uk.gov.nationalarchives"

// Single root project; the dependency values come from project/Dependencies.scala.
lazy val root = (project in file("."))
.settings(
name := "tdr-draft-metadata-validator",
libraryDependencies ++= Seq(
scalaCsv,
typeSafeConfig,
awsLambda,
awsSsm,
metadataValidation,
generatedGraphql,
graphqlClient,
authUtils,
s3Utils,
log4catsSlf4j,
scalaTest % Test,
mockitoScala % Test,
mockitoScalaTest % Test
),
// File name of the assembled jar; the deploy workflow refers to the same name as its deployment package.
assembly / assemblyJarName := "draft-metadata-validator.jar"
)

// Duplicate-file handling for `sbt assembly`: drop everything under META-INF, otherwise keep the first copy found.
// NOTE(review): this also discards META-INF/services/* (ServiceLoader registrations) — confirm no runtime dependency needs them.
(assembly / assemblyMergeStrategy) := {
case PathList("META-INF", xs@_*) => MergeStrategy.discard
case _ => MergeStrategy.first
}

// Tests run in a forked JVM so that the javaOptions and envVars below are applied to them.
(Test / fork) := true
// Point Typesafe Config at the test application.conf instead of the one in src/main/resources.
(Test / javaOptions) += s"-Dconfig.file=${sourceDirectory.value}/test/resources/application.conf"
// Placeholder AWS credentials for the test JVM.
(Test / envVars) := Map("AWS_ACCESS_KEY_ID" -> "test", "AWS_SECRET_ACCESS_KEY" -> "test")
21 changes: 21 additions & 0 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import sbt._

/** Library coordinates referenced from build.sbt, grouped by where they come from. */
object Dependencies {

  // Versions shared by more than one artifact.
  private val log4CatsVersion = "2.6.0"
  private val mockitoScalaVersion = "1.17.27"

  // Organisation under which The National Archives publishes its libraries.
  private val nationalArchives = "uk.gov.nationalarchives"

  // The National Archives libraries
  lazy val metadataValidation = nationalArchives %% "tdr-metadata-validation" % "0.0.13"
  lazy val generatedGraphql = nationalArchives %% "tdr-generated-graphql" % "0.0.357"
  lazy val graphqlClient = nationalArchives %% "tdr-graphql-client" % "0.0.144"
  lazy val authUtils = nationalArchives %% "tdr-auth-utils" % "0.0.187"
  lazy val s3Utils = nationalArchives %% "s3-utils" % "0.1.105"

  // AWS (Java artifacts, hence the single %)
  lazy val awsLambda = "com.amazonaws" % "aws-lambda-java-core" % "1.2.3"
  lazy val awsSsm = "software.amazon.awssdk" % "ssm" % "2.23.17"

  // General purpose libraries
  lazy val scalaCsv = "com.github.tototoshi" %% "scala-csv" % "1.3.10"
  lazy val typeSafeConfig = "com.typesafe" % "config" % "1.4.3"
  lazy val log4catsSlf4j = "org.typelevel" %% "log4cats-slf4j" % log4CatsVersion

  // Test libraries (build.sbt adds the Test configuration)
  lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.2.15"
  lazy val mockitoScala = "org.mockito" %% "mockito-scala" % mockitoScalaVersion
  lazy val mockitoScalaTest = "org.mockito" %% "mockito-scala-scalatest" % mockitoScalaVersion
}
1 change: 1 addition & 0 deletions project/build.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sbt.version=1.9.7
2 changes: 2 additions & 0 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// Builds the fat jar deployed to AWS Lambda (`sbt assembly`).
// sbt plugins are declared with a single `%`, the same form used for sbt-scalafmt below.
addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "2.1.5")
// Provides the `scalafmtCheckAll` task used by the test workflow.
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.5.2")
24 changes: 24 additions & 0 deletions src/main/resources/application.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Defaults target the integration environment. Each `${?VAR}` line replaces the value above it
# only when that environment variable is set.
api {
url = "https://api.tdr-integration.nationalarchives.gov.uk/graphql"
url = ${?API_URL}
}
auth {
url = "https://auth.tdr-integration.nationalarchives.gov.uk/"
url = ${?AUTH_URL}
clientId = "tdr-backend-checks"
# Name of the SSM parameter holding the client secret.
clientSecretPath = "/intg/keycloak/backend_checks_client/secret"
clientSecretPath = ${?CLIENT_SECRET_PATH}
# NOTE(review): ApplicationConfig does not read this key; Lambda passes the literal "tdr" instead.
realm = "tdr"
}
ssm {
endpoint = "https://ssm.eu-west-2.amazonaws.com"
}
s3 {
draftMetadataBucket = "tdr-draft-metadata-intg"
draftMetadataBucket = ${?BUCKET_NAME}
endpoint = "https://s3.eu-west-2.amazonaws.com/"
}
draftMetadata {
# File name used for the draft metadata CSV inside the per-consignment directory.
fileName = "draft-metadata.csv"
}
# Local directory under which the per-consignment working directory is placed.
root.directory = "/tmp"
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package uk.gov.nationalarchives.draftmetadatavalidator

import com.typesafe.config.{ConfigFactory, Config => TypeSafeConfig}

/** Settings read once from the Typesafe Config loaded by `ConfigFactory.load` (application.conf plus any overrides). */
object ApplicationConfig {

  val configFactory: TypeSafeConfig = ConfigFactory.load()

  /** Looks up a mandatory string setting by its config path. */
  private def setting(path: String): String = configFactory.getString(path)

  // GraphQL API
  val apiUrl: String = setting("api.url")

  // Keycloak
  val authUrl: String = setting("auth.url")
  val clientId: String = setting("auth.clientId")
  val clientSecretPath: String = setting("auth.clientSecretPath")
  val timeToLiveSecs: Int = 60

  // AWS: `endpoint` is the SSM endpoint, `s3Endpoint` the S3 one
  val endpoint: String = setting("ssm.endpoint")
  val s3Endpoint: String = setting("s3.endpoint")
  val bucket: String = setting("s3.draftMetadataBucket")

  // Local working files
  val rootDirectory: String = setting("root.directory")
  val fileName: String = setting("draftMetadata.fileName")
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package uk.gov.nationalarchives.draftmetadatavalidator

import com.github.tototoshi.csv.{CSVReader, CSVWriter}
import uk.gov.nationalarchives.tdr.validation.{FileRow, Metadata}

import java.io.ByteArrayOutputStream
import java.nio.file.{Files, Paths}

/** Reads and writes the draft metadata CSV files stored on the local file system. */
class CSVHandler {

  /** Loads a CSV file and converts every data row into a [[FileRow]].
    *
    * The first row is treated as the header and returned untouched. For each following row the first cell is the file name and the remaining cells are paired, in order, with
    * `metadataNames`.
    *
    * @param filePath
    *   path of the CSV file to read
    * @param metadataNames
    *   names to assign, positionally, to the cells that follow the file name
    * @return
    *   the header row together with the parsed data rows
    * @throws java.lang.IllegalArgumentException
    *   if the file contains no rows at all
    */
  def loadCSV(filePath: String, metadataNames: List[String]): FileData = {
    // Always release the underlying file handle; `all()` has materialised every row before the reader is closed.
    val allRowsWithHeader = scala.util.Using.resource(CSVReader.open(filePath))(_.all())
    allRowsWithHeader match {
      case header :: rows =>
        // `collect` skips rows that have no cells at all instead of failing on them.
        val fileRows = rows.collect { case fileName :: data =>
          // Short rows are padded with empty values so they can still be validated rather than
          // failing with an IndexOutOfBoundsException; cells beyond `metadataNames` are ignored as before.
          val values = data.padTo(metadataNames.size, "")
          FileRow(fileName, metadataNames.zip(values).map { case (name, value) => Metadata(name, value) })
        }
        FileData(header, fileRows)
      case Nil =>
        throw new IllegalArgumentException(s"CSV file '$filePath' is empty: expected at least a header row")
    }
  }

  /** Writes the given rows as CSV to `filePath`, replacing any existing file content.
    *
    * @param rows
    *   all rows to write, including the header row
    * @param filePath
    *   path of the file to write
    */
  def writeCsv(rows: List[List[String]], filePath: String): Unit = {
    val bas = new ByteArrayOutputStream()
    // Closing the writer flushes everything into the buffer before it is read back below.
    scala.util.Using.resource(CSVWriter.open(bas))(_.writeAll(rows))
    Files.writeString(Paths.get(filePath), bas.toString("UTF-8"))
  }
}

/** Content of a draft metadata CSV file.
  *
  * @param header
  *   the first row of the file, exactly as read
  * @param fileRows
  *   the remaining rows, one [[FileRow]] per file
  */
case class FileData(header: List[String], fileRows: List[FileRow])
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package uk.gov.nationalarchives.draftmetadatavalidator

import cats.effect.IO
import cats.implicits.catsSyntaxOptionId
import com.typesafe.scalalogging.Logger
import graphql.codegen.GetCustomMetadata.{customMetadata => cm}
import graphql.codegen.GetDisplayProperties.{displayProperties => dp}
import sttp.client3._
import uk.gov.nationalarchives.draftmetadatavalidator.ApplicationConfig.clientId
import uk.gov.nationalarchives.tdr.GraphQLClient
import uk.gov.nationalarchives.tdr.keycloak.{KeycloakUtils, TdrKeycloakDeployment}

import java.util.UUID
import scala.concurrent.{ExecutionContext, Future}

/** Client for the two GraphQL queries the validator needs: custom metadata definitions and display properties.
  *
  * Every request first obtains a service account token from Keycloak using the configured `clientId` and the supplied client secret.
  */
class GraphQlApi(keycloak: KeycloakUtils, customMetadataClient: GraphQLClient[cm.Data, cm.Variables], displayPropertiesClient: GraphQLClient[dp.Data, dp.Variables])(implicit
    logger: Logger,
    keycloakDeployment: TdrKeycloakDeployment,
    backend: SttpBackend[Identity, Any]
) {

  /** Fetches the custom metadata definitions for a consignment.
    *
    * @param consignmentId
    *   consignment passed as the query variable
    * @param clientSecret
    *   secret used to obtain the service account token
    * @return
    *   the definitions; the IO fails with a RuntimeException when the response carries no data
    */
  def getCustomMetadata(consignmentId: UUID, clientSecret: String)(implicit executionContext: ExecutionContext): IO[List[cm.CustomMetadata]] = for {
    token <- suspendFuture(keycloak.serviceAccountToken(clientId, clientSecret))
    metadata <- suspendFuture(customMetadataClient.getResult(token, cm.document, cm.Variables(consignmentId).some))
    data <- IO.fromOption(metadata.data)(new RuntimeException("No custom metadata definitions found"))
  } yield data.customMetadata

  /** Fetches the display properties for a consignment.
    *
    * @param consignmentId
    *   consignment passed as the query variable
    * @param clientSecret
    *   secret used to obtain the service account token
    * @return
    *   the display properties; the IO fails with a RuntimeException when the response carries no data
    */
  def getDisplayProperties(consignmentId: UUID, clientSecret: String)(implicit executionContext: ExecutionContext): IO[List[dp.DisplayProperties]] = for {
    token <- suspendFuture(keycloak.serviceAccountToken(clientId, clientSecret))
    metadata <- suspendFuture(displayPropertiesClient.getResult(token, dp.document, dp.Variables(consignmentId).some))
    data <- IO.fromOption(metadata.data)(new RuntimeException("No display properties definitions found"))
  } yield data.displayProperties

  /** Wraps the creation of a Future in IO.
    *
    * The argument is by-name, so the request only starts when the resulting IO is run and is issued again on every run. The previous `.toIO` call received a Future that was
    * already running, which fired the token request as soon as the IO was built.
    */
  private def suspendFuture[T](future: => Future[T]): IO[T] = IO.fromFuture(IO(future))

  /** Converts an existing Future into an IO. Kept for compatibility; the Future passed in is already running, so prefer suspending its creation. */
  implicit class FutureUtils[T](f: Future[T]) {
    def toIO: IO[T] = IO.fromFuture(IO(f))
  }
}

/** Factory for [[GraphQlApi]] that supplies the logger itself, so callers only provide the backend and the Keycloak deployment. */
object GraphQlApi {
  def apply(keycloak: KeycloakUtils, customMetadataClient: GraphQLClient[cm.Data, cm.Variables], displayPropertiesClient: GraphQLClient[dp.Data, dp.Variables])(implicit
      backend: SttpBackend[Identity, Any],
      keycloakDeployment: TdrKeycloakDeployment
  ): GraphQlApi = {
    // Declared implicit so that all three implicit constructor arguments are resolved from scope.
    implicit val apiLogger: Logger = Logger[GraphQlApi]
    new GraphQlApi(keycloak, customMetadataClient, displayPropertiesClient)
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
package uk.gov.nationalarchives.draftmetadatavalidator

import cats.effect.IO
import graphql.codegen.GetCustomMetadata.customMetadata.CustomMetadata
import graphql.codegen.GetCustomMetadata.{customMetadata => cm}
import graphql.codegen.GetDisplayProperties.displayProperties.DisplayProperties
import graphql.codegen.GetDisplayProperties.{displayProperties => dp}
import io.circe.generic.auto._
import io.circe.parser.decode
import org.typelevel.log4cats.SelfAwareStructuredLogger
import org.typelevel.log4cats.slf4j.Slf4jLogger
import software.amazon.awssdk.http.apache.ApacheHttpClient
import software.amazon.awssdk.regions.Region
import software.amazon.awssdk.services.ssm.SsmClient
import software.amazon.awssdk.services.ssm.model.GetParameterRequest
import sttp.client3.{HttpURLConnectionBackend, Identity, SttpBackend}
import uk.gov.nationalarchives.aws.utils.s3.S3Clients._
import uk.gov.nationalarchives.aws.utils.s3.S3Utils
import uk.gov.nationalarchives.draftmetadatavalidator.ApplicationConfig._
import uk.gov.nationalarchives.draftmetadatavalidator.Lambda.{DraftMetadata, getFilePath}
import uk.gov.nationalarchives.tdr.GraphQLClient
import uk.gov.nationalarchives.tdr.keycloak.{KeycloakUtils, TdrKeycloakDeployment}

import java.io.{InputStream, OutputStream}
import java.net.URI
import java.util.UUID
import scala.concurrent.ExecutionContext.Implicits.global
import scala.io.Source

/** Handler that validates a consignment's draft metadata CSV.
  *
  * For each request it downloads the CSV from S3, validates every row against the custom metadata definitions returned by the API and, when errors are found, appends an "Error"
  * column to the file and uploads it back to the same bucket.
  */
class Lambda {

  implicit val backend: SttpBackend[Identity, Any] = HttpURLConnectionBackend()
  implicit val keycloakDeployment: TdrKeycloakDeployment = TdrKeycloakDeployment(authUrl, "tdr", timeToLiveSecs)
  implicit def logger: SelfAwareStructuredLogger[IO] = Slf4jLogger.getLogger[IO]

  val keycloakUtils = new KeycloakUtils()
  val customMetadataClient = new GraphQLClient[cm.Data, cm.Variables](apiUrl)
  val displayPropertiesClient = new GraphQLClient[dp.Data, dp.Variables](apiUrl)
  val graphQlApi: GraphQlApi = GraphQlApi(keycloakUtils, customMetadataClient, displayPropertiesClient)

  /** Processes one request and blocks until it has completed.
    *
    * @param input
    *   stream containing the JSON request; it is decoded into a `DraftMetadata`
    * @param output
    *   not used: nothing is written back to the caller
    */
  def handleRequest(input: InputStream, output: OutputStream): Unit = {
    val body: String = Source.fromInputStream(input).mkString
    val s3Files = S3Files(S3Utils(s3Async(s3Endpoint)))

    val program: IO[Unit] = for {
      draftMetadata <- IO.fromEither(decode[DraftMetadata](body))
      _ <- s3Files.downloadFile(bucket, draftMetadata)
      hasErrors <- validateMetadata(draftMetadata)
      // Only a file that has gained an "Error" column needs to go back to S3.
      _ <- if (hasErrors) s3Files.uploadFile(bucket, draftMetadata) else IO.unit
    } yield ()

    program.unsafeRunSync()(cats.effect.unsafe.implicits.global)
  }

  /** Validates the downloaded CSV; when errors exist the local file is rewritten with an extra "Error" column.
    *
    * @return
    *   true when at least one validation error was found
    */
  private def validateMetadata(draftMetadata: DraftMetadata): IO[Boolean] = {
    for {
      // Read the secret once and reuse it for both queries; the SSM call is synchronous, hence IO.blocking.
      clientSecret <- IO.blocking(getClientSecret(clientSecretPath, endpoint))
      customMetadata <- graphQlApi.getCustomMetadata(draftMetadata.consignmentId, clientSecret)
      displayProperties <- graphQlApi.getDisplayProperties(draftMetadata.consignmentId, clientSecret)
      metadataValidator = MetadataValidationUtils.createMetadataValidation(customMetadata)
    } yield {
      val csvHandler = new CSVHandler()
      val filePath = getFilePath(draftMetadata)
      val fileData = csvHandler.loadCSV(filePath, getMetadataNames(displayProperties, customMetadata))
      val errors = metadataValidator.validateMetadata(fileData.fileRows)
      if (errors.values.flatten.isEmpty) {
        // This would be where the valid metadata would be saved to the DB
        false
      } else {
        // NOTE(review): the header is the one read from the file while the values follow the metadata name order — confirm both always line up.
        val updatedFileRows = fileData.fileRows.map { file =>
          // A file without an entry in `errors` gets an empty error cell instead of a NoSuchElementException.
          val errorSummary = errors.getOrElse(file.fileName, Nil).map(p => s"${p.propertyName}: ${p.errorCode}").mkString(" | ")
          List(file.fileName) ++ file.metadata.map(_.value) ++ List(errorSummary)
        }
        csvHandler.writeCsv((fileData.header :+ "Error") :: updatedFileRows, filePath)
        true
      }
    }
  }

  /** Reads a decrypted parameter from SSM.
    *
    * A client is created for the call and closed afterwards. The HTTP client is closed explicitly as well, because it is passed to the SSM client builder rather than created by
    * it.
    *
    * @param secretPath
    *   name of the SSM parameter
    * @param endpoint
    *   SSM endpoint to call
    */
  private def getClientSecret(secretPath: String, endpoint: String): String =
    scala.util.Using.resource(ApacheHttpClient.builder.build) { httpClient =>
      val ssmClient: SsmClient = SsmClient
        .builder()
        .endpointOverride(URI.create(endpoint))
        .httpClient(httpClient)
        .region(Region.EU_WEST_2)
        .build()
      scala.util.Using.resource(ssmClient) { client =>
        val getParameterRequest = GetParameterRequest.builder.name(secretPath).withDecryption(true).build
        client.getParameter(getParameterRequest).parameter().value()
      }
    }

  /** Names of the custom metadata that is exportable and whose display property has an "Active" attribute equal to "true", ordered by export ordinal (entries without an ordinal
    * come last).
    */
  private def getMetadataNames(displayProperties: List[DisplayProperties], customMetadata: List[CustomMetadata]): List[String] = {
    val activeNames: Set[String] = displayProperties
      .filter(property => property.attributes.find(_.attribute == "Active").getBoolean)
      .map(_.propertyName)
      .toSet
    customMetadata
      .filter(metadata => activeNames.contains(metadata.name) && metadata.allowExport)
      .sortBy(_.exportOrdinal.getOrElse(Int.MaxValue))
      .map(_.name)
  }

  /** Convenience accessors for an optional display property attribute. */
  implicit class AttributeHelper(attribute: Option[DisplayProperties.Attributes]) {

    /** The attribute's value, or an empty string when the attribute or its value is missing. */
    def getStringValue: String = attribute.flatMap(_.value).getOrElse("")

    /** True only when the attribute is present and its value is exactly "true". */
    def getBoolean: Boolean = attribute.exists(_.value.contains("true"))
  }

}

object Lambda {

  /** Request payload decoded from the handler input.
    *
    * @param consignmentId
    *   consignment whose draft metadata file is to be validated
    */
  case class DraftMetadata(consignmentId: UUID)

  /** Local path of the consignment's draft metadata file: `<rootDirectory>/<consignmentId>/<fileName>`, with both outer parts taken from the application config. */
  def getFilePath(draftMetadata: DraftMetadata): String = s"$rootDirectory/${draftMetadata.consignmentId}/$fileName"
}
Loading

0 comments on commit 0aa9634

Please sign in to comment.