diff --git a/.github/mergify.yml b/.github/mergify.yml new file mode 100644 index 0000000..b4bee73 --- /dev/null +++ b/.github/mergify.yml @@ -0,0 +1,20 @@ +queue_rules: + - name: default + conditions: + - "check-success=test / test" + - "check-success=security/snyk (nationalarchives)" +pull_request_rules: + - name: automatic merge for Scala Steward + conditions: + - author=tna-digital-archiving-jenkins + - "check-success=test / test" + - "check-success=security/snyk (nationalarchives)" + - or: + - files=build.sbt + - files~=^(!?project/) + actions: + review: + type: APPROVE + message: Automatically approving Scala Steward + queue: + name: default diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 96c797f..9078f17 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,5 +1,6 @@ name: TDR Tag and pre deploy on: + workflow_dispatch: push: branches: - main diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 7f83057..a60abe4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -15,6 +15,7 @@ on: description: 'Version to deploy' required: true +run-name: Deploying Draft Metadata Validator Lambda ${{inputs.to-deploy}} to ${{inputs.environment}} permissions: id-token: write contents: write diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 27c85a4..d902b0b 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -1,4 +1,4 @@ -import sbt._ +import sbt.* object Dependencies { diff --git a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala index 338b59d..932d7bd 100644 --- a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala +++ b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandler.scala @@ -23,6 +23,20 @@ class CSVHandler { FileData(allRowsWithHeader, fileRows) } + /** 
Reads a CSV file into a list of FileRows. The FileRow.fileName is the identifier for the row; the loadCSV def above uses it to store the UUID (expecting the UUID to be + * in the last column). Which identifier should be used is still to be decided. Each FileRow metadata key (header) is kept unaltered and the value is maintained as a string + * @param filePath + * path to csv + * @return + * List of FileRows + */ + def loadCSV(filePath: String): List[FileRow] = { + val reader = CSVReader.open(filePath) + val all: Seq[Map[String, String]] = reader.allWithHeaders() + val fileRows = all.map(row => FileRow(row("UUID"), row.map(columnHeaderValue => Metadata(columnHeaderValue._1, columnHeaderValue._2)).toList)) + fileRows.toList + } + def writeCsv(rows: List[List[String]], filePath: String): Unit = { val bas = new ByteArrayOutputStream() val writer = CSVWriter.open(bas) diff --git a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala index 2743f22..0ad9805 100644 --- a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala +++ b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/Lambda.scala @@ -1,8 +1,8 @@ package uk.gov.nationalarchives.draftmetadatavalidator import cats.effect.IO -import com.amazonaws.services.lambda.runtime.Context -import com.amazonaws.services.lambda.runtime.events.{APIGatewayProxyRequestEvent, APIGatewayProxyResponseEvent} +import com.amazonaws.services.lambda.runtime.{Context, RequestHandler} +import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyResponseEvent import graphql.codegen.GetCustomMetadata.customMetadata.CustomMetadata import graphql.codegen.GetCustomMetadata.{customMetadata => cm} import graphql.codegen.GetDisplayProperties.displayProperties.DisplayProperties @@ -24,16 +24,18 @@ import uk.gov.nationalarchives.draftmetadatavalidator.ApplicationConfig._ import 
uk.gov.nationalarchives.draftmetadatavalidator.Lambda.{DraftMetadata, getFilePath} import uk.gov.nationalarchives.tdr.GraphQLClient import uk.gov.nationalarchives.tdr.keycloak.{KeycloakUtils, TdrKeycloakDeployment} -import uk.gov.nationalarchives.tdr.validation.Metadata +import uk.gov.nationalarchives.tdr.validation.{FileRow, Metadata} +import uk.gov.nationalarchives.tdr.validation.schema.MetadataValidationJsonSchema import java.net.URI import java.sql.Timestamp import java.time.LocalDate import java.time.format.DateTimeFormatter +import java.util import java.util.UUID import scala.concurrent.ExecutionContext.Implicits.global -class Lambda { +class Lambda extends RequestHandler[java.util.Map[String, Object], APIGatewayProxyResponseEvent] { implicit val backend: SttpBackend[Identity, Any] = HttpURLConnectionBackend() implicit val keycloakDeployment: TdrKeycloakDeployment = TdrKeycloakDeployment(authUrl, "tdr", timeToLiveSecs) @@ -46,12 +48,11 @@ class Lambda { private val addOrUpdateBulkFileMetadataClient = new GraphQLClient[afm.Data, afm.Variables](apiUrl) private val graphQlApi: GraphQlApi = GraphQlApi(keycloakUtils, customMetadataClient, updateConsignmentStatusClient, addOrUpdateBulkFileMetadataClient, displayPropertiesClient) - def handleRequest(event: APIGatewayProxyRequestEvent, context: Context): APIGatewayProxyResponseEvent = { - val pathParam = event.getPathParameters - + def handleRequest(input: java.util.Map[String, Object], context: Context): APIGatewayProxyResponseEvent = { + val consignmentId = extractConsignmentId(input) val s3Files = S3Files(S3Utils(s3Async(s3Endpoint))) for { - draftMetadata <- IO(DraftMetadata(UUID.fromString(pathParam.get("consignmentId")))) + draftMetadata <- IO(DraftMetadata(UUID.fromString(consignmentId))) _ <- s3Files.downloadFile(bucket, draftMetadata) hasErrors <- validateMetadata(draftMetadata) _ <- if (hasErrors) s3Files.uploadFile(bucket, draftMetadata) else IO.unit @@ -62,17 +63,30 @@ class Lambda { } 
}.unsafeRunSync()(cats.effect.unsafe.implicits.global) + private def extractConsignmentId(input: util.Map[String, Object]): String = { + val inputParameters = input match { + case stepFunctionInput if stepFunctionInput.containsKey("consignmentId") => stepFunctionInput + case apiProxyRequestInput if apiProxyRequestInput.containsKey("pathParameters") => + apiProxyRequestInput.get("pathParameters").asInstanceOf[util.Map[String, Object]] + } + inputParameters.get("consignmentId").toString + } + private def validateMetadata(draftMetadata: DraftMetadata): IO[Boolean] = { val clientSecret = getClientSecret(clientSecretPath, endpoint) for { customMetadata <- graphQlApi.getCustomMetadata(draftMetadata.consignmentId, clientSecret) displayProperties <- graphQlApi.getDisplayProperties(draftMetadata.consignmentId, clientSecret) - metadataValidator = MetadataValidationUtils.createMetadataValidation(customMetadata) result <- { val csvHandler = new CSVHandler() val filePath = getFilePath(draftMetadata) - val fileData = csvHandler.loadCSV(filePath, getMetadataNames(displayProperties, customMetadata)) - val errors = metadataValidator.validateMetadata(fileData.fileRows) + // Loading CSV twice as validation and writing of CSV currently done using different style + // The important fact is the .fileName that is used to match errors to rows written. + // Currently using last column UUID. 
If it is decided to use the UUID the 'fileName' attribute + // should be renamed + val fileData: FileData = csvHandler.loadCSV(filePath, getMetadataNames(displayProperties, customMetadata)) + val fileRows: List[FileRow] = csvHandler.loadCSV(filePath) + val errors = MetadataValidationJsonSchema.validate(fileRows) if (errors.values.exists(_.nonEmpty)) { val updatedFileRows = "Error" :: fileData.fileRows.map(file => { errors(file.fileName).map(p => s"${p.propertyName}: ${p.errorCode}").mkString(" | ") diff --git a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaRunner.scala b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaRunner.scala index 8188394..fc95b81 100644 --- a/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaRunner.scala +++ b/src/main/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaRunner.scala @@ -5,8 +5,6 @@ import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyRequestEvent import scala.jdk.CollectionConverters.MapHasAsJava object LambdaRunner extends App { - val pathParams = Map("consignmentId" -> "f82af3bf-b742-454c-9771-bfd6c5eae749").asJava - val event = new APIGatewayProxyRequestEvent() - event.setPathParameters(pathParams) - new Lambda().handleRequest(event, null) + val input = Map("consignmentId" -> "f82af3bf-b742-454c-9771-bfd6c5eae749".asInstanceOf[Object]).asJava + new Lambda().handleRequest(input, null) } diff --git a/src/test/resources/invalid-sample.csv b/src/test/resources/invalid-sample.csv index bb1ea83..80ccb7a 100644 --- a/src/test/resources/invalid-sample.csv +++ b/src/test/resources/invalid-sample.csv @@ -1,4 +1,4 @@ Filename,Filepath,Date last modified,Closure status,Closure Start Date,Closure Period,FOI exemption code,FOI decision asserted,Is the title sensitive for the public?,Add alternative title without the file extension,Description,Is the description sensitive for the public?,Alternative description,Language,Date of the record,Translated title 
of record,Former reference,UUID -test3.txt,test/test3.txt,2024-03-26,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c -test1.txt,test/test1.txt,2024-03-26,Closed,1990-01-01,12,27(1)|27(2),1990-01-01,Yes,asd,hello,No,,English,,,,cbf2cba5-f1dc-45bd-ae6d-2b042336ce6c +test3.txt,test/test3.txt,12/2/2345,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c +test1.txt,test/test1.txt,2024-03-26,Closed,1990-01-01,12,78|27(1)|27(2),1990-01-01,Yes,asd,hello,No,,English,,,,cbf2cba5-f1dc-45bd-ae6d-2b042336ce6c test2.txt,test/test2.txt,2024-03-26,Open,,,,,No,,sfsdfd,No,,English,,,,c4d5e0f1-f6e1-4a77-a7c0-a4317404da00 diff --git a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala index a555314..8ce9d7d 100644 --- a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala +++ b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/CSVHandlerSpec.scala @@ -13,7 +13,7 @@ class CSVHandlerSpec extends AnyFlatSpec with BeforeAndAfterEach { val filePath: String = getClass.getResource("/sample-for-csv-handler.csv").getPath val metadataNames: List[String] = List("ClosureStatus", "ClosurePeriod") - "loadCSV" should "read the file and return FileData with all the rows" in { + "loadCSV with path and metadata names" should "read the file and return FileData with all the rows" in { val csvHandler = new CSVHandler val fileData = csvHandler.loadCSV(filePath, metadataNames) @@ -34,6 +34,49 @@ class CSVHandlerSpec extends AnyFlatSpec with BeforeAndAfterEach { fileData should be(expected) } + "loadCSV with path " should "read the file and return FileRows" in { + val csvHandler = new CSVHandler + val fileRows = csvHandler.loadCSV(filePath) + + val expected = List( + FileRow( + "16b2f65c-ec50-494b-824b-f8c08e6b575c", + List( + Metadata("Closure status", "Closed"), + Metadata("UUID", 
"16b2f65c-ec50-494b-824b-f8c08e6b575c"), + Metadata("Closure Period", "10"), + Metadata("Filename", "file1.jpg"), + Metadata("Date last modified", "2020-05-29"), + Metadata("Filepath", "aa/file.jpg") + ) + ), + FileRow( + "18449d9b-6a86-40b4-8855-b872a79bebad", + List( + Metadata("Closure status", "Open"), + Metadata("UUID", "18449d9b-6a86-40b4-8855-b872a79bebad"), + Metadata("Closure Period", ""), + Metadata("Filename", "file2.jpg"), + Metadata("Date last modified", "2020-05-29"), + Metadata("Filepath", "aa/file.jpg") + ) + ), + FileRow( + "61b49923-daf7-4140-98f1-58ba6cbed61f", + List( + Metadata("Closure status", "Open"), + Metadata("UUID", "61b49923-daf7-4140-98f1-58ba6cbed61f"), + Metadata("Closure Period", ""), + Metadata("Filename", "file3.jpg"), + Metadata("Date last modified", "2020-05-29"), + Metadata("Filepath", "aa/file.jpg") + ) + ) + ) + + fileRows should be(expected) + } + "writeCsv" should "read the file and return FileData with all the rows" in { val csvHandler = new CSVHandler diff --git a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala index 9c3484f..3a10195 100644 --- a/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala +++ b/src/test/scala/uk/gov/nationalarchives/draftmetadatavalidator/LambdaSpec.scala @@ -1,18 +1,18 @@ package uk.gov.nationalarchives.draftmetadatavalidator import com.amazonaws.services.lambda.runtime.Context -import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyRequestEvent import com.github.tomakehurst.wiremock.client.WireMock.{aResponse, get, put, urlEqualTo} -import com.github.tomakehurst.wiremock.stubbing.StubMapping +import com.github.tomakehurst.wiremock.http.RequestMethod +import com.github.tomakehurst.wiremock.stubbing.{ServeEvent, StubMapping} import org.mockito.MockitoSugar.mock import org.scalatest.matchers.should.Matchers.{convertToAnyShouldWrapper, equal} import 
java.nio.file.{Files, Paths} -import scala.jdk.CollectionConverters.MapHasAsJava +import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava} class LambdaSpec extends ExternalServicesSpec { - val consignmentId = "f82af3bf-b742-454c-9771-bfd6c5eae749" + val consignmentId: Object = "f82af3bf-b742-454c-9771-bfd6c5eae749" val mockContext: Context = mock[Context] def mockS3GetResponse(fileName: String): StubMapping = { @@ -31,21 +31,12 @@ class LambdaSpec extends ExternalServicesSpec { ) } - def createEvent: APIGatewayProxyRequestEvent = { - val pathParams = Map("consignmentId" -> consignmentId).asJava - val event = new APIGatewayProxyRequestEvent() - event.setPathParameters(pathParams) - event - } - "handleRequest" should "download the draft metadata csv file, validate and save to db if it has no errors" in { authOkJson() graphqlOkJson(true) mockS3GetResponse("sample.csv") - val pathParams = Map("consignmentId" -> consignmentId).asJava - val event = new APIGatewayProxyRequestEvent() - event.setPathParameters(pathParams) - val response = new Lambda().handleRequest(createEvent, mockContext) + val input = Map("consignmentId" -> consignmentId).asJava + val response = new Lambda().handleRequest(input, mockContext) response.getStatusCode should equal(200) } @@ -54,10 +45,19 @@ class LambdaSpec extends ExternalServicesSpec { graphqlOkJson() mockS3GetResponse("invalid-sample.csv") mockS3PutResponse() - val pathParams = Map("consignmentId" -> consignmentId).asJava - val event = new APIGatewayProxyRequestEvent() - event.setPathParameters(pathParams) - val response = new Lambda().handleRequest(createEvent, mockContext) + val input = Map("consignmentId" -> consignmentId).asJava + val response = new Lambda().handleRequest(input, mockContext) response.getStatusCode should equal(200) + + val s3Interactions: Iterable[ServeEvent] = wiremockS3.getAllServeEvents.asScala.filter(serveEvent => serveEvent.getRequest.getMethod == RequestMethod.PUT).toList + s3Interactions.size 
shouldBe 1 + + val csvWriteEvent = s3Interactions.head + val expectedCSVHeader = + "Filename,Filepath,Date last modified,Closure status,Closure Start Date,Closure Period,FOI exemption code,FOI decision asserted,Is the title sensitive for the public?,Add alternative title without the file extension,Description,Is the description sensitive for the public?,Alternative description,Language,Date of the record,Translated title of record,Former reference,UUID,Error" + val expectedCSVRow1 = "test3.txt,test/test3.txt,12/2/2345,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c,date_last_modified: format.date" + val csvLines = csvWriteEvent.getRequest.getBodyAsString.split("\\n") + csvLines(0).strip() shouldBe expectedCSVHeader + csvLines(1).strip() shouldBe expectedCSVRow1 } }