Skip to content

Commit

Permalink
Tdrd 215 validate metadata using schema validation library (#73)
Browse files Browse the repository at this point in the history
 use latest tdr-metadata-validation using schema validation
  • Loading branch information
ian-hoyle authored Jun 11, 2024
1 parent 48e646e commit e032012
Show file tree
Hide file tree
Showing 6 changed files with 86 additions and 13 deletions.
4 changes: 2 additions & 2 deletions project/Dependencies.scala
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import sbt._
import sbt.*

object Dependencies {

Expand All @@ -7,7 +7,7 @@ object Dependencies {

lazy val scalaCsv = "com.github.tototoshi" %% "scala-csv" % "1.3.10"
lazy val scalaTest = "org.scalatest" %% "scalatest" % "3.2.15"
lazy val metadataValidation = "uk.gov.nationalarchives" %% "tdr-metadata-validation" % "0.0.21"
lazy val metadataValidation = "uk.gov.nationalarchives" %% "tdr-metadata-validation" % "0.0.27"
lazy val generatedGraphql = "uk.gov.nationalarchives" %% "tdr-generated-graphql" % "0.0.372"
lazy val graphqlClient = "uk.gov.nationalarchives" %% "tdr-graphql-client" % "0.0.144"
lazy val authUtils = "uk.gov.nationalarchives" %% "tdr-auth-utils" % "0.0.187"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,20 @@ class CSVHandler {
FileData(allRowsWithHeader, fileRows)
}

/** Reads a CSV file into a list of FileRows, one per data row.
  *
  * Each row's identifier (the first FileRow argument, referred to as FileRow.fileName) is taken from the row's "UUID" column. Which value should ultimately serve as the row
  * identifier is still to be decided. Every column of the row, including "UUID", becomes a Metadata entry whose key is the unaltered CSV header and whose value is kept as a
  * string.
  *
  * The underlying reader is always closed, even when reading or mapping a row fails.
  *
  * @param filePath
  *   path to csv
  * @return
  *   List of FileRows
  * @throws NoSuchElementException
  *   if a row has no entry under the "UUID" header
  */
def loadCSV(filePath: String): List[FileRow] = {
  val reader = CSVReader.open(filePath)
  try {
    val rowsByHeader: Seq[Map[String, String]] = reader.allWithHeaders()
    rowsByHeader.map { row =>
      val metadata = row.map { case (header, value) => Metadata(header, value) }.toList
      FileRow(row("UUID"), metadata)
    }.toList
  } finally {
    // Release the file handle; the original implementation never closed the reader.
    reader.close()
  }
}

def writeCsv(rows: List[List[String]], filePath: String): Unit = {
val bas = new ByteArrayOutputStream()
val writer = CSVWriter.open(bas)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ package uk.gov.nationalarchives.draftmetadatavalidator

import cats.effect.IO
import com.amazonaws.services.lambda.runtime.{Context, RequestHandler}
import com.amazonaws.services.lambda.runtime.events.{APIGatewayProxyResponseEvent}
import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyResponseEvent
import graphql.codegen.GetCustomMetadata.customMetadata.CustomMetadata
import graphql.codegen.GetCustomMetadata.{customMetadata => cm}
import graphql.codegen.GetDisplayProperties.displayProperties.DisplayProperties
Expand All @@ -24,7 +24,8 @@ import uk.gov.nationalarchives.draftmetadatavalidator.ApplicationConfig._
import uk.gov.nationalarchives.draftmetadatavalidator.Lambda.{DraftMetadata, getFilePath}
import uk.gov.nationalarchives.tdr.GraphQLClient
import uk.gov.nationalarchives.tdr.keycloak.{KeycloakUtils, TdrKeycloakDeployment}
import uk.gov.nationalarchives.tdr.validation.Metadata
import uk.gov.nationalarchives.tdr.validation.{FileRow, Metadata}
import uk.gov.nationalarchives.tdr.validation.schema.MetadataValidationJsonSchema

import java.net.URI
import java.sql.Timestamp
Expand Down Expand Up @@ -76,12 +77,16 @@ class Lambda extends RequestHandler[java.util.Map[String, Object], APIGatewayPro
for {
customMetadata <- graphQlApi.getCustomMetadata(draftMetadata.consignmentId, clientSecret)
displayProperties <- graphQlApi.getDisplayProperties(draftMetadata.consignmentId, clientSecret)
metadataValidator = MetadataValidationUtils.createMetadataValidation(customMetadata)
result <- {
val csvHandler = new CSVHandler()
val filePath = getFilePath(draftMetadata)
val fileData = csvHandler.loadCSV(filePath, getMetadataNames(displayProperties, customMetadata))
val errors = metadataValidator.validateMetadata(fileData.fileRows)
// The CSV is loaded twice because validation and the writing of the CSV currently use different row representations.
// The important point is that .fileName is the key used to match validation errors to the rows that are written.
// It currently holds the UUID taken from the last column. If it is decided to keep using the UUID as the identifier,
// the 'fileName' attribute should be renamed.
val fileData: FileData = csvHandler.loadCSV(filePath, getMetadataNames(displayProperties, customMetadata))
val fileRows: List[FileRow] = csvHandler.loadCSV(filePath)
val errors = MetadataValidationJsonSchema.validate(fileRows)
if (errors.values.exists(_.nonEmpty)) {
val updatedFileRows = "Error" :: fileData.fileRows.map(file => {
errors(file.fileName).map(p => s"${p.propertyName}: ${p.errorCode}").mkString(" | ")
Expand Down
4 changes: 2 additions & 2 deletions src/test/resources/invalid-sample.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Filename,Filepath,Date last modified,Closure status,Closure Start Date,Closure Period,FOI exemption code,FOI decision asserted,Is the title sensitive for the public?,Add alternative title without the file extension,Description,Is the description sensitive for the public?,Alternative description,Language,Date of the record,Translated title of record,Former reference,UUID
test3.txt,test/test3.txt,2024-03-26,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c
test1.txt,test/test1.txt,2024-03-26,Closed,1990-01-01,12,27(1)|27(2),1990-01-01,Yes,asd,hello,No,,English,,,,cbf2cba5-f1dc-45bd-ae6d-2b042336ce6c
test3.txt,test/test3.txt,12/2/2345,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c
test1.txt,test/test1.txt,2024-03-26,Closed,1990-01-01,12,78|27(1)|27(2),1990-01-01,Yes,asd,hello,No,,English,,,,cbf2cba5-f1dc-45bd-ae6d-2b042336ce6c
test2.txt,test/test2.txt,2024-03-26,Open,,,,,No,,sfsdfd,No,,English,,,,c4d5e0f1-f6e1-4a77-a7c0-a4317404da00
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ class CSVHandlerSpec extends AnyFlatSpec with BeforeAndAfterEach {
val filePath: String = getClass.getResource("/sample-for-csv-handler.csv").getPath
val metadataNames: List[String] = List("ClosureStatus", "ClosurePeriod")

"loadCSV" should "read the file and return FileData with all the rows" in {
"loadCSV with path and metadata names" should "read the file and return FileData with all the rows" in {
val csvHandler = new CSVHandler
val fileData = csvHandler.loadCSV(filePath, metadataNames)

Expand All @@ -34,6 +34,49 @@ class CSVHandlerSpec extends AnyFlatSpec with BeforeAndAfterEach {
fileData should be(expected)
}

"loadCSV with path " should "read the file and return FileRows" in {
  // Builds the expected FileRow for one CSV line. Only the UUID, closure status, closure period and
  // filename differ between the sample rows; the date and file path are the same on every row.
  // The Metadata order is kept exactly as in the original expectation, since the lists are compared in order.
  def expectedRow(uuid: String, closureStatus: String, closurePeriod: String, filename: String): FileRow =
    FileRow(
      uuid,
      List(
        Metadata("Closure status", closureStatus),
        Metadata("UUID", uuid),
        Metadata("Closure Period", closurePeriod),
        Metadata("Filename", filename),
        Metadata("Date last modified", "2020-05-29"),
        Metadata("Filepath", "aa/file.jpg")
      )
    )

  val expectedRows = List(
    expectedRow("16b2f65c-ec50-494b-824b-f8c08e6b575c", "Closed", "10", "file1.jpg"),
    expectedRow("18449d9b-6a86-40b4-8855-b872a79bebad", "Open", "", "file2.jpg"),
    expectedRow("61b49923-daf7-4140-98f1-58ba6cbed61f", "Open", "", "file3.jpg")
  )

  val handler = new CSVHandler
  val actualRows = handler.loadCSV(filePath)

  actualRows should be(expectedRows)
}

"writeCsv" should "read the file and return FileData with all the rows" in {
val csvHandler = new CSVHandler

Expand Down
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
package uk.gov.nationalarchives.draftmetadatavalidator

import com.amazonaws.services.lambda.runtime.Context
import com.amazonaws.services.lambda.runtime.events.APIGatewayProxyRequestEvent
import com.github.tomakehurst.wiremock.client.WireMock.{aResponse, get, put, urlEqualTo}
import com.github.tomakehurst.wiremock.stubbing.StubMapping
import com.github.tomakehurst.wiremock.http.RequestMethod
import com.github.tomakehurst.wiremock.stubbing.{ServeEvent, StubMapping}
import org.mockito.MockitoSugar.mock
import org.scalatest.matchers.should.Matchers.{convertToAnyShouldWrapper, equal}

import java.nio.file.{Files, Paths}
import scala.jdk.CollectionConverters.MapHasAsJava
import scala.jdk.CollectionConverters.{CollectionHasAsScala, MapHasAsJava}

class LambdaSpec extends ExternalServicesSpec {

Expand Down Expand Up @@ -48,5 +48,16 @@ class LambdaSpec extends ExternalServicesSpec {
val input = Map("consignmentId" -> consignmentId).asJava
val response = new Lambda().handleRequest(input, mockContext)
response.getStatusCode should equal(200)

val s3Interactions: Iterable[ServeEvent] = wiremockS3.getAllServeEvents.asScala.filter(serveEvent => serveEvent.getRequest.getMethod == RequestMethod.PUT).toList
s3Interactions.size shouldBe 1

val csvWriteEvent = s3Interactions.head
val expectedCSVHeader =
"Filename,Filepath,Date last modified,Closure status,Closure Start Date,Closure Period,FOI exemption code,FOI decision asserted,Is the title sensitive for the public?,Add alternative title without the file extension,Description,Is the description sensitive for the public?,Alternative description,Language,Date of the record,Translated title of record,Former reference,UUID,Error"
val expectedCSVRow1 = "test3.txt,test/test3.txt,12/2/2345,Closed,,,,,No,,hhhhh,No,,English,,,,a060c57d-1639-4828-9a7a-67a7c64dbf6c,date_last_modified: format.date"
val csvLines = csvWriteEvent.getRequest.getBodyAsString.split("\\n")
csvLines(0).strip() shouldBe expectedCSVHeader
csvLines(1).strip() shouldBe expectedCSVRow1
}
}

0 comments on commit e032012

Please sign in to comment.