From 729d8115135477b8eb48e1dd0a6a006b9fee62cf Mon Sep 17 00:00:00 2001 From: David An Date: Tue, 10 Sep 2024 16:22:03 -0400 Subject: [PATCH 01/24] beginning of POC --- .../rawls/dataaccess/slick/DataAccess.scala | 1 + .../slick/JsonEntityComponent.scala | 103 +++++++++++++++++ .../dsde/rawls/entities/EntityManager.scala | 15 ++- .../entities/json/JsonEntityProvider.scala | 104 ++++++++++++++++++ .../json/JsonEntityProviderBuilder.scala | 31 ++++++ 5 files changed, 249 insertions(+), 5 deletions(-) create mode 100644 core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala create mode 100644 core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala create mode 100644 core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProviderBuilder.scala diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/DataAccess.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/DataAccess.scala index 264f446d5d..5d99e9a9d0 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/DataAccess.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/DataAccess.scala @@ -13,6 +13,7 @@ trait DataAccess with RawlsBillingProjectComponent with WorkspaceComponent with EntityComponent + with JsonEntityComponent with AttributeComponent with MethodConfigurationComponent with SubmissionComponent diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala new file mode 100644 index 0000000000..c1401e68ec --- /dev/null +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -0,0 +1,103 @@ +package org.broadinstitute.dsde.rawls.dataaccess.slick + +import org.broadinstitute.dsde.rawls.RawlsException +import 
org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ +import org.broadinstitute.dsde.rawls.model._ +import slick.jdbc._ +import spray.json.DefaultJsonProtocol._ +import spray.json._ + +import java.sql.Timestamp +import java.util.UUID +import scala.language.postfixOps + +/** + * model class for rows in the ENTITY table + */ +// TODO AJ-2008: handle the all_attribute_values column +case class JsonEntityRecord(id: Long, + name: String, + entityType: String, + workspaceId: UUID, + recordVersion: Long, + deleted: Boolean, + deletedDate: Option[Timestamp], + attributes: JsValue +) + +/** + * companion object for constants, etc. + */ +object JsonEntityComponent { + // the length of the all_attribute_values column, which is TEXT, minus a few bytes because i'm nervous + val allAttributeValuesColumnSize = 65532 +} + +/** + * Slick component for reading/writing JSON-based entities + */ +trait JsonEntityComponent { + this: DriverComponent => + + import slick.jdbc.MySQLProfile.api._ + + // json codec for entity attributes + implicit val attributeFormat: AttributeFormat = new AttributeFormat with PlainArrayAttributeListSerializer + + /** + * SQL queries for working with the ENTITY table + */ + object jsonEntityQuery extends RawSqlQuery { + val driver: JdbcProfile = JsonEntityComponent.this.driver + + // read a json column from the db and translate into a JsValue + implicit val GetJsValueResult: GetResult[JsValue] = GetResult(r => r.nextString().parseJson) + + // write a JsValue to the database by converting it to a string (the db column is still JSON) + implicit object SetJsValueParameter extends SetParameter[JsValue] { + def apply(v: JsValue, pp: PositionedParameters): Unit = + pp.setString(v.compactPrint) + } + + // select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes + // into a JsonEntityRecord + implicit val getJsonEntityRecord: GetResult[JsonEntityRecord] = + GetResult(r => JsonEntityRecord(r.<<, r.<<, r.<<, r.<<, r.<<, r.<<, 
r.<<, r.<<)) + + /** + * Insert a single entity to the db + */ + def createEntity(workspaceId: UUID, entity: Entity): ReadWriteAction[JsonEntityRecord] = { + val attributesJson: JsValue = entity.attributes.toJson + + // create insert statement + val insertStatement = + sqlu"""insert into ENTITY(name, entity_type, workspace_id, record_version, deleted, attributes) + values (${entity.name}, ${entity.entityType}, $workspaceId, 0, 0, $attributesJson)""" + + // execute insert statement + insertStatement flatMap { _ => + // return the actually-saved entity + getEntity(workspaceId, entity.entityType, entity.name) + } + } + + /** + * Read a single entity from the db + */ + def getEntity(workspaceId: UUID, entityType: String, entityName: String): ReadAction[JsonEntityRecord] = { + val selectStatement: SQLActionBuilder = + sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes + from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and name = $entityName""" + + // execute select statement + selectStatement.as[JsonEntityRecord].map { + case Seq() => throw new RawlsException(s"Expected at least one result") + case Seq(one) => one + case tooMany => throw new RawlsException(s"Expected 1 result but found ${tooMany.size}") + } + } + + } + +} diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityManager.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityManager.scala index 14cc72b0eb..f1bc4588d6 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityManager.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityManager.scala @@ -14,6 +14,7 @@ import org.broadinstitute.dsde.rawls.dataaccess.{ import org.broadinstitute.dsde.rawls.entities.base.{EntityProvider, EntityProviderBuilder} import org.broadinstitute.dsde.rawls.entities.datarepo.{DataRepoEntityProvider, DataRepoEntityProviderBuilder} import 
org.broadinstitute.dsde.rawls.entities.exceptions.DataEntityException +import org.broadinstitute.dsde.rawls.entities.json.{JsonEntityProvider, JsonEntityProviderBuilder} import org.broadinstitute.dsde.rawls.entities.local.{LocalEntityProvider, LocalEntityProviderBuilder} import org.broadinstitute.dsde.rawls.model.{ErrorReport, WorkspaceType} @@ -57,10 +58,12 @@ class EntityManager(providerBuilders: Set[EntityProviderBuilder[_ <: EntityProvi // soon: look up the reference name to ensure it exists. // for now, this simplistic logic illustrates the approach: choose the right builder for the job. - val targetTag = if (requestArguments.dataReference.isDefined) { - typeTag[DataRepoEntityProvider] - } else { - typeTag[LocalEntityProvider] + + // TODO AJ-2008: this is a temporary hack to get JsonEntityProvider working + val targetTag = (requestArguments.dataReference, requestArguments.workspace) match { + case (Some(_), _) => typeTag[DataRepoEntityProvider] + case (_, x) if x.name.contains("AJ-2008") => typeTag[JsonEntityProvider] + case _ => typeTag[LocalEntityProvider] } providerBuilders.find(_.builds == targetTag) match { @@ -110,6 +113,8 @@ object EntityManager { config ) // implicit executionContext - new EntityManager(Set(defaultEntityProviderBuilder, dataRepoEntityProviderBuilder)) + val jsonEntityProviderBuilder = new JsonEntityProviderBuilder(dataSource, cacheEnabled, queryTimeout, metricsPrefix) + + new EntityManager(Set(defaultEntityProviderBuilder, dataRepoEntityProviderBuilder, jsonEntityProviderBuilder)) } } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala new file mode 100644 index 0000000000..8c00f84a94 --- /dev/null +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -0,0 +1,104 @@ +package org.broadinstitute.dsde.rawls.entities.json + +import 
akka.stream.scaladsl.Source +import com.typesafe.scalalogging.LazyLogging +import org.broadinstitute.dsde.rawls.dataaccess.SlickDataSource +import org.broadinstitute.dsde.rawls.entities.EntityRequestArguments +import org.broadinstitute.dsde.rawls.entities.base.ExpressionEvaluationSupport.LookupExpression +import org.broadinstitute.dsde.rawls.entities.base.{EntityProvider, ExpressionEvaluationContext, ExpressionValidator} +import org.broadinstitute.dsde.rawls.jobexec.MethodConfigResolver +import org.broadinstitute.dsde.rawls.model.Attributable.AttributeMap +import org.broadinstitute.dsde.rawls.model.{ + AttributeEntityReference, + AttributeUpdateOperations, + AttributeValue, + Entity, + EntityCopyResponse, + EntityQuery, + EntityQueryResponse, + EntityQueryResultMetadata, + EntityTypeMetadata, + RawlsRequestContext, + SubmissionValidationEntityInputs, + Workspace +} + +import spray.json._ +import DefaultJsonProtocol._ +import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ + +import java.time.Duration +import scala.concurrent.{ExecutionContext, Future} +import scala.util.Try + +class JsonEntityProvider(requestArguments: EntityRequestArguments, + implicit protected val dataSource: SlickDataSource, + cacheEnabled: Boolean, + queryTimeout: Duration, + val workbenchMetricBaseName: String +)(implicit protected val executionContext: ExecutionContext) + extends EntityProvider + with LazyLogging { + + override def entityStoreId: Option[String] = None + + /** + * Insert a single entity to the db + */ + override def createEntity(entity: Entity): Future[Entity] = + dataSource.inTransaction { dataAccess => + dataAccess.jsonEntityQuery.createEntity(requestArguments.workspace.workspaceIdAsUUID, entity) + } map { jsonEntityRecord => + Entity(jsonEntityRecord.name, jsonEntityRecord.entityType, jsonEntityRecord.attributes.convertTo[AttributeMap]) + } + + /** + * Read a single entity from the db + */ + override def getEntity(entityType: String, entityName: String): 
Future[Entity] = dataSource.inTransaction { + dataAccess => + dataAccess.jsonEntityQuery.getEntity(requestArguments.workspace.workspaceIdAsUUID, entityType, entityName) + } map { jsonEntityRecord => + Entity(jsonEntityRecord.name, jsonEntityRecord.entityType, jsonEntityRecord.attributes.convertTo[AttributeMap]) + } + + override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = ??? + + override def entityTypeMetadata(useCache: Boolean): Future[Map[String, EntityTypeMetadata]] = ??? + + override def queryEntitiesSource(entityType: String, + query: EntityQuery, + parentContext: RawlsRequestContext + ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = ??? + + override def queryEntities(entityType: String, + query: EntityQuery, + parentContext: RawlsRequestContext + ): Future[EntityQueryResponse] = ??? + + override def batchUpdateEntities( + entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] + ): Future[Traversable[Entity]] = ??? + + override def batchUpsertEntities( + entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] + ): Future[Traversable[Entity]] = ??? + + override def copyEntities(sourceWorkspaceContext: Workspace, + destWorkspaceContext: Workspace, + entityType: String, + entityNames: Seq[String], + linkExistingEntities: Boolean, + parentContext: RawlsRequestContext + ): Future[EntityCopyResponse] = ??? + + override def deleteEntitiesOfType(entityType: String): Future[Int] = ??? + + override def evaluateExpressions(expressionEvaluationContext: ExpressionEvaluationContext, + gatherInputsResult: MethodConfigResolver.GatherInputsResult, + workspaceExpressionResults: Map[LookupExpression, Try[Iterable[AttributeValue]]] + ): Future[LazyList[SubmissionValidationEntityInputs]] = ??? + + override def expressionValidator: ExpressionValidator = ??? 
+ +} diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProviderBuilder.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProviderBuilder.scala new file mode 100644 index 0000000000..5594d6a8bb --- /dev/null +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProviderBuilder.scala @@ -0,0 +1,31 @@ +package org.broadinstitute.dsde.rawls.entities.json + +import org.broadinstitute.dsde.rawls.dataaccess.SlickDataSource +import org.broadinstitute.dsde.rawls.entities.EntityRequestArguments +import org.broadinstitute.dsde.rawls.entities.base.EntityProviderBuilder +import org.broadinstitute.dsde.rawls.entities.local.LocalEntityProvider + +import java.time.Duration +import scala.concurrent.ExecutionContext +import scala.reflect.runtime.universe +import scala.reflect.runtime.universe.typeTag +import scala.util.{Success, Try} + +class JsonEntityProviderBuilder(dataSource: SlickDataSource, + cacheEnabled: Boolean, + queryTimeout: Duration, + metricsPrefix: String +)(implicit + protected val executionContext: ExecutionContext +) extends EntityProviderBuilder[JsonEntityProvider] { + + /** declares the type of EntityProvider this builder will build. + */ + override def builds: universe.TypeTag[JsonEntityProvider] = typeTag[JsonEntityProvider] + + /** create the EntityProvider this builder knows how to create. 
+ */ + override def build(requestArguments: EntityRequestArguments): Try[JsonEntityProvider] = Success( + new JsonEntityProvider(requestArguments, dataSource, cacheEnabled, queryTimeout, metricsPrefix) + ) +} From 9536d3454a44edd03db77882447e25ad723842ef Mon Sep 17 00:00:00 2001 From: David An Date: Tue, 10 Sep 2024 17:31:55 -0400 Subject: [PATCH 02/24] liquibase for schema changes (no references yet) --- .../dsde/rawls/liquibase/changelog.xml | 1 + .../20240910_entity_json_support.xml | 83 +++++++++++++++++++ 2 files changed, 84 insertions(+) create mode 100644 core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml diff --git a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changelog.xml b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changelog.xml index 37961a2d26..d37ea4e74d 100644 --- a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changelog.xml +++ b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changelog.xml @@ -125,4 +125,5 @@ + diff --git a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml new file mode 100644 index 0000000000..a8f3ed6cf0 --- /dev/null +++ b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml @@ -0,0 +1,83 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DROP TRIGGER IF EXISTS after_entity_insert ~ + DROP TRIGGER IF EXISTS after_entity_update ~ + + CREATE TRIGGER after_entity_insert AFTER INSERT ON ENTITY + FOR EACH ROW + INSERT INTO ENTITY_KEYS + (id, workspace_id, entity_type, attribute_keys, last_updated) + VALUES + (new.id, new.workspace_id, new.entity_type, JSON_KEYS(new.attributes), now(3)); ~ + + CREATE TRIGGER after_entity_update AFTER UPDATE ON ENTITY + 
FOR EACH ROW + BEGIN + -- is this row soft-deleted? + if new.deleted = 1 then + DELETE FROM ENTITY_KEYS WHERE id = new.id; + else + -- compare old keys to new keys; update the ENTITY_KEYS table only if they are different + if JSON_KEYS(new.attributes) != JSON_KEYS(old.attributes) then + UPDATE ENTITY_KEYS SET attribute_keys=JSON_KEYS(new.attributes), last_updated=now(3) + WHERE id = new.id; + end if; + end if; + END ~ + + + DROP TRIGGER IF EXISTS after_entity_insert; + DROP TRIGGER IF EXISTS after_entity_update; + + + + + + + From 761bd55a7b367c63e996aef8abab778285f9aeb9 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 09:30:11 -0400 Subject: [PATCH 03/24] streamline triggers --- .../changesets/20240910_entity_json_support.xml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml index a8f3ed6cf0..3652980399 100644 --- a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml +++ b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml @@ -60,12 +60,14 @@ FOR EACH ROW BEGIN -- is this row soft-deleted? 
- if new.deleted = 1 then + if old.deleted = 0 and new.deleted = 1 then DELETE FROM ENTITY_KEYS WHERE id = new.id; - else + elseif new.attributes is not null then -- compare old keys to new keys; update the ENTITY_KEYS table only if they are different - if JSON_KEYS(new.attributes) != JSON_KEYS(old.attributes) then - UPDATE ENTITY_KEYS SET attribute_keys=JSON_KEYS(new.attributes), last_updated=now(3) + set @new_keys := JSON_KEYS(new.attributes); + set @old_keys := JSON_KEYS(old.attributes); + if JSON_LENGTH(@new_keys) != JSON_LENGTH(@old_keys) OR JSON_CONTAINS(@new_keys, @old_keys) = 0 then + UPDATE ENTITY_KEYS SET attribute_keys=@new_keys, last_updated=now(3) WHERE id = new.id; end if; end if; From 262a6e6d9c8f678a33106ebd836fe25d3ba0512d Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 09:36:07 -0400 Subject: [PATCH 04/24] add TODOs --- .../dsde/rawls/dataaccess/slick/JsonEntityComponent.scala | 3 +++ .../dsde/rawls/entities/EntityService.scala | 8 ++++++++ .../dsde/rawls/entities/json/JsonEntityProvider.scala | 1 + 3 files changed, 12 insertions(+) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index c1401e68ec..ebec5246d7 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -67,6 +67,7 @@ trait JsonEntityComponent { /** * Insert a single entity to the db */ + // TODO AJ-2008: return Entity instead of JsonEntityRecord? 
def createEntity(workspaceId: UUID, entity: Entity): ReadWriteAction[JsonEntityRecord] = { val attributesJson: JsValue = entity.attributes.toJson @@ -78,6 +79,7 @@ trait JsonEntityComponent { // execute insert statement insertStatement flatMap { _ => // return the actually-saved entity + // TODO AJ-2008: move this logic up to JsonEntityProvider getEntity(workspaceId, entity.entityType, entity.name) } } @@ -85,6 +87,7 @@ trait JsonEntityComponent { /** * Read a single entity from the db */ + // TODO AJ-2008: return Entity instead of JsonEntityRecord? def getEntity(workspaceId: UUID, entityType: String, entityName: String): ReadAction[JsonEntityRecord] = { val selectStatement: SQLActionBuilder = sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala index 2528404079..3d8d69ba72 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala @@ -105,6 +105,7 @@ class EntityService(protected val ctx: RawlsRequestContext, .recover(bigQueryRecover) } + // TODO AJ-2008: move to EntityProvider def updateEntity(workspaceName: WorkspaceName, entityType: String, entityName: String, @@ -199,6 +200,7 @@ class EntityService(protected val ctx: RawlsRequestContext, .recover(bigQueryRecover) } + // TODO AJ-2008: move to EntityProvider def deleteEntityAttributes(workspaceName: WorkspaceName, entityType: String, attributeNames: Set[AttributeName] @@ -220,6 +222,7 @@ class EntityService(protected val ctx: RawlsRequestContext, } } + // TODO AJ-2008: move to EntityProvider def renameEntity(workspaceName: WorkspaceName, entityType: String, entityName: String, newName: String): Future[Int] = getV2WorkspaceContextAndPermissions(workspaceName, 
SamWorkspaceActions.write, @@ -239,6 +242,7 @@ class EntityService(protected val ctx: RawlsRequestContext, } } + // TODO AJ-2008: move to EntityProvider def renameEntityType(workspaceName: WorkspaceName, oldName: String, renameInfo: EntityTypeRename): Future[Int] = { import org.broadinstitute.dsde.rawls.dataaccess.slick.{DataAccess, ReadAction} @@ -289,6 +293,7 @@ class EntityService(protected val ctx: RawlsRequestContext, } } + // TODO AJ-2008: move to EntityProvider def evaluateExpression(workspaceName: WorkspaceName, entityType: String, entityName: String, @@ -374,6 +379,7 @@ class EntityService(protected val ctx: RawlsRequestContext, Source.fromPublisher(dataSource.database.stream(allAttrsStream)) } + // TODO AJ-2008: move to EntityProvider def listEntities(workspaceName: WorkspaceName, entityType: String) = getWorkspaceContextAndPermissions(workspaceName, SamWorkspaceActions.read, @@ -410,6 +416,7 @@ class EntityService(protected val ctx: RawlsRequestContext, } } + // TODO AJ-2008: move to EntityProvider def copyEntities(entityCopyDef: EntityCopyDefinition, linkExistingEntities: Boolean): Future[EntityCopyResponse] = for { destWsCtx <- getV2WorkspaceContextAndPermissions(entityCopyDef.destinationWorkspace, @@ -472,6 +479,7 @@ class EntityService(protected val ctx: RawlsRequestContext, ): Future[Traversable[Entity]] = batchUpdateEntitiesInternal(workspaceName, entityUpdates, upsert = true, dataReference, billingProject) + // TODO AJ-2008: move to EntityProvider def renameAttribute(workspaceName: WorkspaceName, entityType: String, oldAttributeName: AttributeName, diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 8c00f84a94..8b80911991 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ 
b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -31,6 +31,7 @@ import java.time.Duration import scala.concurrent.{ExecutionContext, Future} import scala.util.Try +// TODO AJ-2008: tracing class JsonEntityProvider(requestArguments: EntityRequestArguments, implicit protected val dataSource: SlickDataSource, cacheEnabled: Boolean, From 24659948da95646b0020c8748341a679766d2d1e Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 10:21:34 -0400 Subject: [PATCH 05/24] entityTypeMetadata --- .../slick/JsonEntityComponent.scala | 18 +++++++++++ .../entities/json/JsonEntityProvider.scala | 31 +++++++++++++++++-- 2 files changed, 46 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index ebec5246d7..4d4bb512d8 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -101,6 +101,24 @@ trait JsonEntityComponent { } } + /** + * All entity types for the given workspace, with their counts of active entities + */ + def typesAndCounts(workspaceId: UUID): ReadAction[Seq[(String, Int)]] = + sql"""select entity_type, count(1) from ENTITY where workspace_id = $workspaceId and deleted = 0 group by entity_type""" + .as[(String, Int)] + + /** + * All attribute names for the given workspace, paired to their entity type + * The ENTITY_KEYS table is automatically populated via triggers on the ENTITY table; see the db + * to understand those triggers. 
+ */ + def typesAndAttributes(workspaceId: UUID): ReadAction[Seq[(String, String)]] = + sql"""SELECT DISTINCT entity_type, json_key FROM ENTITY_KEYS, + JSON_TABLE(attribute_keys, '$$[*]' COLUMNS(json_key VARCHAR(256) PATH '$$')) t + where workspace_id = $workspaceId;""" + .as[(String, String)] + } } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 8b80911991..d1137fe3ba 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -7,9 +7,10 @@ import org.broadinstitute.dsde.rawls.entities.EntityRequestArguments import org.broadinstitute.dsde.rawls.entities.base.ExpressionEvaluationSupport.LookupExpression import org.broadinstitute.dsde.rawls.entities.base.{EntityProvider, ExpressionEvaluationContext, ExpressionValidator} import org.broadinstitute.dsde.rawls.jobexec.MethodConfigResolver -import org.broadinstitute.dsde.rawls.model.Attributable.AttributeMap +import org.broadinstitute.dsde.rawls.model.Attributable.{entityIdAttributeSuffix, AttributeMap} import org.broadinstitute.dsde.rawls.model.{ AttributeEntityReference, + AttributeName, AttributeUpdateOperations, AttributeValue, Entity, @@ -22,7 +23,6 @@ import org.broadinstitute.dsde.rawls.model.{ SubmissionValidationEntityInputs, Workspace } - import spray.json._ import DefaultJsonProtocol._ import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ @@ -56,6 +56,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, /** * Read a single entity from the db */ + // TODO AJ-2008: mark transaction as read-only override def getEntity(entityType: String, entityName: String): Future[Entity] = dataSource.inTransaction { dataAccess => dataAccess.jsonEntityQuery.getEntity(requestArguments.workspace.workspaceIdAsUUID, 
entityType, entityName) @@ -65,7 +66,31 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = ??? - override def entityTypeMetadata(useCache: Boolean): Future[Map[String, EntityTypeMetadata]] = ??? + // TODO AJ-2008: mark transaction as read-only + // TODO AJ-2008: probably needs caching for the attribute calculations + override def entityTypeMetadata(useCache: Boolean): Future[Map[String, EntityTypeMetadata]] = + dataSource.inTransaction { dataAccess => + // get the types and counts + for { + typesAndCounts <- dataAccess.jsonEntityQuery.typesAndCounts(requestArguments.workspace.workspaceIdAsUUID) + typesAndAttributes <- dataAccess.jsonEntityQuery.typesAndAttributes( + requestArguments.workspace.workspaceIdAsUUID + ) + } yield { + // group attribute names by entity type + val groupedAttributeNames: Map[String, Seq[String]] = + typesAndAttributes + .groupMap(_._1)(_._2) + + // loop through the types and counts and build the EntityTypeMetadata + typesAndCounts.map { case (entityType: String, count: Int) => + // grab attribute names + val attrNames = groupedAttributeNames.getOrElse(entityType, Seq()) + val metadata = EntityTypeMetadata(count, s"$entityType$entityIdAttributeSuffix", attrNames) + (entityType, metadata) + }.toMap + } + } override def queryEntitiesSource(entityType: String, query: EntityQuery, From 3de80445d25fac6bb0487e20a845b9b3dfedfbdb Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 11:41:16 -0400 Subject: [PATCH 06/24] basic entityQuery --- .../slick/JsonEntityComponent.scala | 24 +++++- .../entities/json/JsonEntityProvider.scala | 76 ++++++++++++++++--- 2 files changed, 87 insertions(+), 13 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index 4d4bb512d8..d31cf02832 100644 
--- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -1,6 +1,7 @@ package org.broadinstitute.dsde.rawls.dataaccess.slick import org.broadinstitute.dsde.rawls.RawlsException +import org.broadinstitute.dsde.rawls.model.Attributable.AttributeMap import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import org.broadinstitute.dsde.rawls.model._ import slick.jdbc._ @@ -23,7 +24,10 @@ case class JsonEntityRecord(id: Long, deleted: Boolean, deletedDate: Option[Timestamp], attributes: JsValue -) +) { + def toEntity: Entity = + Entity(name, entityType, attributes.convertTo[AttributeMap]) +} /** * companion object for constants, etc. @@ -119,6 +123,24 @@ trait JsonEntityComponent { where workspace_id = $workspaceId;""" .as[(String, String)] + def queryEntities(workspaceId: UUID, entityType: String, queryParams: EntityQuery): ReadAction[Seq[Entity]] = { + + val offset = queryParams.pageSize * (queryParams.page - 1) + + // TODO AJ-2008: full-table text search + // TODO AJ-2008: filter by column + // TODO AJ-2008: arbitrary sorting + // TODO AJ-2008: result projection + // TODO AJ-2008: total/filtered counts + + sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes + from ENTITY where workspace_id = $workspaceId and entity_type = $entityType + order by name + limit #${queryParams.pageSize} + offset #$offset""".as[JsonEntityRecord].map(results => results.map(_.toEntity)) + } + + // TODO AJ-2008: retrieve many JsonEntityRecords by type/name pairs. 
Use JsonEntityRecords for access to the recordVersion } } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index d1137fe3ba..a28fa55818 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -25,7 +25,11 @@ import org.broadinstitute.dsde.rawls.model.{ } import spray.json._ import DefaultJsonProtocol._ +import io.opentelemetry.api.common.AttributeKey +import org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ +import org.broadinstitute.dsde.rawls.util.AttributeSupport +import org.broadinstitute.dsde.rawls.util.TracingUtils.{setTraceSpanAttribute, traceFutureWithParent} import java.time.Duration import scala.concurrent.{ExecutionContext, Future} @@ -39,6 +43,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, val workbenchMetricBaseName: String )(implicit protected val executionContext: ExecutionContext) extends EntityProvider + with AttributeSupport with LazyLogging { override def entityStoreId: Option[String] = None @@ -49,9 +54,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def createEntity(entity: Entity): Future[Entity] = dataSource.inTransaction { dataAccess => dataAccess.jsonEntityQuery.createEntity(requestArguments.workspace.workspaceIdAsUUID, entity) - } map { jsonEntityRecord => - Entity(jsonEntityRecord.name, jsonEntityRecord.entityType, jsonEntityRecord.attributes.convertTo[AttributeMap]) - } + } map (_.toEntity) /** * Read a single entity from the db @@ -60,9 +63,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def getEntity(entityType: String, entityName: String): Future[Entity] = 
dataSource.inTransaction { dataAccess => dataAccess.jsonEntityQuery.getEntity(requestArguments.workspace.workspaceIdAsUUID, entityType, entityName) - } map { jsonEntityRecord => - Entity(jsonEntityRecord.name, jsonEntityRecord.entityType, jsonEntityRecord.attributes.convertTo[AttributeMap]) - } + } map (_.toEntity) override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = ??? @@ -93,22 +94,73 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } override def queryEntitiesSource(entityType: String, - query: EntityQuery, + entityQuery: EntityQuery, parentContext: RawlsRequestContext - ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = ??? + ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = dataSource.inTransaction { dataAccess => + dataAccess.jsonEntityQuery.queryEntities(requestArguments.workspace.workspaceIdAsUUID, + entityType, + entityQuery + ) map { results => + // TODO AJ-2008: total/filtered counts + // TODO AJ-2008: actually stream!!!! + val metadata = EntityQueryResultMetadata(1, 2, 3) + val entitySource = Source.apply(results) + (metadata, entitySource) + } + } override def queryEntities(entityType: String, - query: EntityQuery, + entityQuery: EntityQuery, parentContext: RawlsRequestContext - ): Future[EntityQueryResponse] = ??? + ): Future[EntityQueryResponse] = dataSource.inTransaction { dataAccess => + dataAccess.jsonEntityQuery.queryEntities(requestArguments.workspace.workspaceIdAsUUID, + entityType, + entityQuery + ) map { results => + // TODO AJ-2008: total/filtered counts + EntityQueryResponse(entityQuery, EntityQueryResultMetadata(1, 2, 3), results) + } + } override def batchUpdateEntities( entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] - ): Future[Traversable[Entity]] = ??? 
+ ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = false) override def batchUpsertEntities( entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] - ): Future[Traversable[Entity]] = ??? + ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = false) + + def batchUpdateEntitiesImpl(entityUpdates: Seq[EntityUpdateDefinition], upsert: Boolean): Future[Iterable[Entity]] = { + // find all attribute names mentioned + val namesToCheck = for { + update <- entityUpdates + operation <- update.operations + } yield operation.name + + // validate all attribute names + withAttributeNamespaceCheck(namesToCheck)(() => ()) + + // start tracing + traceFutureWithParent("JsonEntityProvider.batchUpdateEntitiesImpl", requestArguments.ctx) { localContext => + setTraceSpanAttribute(localContext, AttributeKey.stringKey("workspaceId"), requestArguments.workspace.workspaceId) + setTraceSpanAttribute(localContext, AttributeKey.booleanKey("upsert"), java.lang.Boolean.valueOf(upsert)) + setTraceSpanAttribute(localContext, + AttributeKey.longKey("entityUpdatesCount"), + java.lang.Long.valueOf(entityUpdates.length) + ) + setTraceSpanAttribute(localContext, + AttributeKey.longKey("entityOperationsCount"), + java.lang.Long.valueOf(entityUpdates.map(_.operations.length).sum) + ) + + // TODO: retrieve all entities mentioned in entityUpdates. For updates, throw error if any not found. + // TODO: for existing entities, apply operations to the existing value. For new entities, apply operations to an empty entity. 
+ // TODO: perform the insert/update + + Future.successful(Seq()) + + } // end trace + } override def copyEntities(sourceWorkspaceContext: Workspace, destWorkspaceContext: Workspace, From 3e707d610965c3e597fd5df7c9af1832c1aefa6d Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 13:08:32 -0400 Subject: [PATCH 07/24] very basic batchUpsert --- .../slick/JsonEntityComponent.scala | 61 ++++++++++---- .../entities/json/JsonEntityProvider.scala | 83 +++++++++++++------ 2 files changed, 101 insertions(+), 43 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index d31cf02832..3ba7db3efd 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -72,37 +72,36 @@ trait JsonEntityComponent { * Insert a single entity to the db */ // TODO AJ-2008: return Entity instead of JsonEntityRecord? 
- def createEntity(workspaceId: UUID, entity: Entity): ReadWriteAction[JsonEntityRecord] = { + def createEntity(workspaceId: UUID, entity: Entity): ReadWriteAction[Int] = { val attributesJson: JsValue = entity.attributes.toJson - // create insert statement - val insertStatement = - sqlu"""insert into ENTITY(name, entity_type, workspace_id, record_version, deleted, attributes) + sqlu"""insert into ENTITY(name, entity_type, workspace_id, record_version, deleted, attributes) values (${entity.name}, ${entity.entityType}, $workspaceId, 0, 0, $attributesJson)""" + } - // execute insert statement - insertStatement flatMap { _ => - // return the actually-saved entity - // TODO AJ-2008: move this logic up to JsonEntityProvider - getEntity(workspaceId, entity.entityType, entity.name) - } + /** + * Update a single entity in the db + */ + // TODO AJ-2008: return Entity instead of JsonEntityRecord? + def updateEntity(workspaceId: UUID, entity: Entity, recordVersion: Long): ReadWriteAction[Int] = { + val attributesJson: JsValue = entity.attributes.toJson + + sqlu"""update ENTITY set record_version = record_version+1, attributes = $attributesJson + where workspace_id = $workspaceId and entity_type = ${entity.entityType} and name = ${entity.name} + and record_version = $recordVersion; + """ } /** * Read a single entity from the db */ // TODO AJ-2008: return Entity instead of JsonEntityRecord? 
- def getEntity(workspaceId: UUID, entityType: String, entityName: String): ReadAction[JsonEntityRecord] = { + def getEntity(workspaceId: UUID, entityType: String, entityName: String): ReadAction[Option[JsonEntityRecord]] = { val selectStatement: SQLActionBuilder = sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and name = $entityName""" - // execute select statement - selectStatement.as[JsonEntityRecord].map { - case Seq() => throw new RawlsException(s"Expected at least one result") - case Seq(one) => one - case tooMany => throw new RawlsException(s"Expected 1 result but found ${tooMany.size}") - } + uniqueResult(selectStatement.as[JsonEntityRecord]) } /** @@ -141,6 +140,34 @@ trait JsonEntityComponent { } // TODO AJ-2008: retrieve many JsonEntityRecords by type/name pairs. Use JsonEntityRecords for access to the recordVersion + def retrieve(workspaceId: UUID, + allMentionedEntities: Seq[AttributeEntityReference] + ): ReadAction[Seq[JsonEntityRecord]] = { + // group the entity type/name pairs by type + val groupedReferences: Map[String, Seq[String]] = allMentionedEntities.groupMap(_.entityType)(_.entityName) + + // build select statements for each type + val queryParts: Iterable[SQLActionBuilder] = groupedReferences.map { + case (entityType: String, entityNames: Seq[String]) => + // build the "IN" clause values + val entityNamesSql = reduceSqlActionsWithDelim(entityNames.map(name => sql"$name"), sql",") + + // TODO AJ-2008: check query plan for this and make sure it is properly using indexes + concatSqlActions( + sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes + from ENTITY where workspace_id = $workspaceId and entity_type = $entityType + and name in (""", + entityNamesSql, + sql")" + ) + } + + // union the select statements together + val unionQuery = 
reduceSqlActionsWithDelim(queryParts.toSeq, sql" union ") + + // execute + unionQuery.as[JsonEntityRecord] + } } } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index a28fa55818..0a2cd5a6c6 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -7,7 +7,7 @@ import org.broadinstitute.dsde.rawls.entities.EntityRequestArguments import org.broadinstitute.dsde.rawls.entities.base.ExpressionEvaluationSupport.LookupExpression import org.broadinstitute.dsde.rawls.entities.base.{EntityProvider, ExpressionEvaluationContext, ExpressionValidator} import org.broadinstitute.dsde.rawls.jobexec.MethodConfigResolver -import org.broadinstitute.dsde.rawls.model.Attributable.{entityIdAttributeSuffix, AttributeMap} +import org.broadinstitute.dsde.rawls.model.Attributable.{entityIdAttributeSuffix, workspaceIdAttribute, AttributeMap} import org.broadinstitute.dsde.rawls.model.{ AttributeEntityReference, AttributeName, @@ -26,14 +26,17 @@ import org.broadinstitute.dsde.rawls.model.{ import spray.json._ import DefaultJsonProtocol._ import io.opentelemetry.api.common.AttributeKey +import org.broadinstitute.dsde.rawls.dataaccess.slick.JsonEntityRecord import org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import org.broadinstitute.dsde.rawls.util.AttributeSupport import org.broadinstitute.dsde.rawls.util.TracingUtils.{setTraceSpanAttribute, traceFutureWithParent} +import slick.dbio.DBIO import java.time.Duration +import java.util.UUID import scala.concurrent.{ExecutionContext, Future} -import scala.util.Try +import scala.util.{Failure, Try} // TODO AJ-2008: tracing class 
JsonEntityProvider(requestArguments: EntityRequestArguments, @@ -48,13 +51,15 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def entityStoreId: Option[String] = None + val workspaceId: UUID = requestArguments.workspace.workspaceIdAsUUID // shorthand for methods below + /** * Insert a single entity to the db */ override def createEntity(entity: Entity): Future[Entity] = dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.createEntity(requestArguments.workspace.workspaceIdAsUUID, entity) - } map (_.toEntity) + dataAccess.jsonEntityQuery.createEntity(workspaceId, entity) + } flatMap { _ => getEntity(entity.entityType, entity.name) } /** * Read a single entity from the db @@ -62,8 +67,8 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // TODO AJ-2008: mark transaction as read-only override def getEntity(entityType: String, entityName: String): Future[Entity] = dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.getEntity(requestArguments.workspace.workspaceIdAsUUID, entityType, entityName) - } map (_.toEntity) + dataAccess.jsonEntityQuery.getEntity(workspaceId, entityType, entityName) + } map { result => result.map(_.toEntity).get } override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = ??? 
@@ -73,10 +78,8 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, dataSource.inTransaction { dataAccess => // get the types and counts for { - typesAndCounts <- dataAccess.jsonEntityQuery.typesAndCounts(requestArguments.workspace.workspaceIdAsUUID) - typesAndAttributes <- dataAccess.jsonEntityQuery.typesAndAttributes( - requestArguments.workspace.workspaceIdAsUUID - ) + typesAndCounts <- dataAccess.jsonEntityQuery.typesAndCounts(workspaceId) + typesAndAttributes <- dataAccess.jsonEntityQuery.typesAndAttributes(workspaceId) } yield { // group attribute names by entity type val groupedAttributeNames: Map[String, Seq[String]] = @@ -97,10 +100,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, entityQuery: EntityQuery, parentContext: RawlsRequestContext ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.queryEntities(requestArguments.workspace.workspaceIdAsUUID, - entityType, - entityQuery - ) map { results => + dataAccess.jsonEntityQuery.queryEntities(workspaceId, entityType, entityQuery) map { results => // TODO AJ-2008: total/filtered counts // TODO AJ-2008: actually stream!!!! 
val metadata = EntityQueryResultMetadata(1, 2, 3) @@ -113,10 +113,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, entityQuery: EntityQuery, parentContext: RawlsRequestContext ): Future[EntityQueryResponse] = dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.queryEntities(requestArguments.workspace.workspaceIdAsUUID, - entityType, - entityQuery - ) map { results => + dataAccess.jsonEntityQuery.queryEntities(workspaceId, entityType, entityQuery) map { results => // TODO AJ-2008: total/filtered counts EntityQueryResponse(entityQuery, EntityQueryResultMetadata(1, 2, 3), results) } @@ -128,8 +125,9 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def batchUpsertEntities( entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] - ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = false) + ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = true) + // TODO AJ-2008: this needs some serious optimization, it issues way too many single individual updates def batchUpdateEntitiesImpl(entityUpdates: Seq[EntityUpdateDefinition], upsert: Boolean): Future[Iterable[Entity]] = { // find all attribute names mentioned val namesToCheck = for { @@ -142,7 +140,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // start tracing traceFutureWithParent("JsonEntityProvider.batchUpdateEntitiesImpl", requestArguments.ctx) { localContext => - setTraceSpanAttribute(localContext, AttributeKey.stringKey("workspaceId"), requestArguments.workspace.workspaceId) + setTraceSpanAttribute(localContext, AttributeKey.stringKey("workspaceId"), workspaceId.toString) setTraceSpanAttribute(localContext, AttributeKey.booleanKey("upsert"), java.lang.Boolean.valueOf(upsert)) setTraceSpanAttribute(localContext, AttributeKey.longKey("entityUpdatesCount"), @@ -153,12 +151,45 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, 
java.lang.Long.valueOf(entityUpdates.map(_.operations.length).sum) ) - // TODO: retrieve all entities mentioned in entityUpdates. For updates, throw error if any not found. - // TODO: for existing entities, apply operations to the existing value. For new entities, apply operations to an empty entity. - // TODO: perform the insert/update - - Future.successful(Seq()) - + // identify all the entities mentioned in entityUpdates + val allMentionedEntities: Seq[AttributeEntityReference] = + entityUpdates.map(eu => AttributeEntityReference(eu.entityType, eu.name)) + + dataSource + .inTransaction { dataAccess => + // iterate through the desired updates and apply them + val queries = entityUpdates.map { entityUpdate => + // attempt to retrieve the existing entity + dataAccess.jsonEntityQuery.getEntity(workspaceId, entityUpdate.entityType, entityUpdate.name) flatMap { + foundEntityOption => + if (!upsert && foundEntityOption.isEmpty) { + throw new RuntimeException("Entity does not exist") + } + val baseEntity: Entity = + foundEntityOption + .map(_.toEntity) + .getOrElse(Entity(entityUpdate.name, entityUpdate.entityType, Map.empty)) + // TODO AJ-2008: collect all the apply errors instead of handling them one-by-one? + val updatedEntity: Entity = applyOperationsToEntity(baseEntity, entityUpdate.operations) + + // insert or update + foundEntityOption match { + // do insert + case None => dataAccess.jsonEntityQuery.createEntity(workspaceId, updatedEntity) + // do update + case Some(foundEntity) => + dataAccess.jsonEntityQuery.updateEntity(workspaceId, updatedEntity, foundEntity.recordVersion) map { + updatedCount => + if (updatedCount == 0) { + throw new RuntimeException("Update failed. 
Concurrent modifications?") + } + } + } + } + } + DBIO.sequence(queries) + } + .map(_ => Seq()) // TODO AJ-2008: return entities, not nothing } // end trace } From bbeadd6067523a12bf5dddbab66e712cf0c28366 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 13:44:08 -0400 Subject: [PATCH 08/24] entityQuery metadata --- .../slick/JsonEntityComponent.scala | 45 ++++++++++++++++--- .../entities/json/JsonEntityProvider.scala | 34 +++++++++----- 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index 3ba7db3efd..d20e58866e 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -16,6 +16,7 @@ import scala.language.postfixOps * model class for rows in the ENTITY table */ // TODO AJ-2008: handle the all_attribute_values column +// TODO AJ-2008: probably don't need deletedDate here case class JsonEntityRecord(id: Long, name: String, entityType: String, @@ -126,19 +127,46 @@ trait JsonEntityComponent { val offset = queryParams.pageSize * (queryParams.page - 1) + // get the where clause from the shared method + val whereClause = queryWhereClause(workspaceId, entityType, queryParams) + // TODO AJ-2008: full-table text search // TODO AJ-2008: filter by column // TODO AJ-2008: arbitrary sorting // TODO AJ-2008: result projection // TODO AJ-2008: total/filtered counts - sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes - from ENTITY where workspace_id = $workspaceId and entity_type = $entityType - order by name - limit #${queryParams.pageSize} - offset #$offset""".as[JsonEntityRecord].map(results => results.map(_.toEntity)) + val query = concatSqlActions( + sql"select id, 
name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes from ENTITY ", + whereClause, + sql" order by name limit #${queryParams.pageSize} offset #$offset" + ) + + query.as[JsonEntityRecord].map(results => results.map(_.toEntity)) } + def countType(workspaceId: UUID, entityType: String): ReadAction[Int] = + singleResult( + sql"select count(1) from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and deleted = 0" + .as[Int] + ) + + def countQuery(workspaceId: UUID, entityType: String, queryParams: EntityQuery): ReadAction[Int] = { + // get the where clause from the shared method + val whereClause = queryWhereClause(workspaceId, entityType, queryParams) + + val query = concatSqlActions( + sql"select count(1) from ENTITY ", + whereClause + ) + + singleResult(query.as[Int]) + + } + + private def queryWhereClause(workspaceId: UUID, entityType: String, queryParams: EntityQuery): SQLActionBuilder = + sql"where workspace_id = $workspaceId and entity_type = $entityType and deleted = 0" + // TODO AJ-2008: retrieve many JsonEntityRecords by type/name pairs. 
Use JsonEntityRecords for access to the recordVersion def retrieve(workspaceId: UUID, allMentionedEntities: Seq[AttributeEntityReference] @@ -170,4 +198,11 @@ trait JsonEntityComponent { } } + private def singleResult[V](results: ReadAction[Seq[V]]): ReadAction[V] = + results map { + case Seq() => throw new RawlsException(s"Expected 1 result but found 0") + case Seq(one) => one + case tooMany => throw new RawlsException(s"Expected 1 result but found ${tooMany.size}") + } + } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 0a2cd5a6c6..195c6c4447 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -25,8 +25,11 @@ import org.broadinstitute.dsde.rawls.model.{ } import spray.json._ import DefaultJsonProtocol._ +import akka.http.scaladsl.model.StatusCodes import io.opentelemetry.api.common.AttributeKey -import org.broadinstitute.dsde.rawls.dataaccess.slick.JsonEntityRecord +import org.broadinstitute.dsde.rawls.RawlsException +import org.broadinstitute.dsde.rawls.dataaccess.slick.{JsonEntityRecord, ReadAction} +import org.broadinstitute.dsde.rawls.entities.exceptions.DataEntityException import org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import org.broadinstitute.dsde.rawls.util.AttributeSupport @@ -99,23 +102,30 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def queryEntitiesSource(entityType: String, entityQuery: EntityQuery, parentContext: RawlsRequestContext - ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.queryEntities(workspaceId, entityType, entityQuery) 
map { results => - // TODO AJ-2008: total/filtered counts - // TODO AJ-2008: actually stream!!!! - val metadata = EntityQueryResultMetadata(1, 2, 3) - val entitySource = Source.apply(results) - (metadata, entitySource) + ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = + queryEntities(entityType, entityQuery, parentContext).map { queryResponse => + // TODO AJ-2008: actually stream! + (queryResponse.resultMetadata, Source.apply(queryResponse.results)) } - } override def queryEntities(entityType: String, entityQuery: EntityQuery, parentContext: RawlsRequestContext ): Future[EntityQueryResponse] = dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.queryEntities(workspaceId, entityType, entityQuery) map { results => - // TODO AJ-2008: total/filtered counts - EntityQueryResponse(entityQuery, EntityQueryResultMetadata(1, 2, 3), results) + for { + results <- dataAccess.jsonEntityQuery.queryEntities(workspaceId, entityType, entityQuery) + unfilteredCount <- dataAccess.jsonEntityQuery.countType(workspaceId, entityType) + filteredCount <- dataAccess.jsonEntityQuery.countQuery(workspaceId, entityType, entityQuery) + } yield { + val pageCount: Int = Math.ceil(filteredCount.toFloat / entityQuery.pageSize).toInt + if (filteredCount > 0 && entityQuery.page > pageCount) { + throw new DataEntityException( + code = StatusCodes.BadRequest, + message = s"requested page ${entityQuery.page} is greater than the number of pages $pageCount" + ) + } + val queryMetadata = EntityQueryResultMetadata(unfilteredCount, filteredCount, pageCount) + EntityQueryResponse(entityQuery, queryMetadata, results) } } From aa5dfa85055a6ab2ce663017158f913cc9821a20 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 13:59:26 -0400 Subject: [PATCH 09/24] query sorting --- .../slick/JsonEntityComponent.scala | 23 +++++++++++++++---- .../entities/json/JsonEntityProvider.scala | 1 + 2 files changed, 20 insertions(+), 4 deletions(-) diff --git 
a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index d20e58866e..c90610a63c 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -128,7 +128,14 @@ trait JsonEntityComponent { val offset = queryParams.pageSize * (queryParams.page - 1) // get the where clause from the shared method - val whereClause = queryWhereClause(workspaceId, entityType, queryParams) + val whereClause: SQLActionBuilder = queryWhereClause(workspaceId, entityType, queryParams) + + // sorting + val orderByClause: SQLActionBuilder = queryParams.sortField match { + case "name" => sql" order by name #${SortDirections.toSql(queryParams.sortDirection)} " + case attr => + sql" order by JSON_EXTRACT(attributes, '$$.#$attr') #${SortDirections.toSql(queryParams.sortDirection)} " + } // TODO AJ-2008: full-table text search // TODO AJ-2008: filter by column @@ -139,18 +146,25 @@ trait JsonEntityComponent { val query = concatSqlActions( sql"select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes from ENTITY ", whereClause, - sql" order by name limit #${queryParams.pageSize} offset #$offset" + orderByClause, + sql" limit #${queryParams.pageSize} offset #$offset" ) query.as[JsonEntityRecord].map(results => results.map(_.toEntity)) } + /** + * Count the number of entities that match the query, before applying all filters + */ def countType(workspaceId: UUID, entityType: String): ReadAction[Int] = singleResult( sql"select count(1) from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and deleted = 0" .as[Int] ) + /** + * Count the number of entities that match the query, after applying all filters + */ def countQuery(workspaceId: UUID, entityType: String, 
queryParams: EntityQuery): ReadAction[Int] = { // get the where clause from the shared method val whereClause = queryWhereClause(workspaceId, entityType, queryParams) @@ -159,11 +173,12 @@ trait JsonEntityComponent { sql"select count(1) from ENTITY ", whereClause ) - singleResult(query.as[Int]) - } + /** + * Shared method to build the where-clause criteria for entityQuery. Used to generate the results and to generate the counts. + */ private def queryWhereClause(workspaceId: UUID, entityType: String, queryParams: EntityQuery): SQLActionBuilder = sql"where workspace_id = $workspaceId and entity_type = $entityType and deleted = 0" diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 195c6c4447..523882304e 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -114,6 +114,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, ): Future[EntityQueryResponse] = dataSource.inTransaction { dataAccess => for { results <- dataAccess.jsonEntityQuery.queryEntities(workspaceId, entityType, entityQuery) + // TODO AJ-2008: optimize; if no filters are present, don't need separate queries for counts unfilteredCount <- dataAccess.jsonEntityQuery.countType(workspaceId, entityType) filteredCount <- dataAccess.jsonEntityQuery.countQuery(workspaceId, entityType, entityQuery) } yield { From 7897d702cdcfd9d8d051d8e4c3314d1fbda8e624 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 14:59:42 -0400 Subject: [PATCH 10/24] fix ENTITY_KEYS insert trigger for null attributes --- .../changesets/20240910_entity_json_support.xml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git 
a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml index 3652980399..2a6363395d 100644 --- a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml +++ b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml @@ -51,10 +51,12 @@ CREATE TRIGGER after_entity_insert AFTER INSERT ON ENTITY FOR EACH ROW - INSERT INTO ENTITY_KEYS - (id, workspace_id, entity_type, attribute_keys, last_updated) - VALUES - (new.id, new.workspace_id, new.entity_type, JSON_KEYS(new.attributes), now(3)); ~ + if new.attributes is not null then + INSERT INTO ENTITY_KEYS + (id, workspace_id, entity_type, attribute_keys, last_updated) + VALUES + (new.id, new.workspace_id, new.entity_type, JSON_KEYS(new.attributes), now(3)); + end if ~ CREATE TRIGGER after_entity_update AFTER UPDATE ON ENTITY FOR EACH ROW From 18eff978edf326fe65b2e7e1a7cf5cb3ae69ffd8 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 15:00:04 -0400 Subject: [PATCH 11/24] fix old-schema inserts --- .../dsde/rawls/entities/json/JsonEntityProvider.scala | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 523882304e..55ce016af1 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -168,9 +168,13 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, dataSource .inTransaction { dataAccess => + // TODO AJ-2008: retrieve all of ${allMentionedEntities} in one query and validate existence if these are not upserts + 
// iterate through the desired updates and apply them val queries = entityUpdates.map { entityUpdate => // attempt to retrieve the existing entity + // TODO AJ-2008: pull from the list we retrieved when possible. Only re-retrieve from the db + // if we are updating the same entity multiple times dataAccess.jsonEntityQuery.getEntity(workspaceId, entityUpdate.entityType, entityUpdate.name) flatMap { foundEntityOption => if (!upsert && foundEntityOption.isEmpty) { From e7dea70fe010a4d5afd37b61e3ed5d699435be09 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 11 Sep 2024 17:19:57 -0400 Subject: [PATCH 12/24] beginning of handling refs --- .../20240910_entity_json_support.xml | 16 +++++ .../slick/JsonEntityComponent.scala | 39 ++++++++--- .../entities/json/JsonEntityProvider.scala | 66 ++++++++++++++++++- 3 files changed, 109 insertions(+), 12 deletions(-) diff --git a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml index 2a6363395d..affc23f1ec 100644 --- a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml +++ b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml @@ -81,6 +81,22 @@ + + + + + + + + + + + + diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index c90610a63c..c8efa17f8c 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -30,6 +30,8 @@ case class JsonEntityRecord(id: Long, Entity(name, entityType, attributes.convertTo[AttributeMap]) } +case class JsonEntityRefRecord(id: Long, 
name: String, entityType: String) + /** * companion object for constants, etc. */ @@ -69,6 +71,9 @@ trait JsonEntityComponent { implicit val getJsonEntityRecord: GetResult[JsonEntityRecord] = GetResult(r => JsonEntityRecord(r.<<, r.<<, r.<<, r.<<, r.<<, r.<<, r.<<, r.<<)) + implicit val getJsonEntityRefRecord: GetResult[JsonEntityRefRecord] = + GetResult(r => JsonEntityRefRecord(r.<<, r.<<, r.<<)) + /** * Insert a single entity to the db */ @@ -182,22 +187,20 @@ trait JsonEntityComponent { private def queryWhereClause(workspaceId: UUID, entityType: String, queryParams: EntityQuery): SQLActionBuilder = sql"where workspace_id = $workspaceId and entity_type = $entityType and deleted = 0" - // TODO AJ-2008: retrieve many JsonEntityRecords by type/name pairs. Use JsonEntityRecords for access to the recordVersion - def retrieve(workspaceId: UUID, - allMentionedEntities: Seq[AttributeEntityReference] - ): ReadAction[Seq[JsonEntityRecord]] = { + /** Given a set of entity references, retrieve their ids */ + def validateRefs(workspaceId: UUID, refs: Set[AttributeEntityReference]): ReadAction[Seq[JsonEntityRefRecord]] = { // group the entity type/name pairs by type - val groupedReferences: Map[String, Seq[String]] = allMentionedEntities.groupMap(_.entityType)(_.entityName) + val groupedReferences: Map[String, Set[String]] = refs.groupMap(_.entityType)(_.entityName) // build select statements for each type val queryParts: Iterable[SQLActionBuilder] = groupedReferences.map { - case (entityType: String, entityNames: Seq[String]) => + case (entityType: String, entityNames: Set[String]) => // build the "IN" clause values - val entityNamesSql = reduceSqlActionsWithDelim(entityNames.map(name => sql"$name"), sql",") + val entityNamesSql = reduceSqlActionsWithDelim(entityNames.map(name => sql"$name").toSeq, sql",") // TODO AJ-2008: check query plan for this and make sure it is properly using indexes concatSqlActions( - sql"""select id, name, entity_type, workspace_id, record_version, 
deleted, deleted_date, attributes + sql"""select id, name, entity_type from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and name in (""", entityNamesSql, @@ -209,8 +212,26 @@ trait JsonEntityComponent { val unionQuery = reduceSqlActionsWithDelim(queryParts.toSeq, sql" union ") // execute - unionQuery.as[JsonEntityRecord] + unionQuery.as[JsonEntityRefRecord](getJsonEntityRefRecord) } + + def replaceReferences(fromId: Long, refs: Map[AttributeName, Seq[JsonEntityRefRecord]]): ReadWriteAction[Int] = + sqlu"delete from ENTITY_REFS where from_id = $fromId" flatMap { _ => + // reduce the references to a set to remove any duplicates + val toIds: Set[Long] = refs.values.flatten.map(_.id).toSet + // short-circuit + if (toIds.isEmpty) { + DBIO.successful(0) + } else { + // generate bulk-insert SQL + val insertValues: Seq[SQLActionBuilder] = toIds.toSeq.map(toId => sql"($fromId, $toId)") + + val allInsertValues: SQLActionBuilder = reduceSqlActionsWithDelim(insertValues, sql",") + + concatSqlActions(sql"insert into ENTITY_REFS(from_id, to_id) values ", allInsertValues).asUpdate + } + } + } private def singleResult[V](results: ReadAction[Seq[V]]): ReadAction[V] = diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 55ce016af1..767a7e090b 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -10,6 +10,7 @@ import org.broadinstitute.dsde.rawls.jobexec.MethodConfigResolver import org.broadinstitute.dsde.rawls.model.Attributable.{entityIdAttributeSuffix, workspaceIdAttribute, AttributeMap} import org.broadinstitute.dsde.rawls.model.{ AttributeEntityReference, + AttributeEntityReferenceList, AttributeName, AttributeUpdateOperations, AttributeValue, @@ -28,7 +29,7 @@ 
import DefaultJsonProtocol._ import akka.http.scaladsl.model.StatusCodes import io.opentelemetry.api.common.AttributeKey import org.broadinstitute.dsde.rawls.RawlsException -import org.broadinstitute.dsde.rawls.dataaccess.slick.{JsonEntityRecord, ReadAction} +import org.broadinstitute.dsde.rawls.dataaccess.slick.{JsonEntityRecord, JsonEntityRefRecord, ReadAction} import org.broadinstitute.dsde.rawls.entities.exceptions.DataEntityException import org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ @@ -61,8 +62,18 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, */ override def createEntity(entity: Entity): Future[Entity] = dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.createEntity(workspaceId, entity) - } flatMap { _ => getEntity(entity.entityType, entity.name) } + for { + // find and validate all references in the entity-to-be-saved + referenceTargets <- DBIO.from(validateReferences(entity)) + // save the entity + _ <- dataAccess.jsonEntityQuery.createEntity(workspaceId, entity) + // did it save correctly? + savedEntityRecordOption <- dataAccess.jsonEntityQuery.getEntity(workspaceId, entity.entityType, entity.name) + savedEntityRecord = savedEntityRecordOption.getOrElse(throw new RuntimeException("Could not save entity")) + // save all references from this entity to other entities + _ <- dataAccess.jsonEntityQuery.replaceReferences(savedEntityRecord.id, referenceTargets) + } yield savedEntityRecord.toEntity + } /** * Read a single entity from the db @@ -175,6 +186,10 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // attempt to retrieve the existing entity // TODO AJ-2008: pull from the list we retrieved when possible. 
Only re-retrieve from the db // if we are updating the same entity multiple times + // see AJ-2009; the existing code does the wrong thing and this code should do better + + // TODO AJ-2008: find all the inserts (vs updates), and batch them together first, preserving order + // from the entityUpdates list dataAccess.jsonEntityQuery.getEntity(workspaceId, entityUpdate.entityType, entityUpdate.name) flatMap { foundEntityOption => if (!upsert && foundEntityOption.isEmpty) { @@ -225,4 +240,49 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def expressionValidator: ExpressionValidator = ??? + private def validateReferences(entity: Entity): Future[Map[AttributeName, Seq[JsonEntityRefRecord]]] = { + // find all refs in the entity + val refs: Map[AttributeName, Seq[AttributeEntityReference]] = findAllReferences(entity) + + // short-circuit + if (refs.isEmpty) { + Future.successful(Map()) + } + + // validate all refs + val allRefs: Set[AttributeEntityReference] = refs.values.flatten.toSet + + dataSource.inTransaction { dataAccess => + dataAccess.jsonEntityQuery.validateRefs(workspaceId, allRefs) map { foundRefs => + if (foundRefs.size != allRefs.size) { + throw new RuntimeException("Did not find all references") + } + // convert the foundRefs to a map for easier lookup + val foundMap: Map[(String, String), JsonEntityRefRecord] = foundRefs.map { foundRef => + ((foundRef.entityType, foundRef.name), foundRef) + }.toMap + + // return all the references found in this entity, mapped to the ids they are referencing + refs.map { case (name: AttributeName, refs: Seq[AttributeEntityReference]) => + val refRecords: Seq[JsonEntityRefRecord] = refs.map(ref => + foundMap.getOrElse((ref.entityType, ref.entityName), + throw new RuntimeException("unexpected; couldn't find ref") + ) + ) + (name, refRecords) + } + } + } + } + + // given an entity, finds all references in that entity, grouped by their attribute names + private def findAllReferences(entity: 
Entity): Map[AttributeName, Seq[AttributeEntityReference]] = + entity.attributes + .collect { + case (name: AttributeName, aer: AttributeEntityReference) => Seq((name, aer)) + case (name: AttributeName, aerl: AttributeEntityReferenceList) => aerl.list.map(ref => (name, ref)) + } + .flatten + .toSeq + .groupMap(_._1)(_._2) } From 868caac636ba5c4f0cd8aa38aafdc4cecfe054ce Mon Sep 17 00:00:00 2001 From: David An Date: Tue, 17 Sep 2024 20:03:50 -0400 Subject: [PATCH 13/24] batch upsert optimizations --- .../slick/JsonEntityComponent.scala | 56 +++++- .../entities/json/JsonEntityProvider.scala | 173 +++++++++++------- 2 files changed, 158 insertions(+), 71 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index c8efa17f8c..cf9da3e800 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -5,6 +5,7 @@ import org.broadinstitute.dsde.rawls.model.Attributable.AttributeMap import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import org.broadinstitute.dsde.rawls.model._ import slick.jdbc._ +import slick.lifted.ProvenShape import spray.json.DefaultJsonProtocol._ import spray.json._ @@ -12,6 +13,19 @@ import java.sql.Timestamp import java.util.UUID import scala.language.postfixOps +case class JsonEntitySlickRecord(id: Long, + name: String, + entityType: String, + workspaceId: UUID, + recordVersion: Long, + deleted: Boolean, + deletedDate: Option[Timestamp], + attributes: Option[String] +) { + def toEntity: Entity = + Entity(name, entityType, attributes.getOrElse("{}").parseJson.convertTo[AttributeMap]) +} + /** * model class for rows in the ENTITY table */ @@ -28,6 +42,16 @@ case class JsonEntityRecord(id: Long, ) { def toEntity: Entity = Entity(name, 
entityType, attributes.convertTo[AttributeMap]) + def toSlick: JsonEntitySlickRecord = + JsonEntitySlickRecord(id, + name, + entityType, + workspaceId, + recordVersion, + deleted, + deletedDate, + Some(attributes.compactPrint) + ) } case class JsonEntityRefRecord(id: Long, name: String, entityType: String) @@ -51,6 +75,27 @@ trait JsonEntityComponent { // json codec for entity attributes implicit val attributeFormat: AttributeFormat = new AttributeFormat with PlainArrayAttributeListSerializer + class JsonEntityTable(tag: Tag) extends Table[JsonEntitySlickRecord](tag, "ENTITY") { + def id = column[Long]("id", O.PrimaryKey, O.AutoInc) + def name = column[String]("name", O.Length(254)) + def entityType = column[String]("entity_type", O.Length(254)) + def workspaceId = column[UUID]("workspace_id") + def version = column[Long]("record_version") + def deleted = column[Boolean]("deleted") + def deletedDate = column[Option[Timestamp]]("deleted_date") + def attributes = column[Option[String]]("attributes") + + // def workspace = foreignKey("FK_ENTITY_WORKSPACE", workspaceId, workspaceQuery)(_.id) + // def uniqueTypeName = index("idx_entity_type_name", (workspaceId, entityType, name), unique = true) + + def * = + (id, name, entityType, workspaceId, version, deleted, deletedDate, attributes) <> (JsonEntitySlickRecord.tupled, + JsonEntitySlickRecord.unapply + ) + } + + object jsonEntitySlickQuery extends TableQuery(new JsonEntityTable(_)) {} + /** * SQL queries for working with the ENTITY table */ @@ -187,8 +232,8 @@ trait JsonEntityComponent { private def queryWhereClause(workspaceId: UUID, entityType: String, queryParams: EntityQuery): SQLActionBuilder = sql"where workspace_id = $workspaceId and entity_type = $entityType and deleted = 0" - /** Given a set of entity references, retrieve their ids */ - def validateRefs(workspaceId: UUID, refs: Set[AttributeEntityReference]): ReadAction[Seq[JsonEntityRefRecord]] = { + /** Given a set of entity references, retrieve those 
entities */ + def getEntities(workspaceId: UUID, refs: Set[AttributeEntityReference]): ReadAction[Seq[JsonEntityRecord]] = { // group the entity type/name pairs by type val groupedReferences: Map[String, Set[String]] = refs.groupMap(_.entityType)(_.entityName) @@ -199,8 +244,9 @@ trait JsonEntityComponent { val entityNamesSql = reduceSqlActionsWithDelim(entityNames.map(name => sql"$name").toSeq, sql",") // TODO AJ-2008: check query plan for this and make sure it is properly using indexes + // TODO AJ-2008: include `where deleted=0`? Make that an argument? concatSqlActions( - sql"""select id, name, entity_type + sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and name in (""", entityNamesSql, @@ -212,10 +258,12 @@ trait JsonEntityComponent { val unionQuery = reduceSqlActionsWithDelim(queryParts.toSeq, sql" union ") // execute - unionQuery.as[JsonEntityRefRecord](getJsonEntityRefRecord) + unionQuery.as[JsonEntityRecord](getJsonEntityRecord) } def replaceReferences(fromId: Long, refs: Map[AttributeName, Seq[JsonEntityRefRecord]]): ReadWriteAction[Int] = + // TODO AJ-2008: instead of delete and insert all, find the intersections and only delete/insert where needed? + // alternately, do insert ... on conflict do nothing? 
sqlu"delete from ENTITY_REFS where from_id = $fromId" flatMap { _ => // reduce the references to a set to remove any duplicates val toIds: Set[Long] = refs.values.flatten.map(_.id).toSet diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 767a7e090b..76166fcd21 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -29,7 +29,12 @@ import DefaultJsonProtocol._ import akka.http.scaladsl.model.StatusCodes import io.opentelemetry.api.common.AttributeKey import org.broadinstitute.dsde.rawls.RawlsException -import org.broadinstitute.dsde.rawls.dataaccess.slick.{JsonEntityRecord, JsonEntityRefRecord, ReadAction} +import org.broadinstitute.dsde.rawls.dataaccess.slick.{ + JsonEntityRecord, + JsonEntityRefRecord, + JsonEntitySlickRecord, + ReadAction +} import org.broadinstitute.dsde.rawls.entities.exceptions.DataEntityException import org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ @@ -60,11 +65,13 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, /** * Insert a single entity to the db */ - override def createEntity(entity: Entity): Future[Entity] = + override def createEntity(entity: Entity): Future[Entity] = { + logger.info(s"creating entity $entity") dataSource.inTransaction { dataAccess => for { // find and validate all references in the entity-to-be-saved referenceTargets <- DBIO.from(validateReferences(entity)) + // save the entity _ <- dataAccess.jsonEntityQuery.createEntity(workspaceId, entity) // did it save correctly? 
@@ -74,6 +81,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, _ <- dataAccess.jsonEntityQuery.replaceReferences(savedEntityRecord.id, referenceTargets) } yield savedEntityRecord.toEntity } + } /** * Read a single entity from the db @@ -149,8 +157,9 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = true) - // TODO AJ-2008: this needs some serious optimization, it issues way too many single individual updates def batchUpdateEntitiesImpl(entityUpdates: Seq[EntityUpdateDefinition], upsert: Boolean): Future[Iterable[Entity]] = { + val batchSize = 500 // arbitrary; choose a good value here; perhaps even adapt the value to the size of incoming + // find all attribute names mentioned val namesToCheck = for { update <- entityUpdates @@ -173,53 +182,83 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, java.lang.Long.valueOf(entityUpdates.map(_.operations.length).sum) ) - // identify all the entities mentioned in entityUpdates - val allMentionedEntities: Seq[AttributeEntityReference] = - entityUpdates.map(eu => AttributeEntityReference(eu.entityType, eu.name)) - dataSource .inTransaction { dataAccess => - // TODO AJ-2008: retrieve all of ${allMentionedEntities} in one query and validate existence if these are not upserts - - // iterate through the desired updates and apply them - val queries = entityUpdates.map { entityUpdate => - // attempt to retrieve the existing entity - // TODO AJ-2008: pull from the list we retrieved when possible. 
Only re-retrieve from the db - // if we are updating the same entity multiple times - // see AJ-2009; the existing code does the wrong thing and this code should do better - - // TODO AJ-2008: find all the inserts (vs updates), and batch them together first, preserving order - // from the entityUpdates list - dataAccess.jsonEntityQuery.getEntity(workspaceId, entityUpdate.entityType, entityUpdate.name) flatMap { - foundEntityOption => - if (!upsert && foundEntityOption.isEmpty) { - throw new RuntimeException("Entity does not exist") - } - val baseEntity: Entity = - foundEntityOption - .map(_.toEntity) - .getOrElse(Entity(entityUpdate.name, entityUpdate.entityType, Map.empty)) - // TODO AJ-2008: collect all the apply errors instead of handling them one-by-one? - val updatedEntity: Entity = applyOperationsToEntity(baseEntity, entityUpdate.operations) - - // insert or update - foundEntityOption match { - // do insert - case None => dataAccess.jsonEntityQuery.createEntity(workspaceId, updatedEntity) - // do update - case Some(foundEntity) => - dataAccess.jsonEntityQuery.updateEntity(workspaceId, updatedEntity, foundEntity.recordVersion) map { - updatedCount => - if (updatedCount == 0) { - throw new RuntimeException("Update failed. 
Concurrent modifications?") - } - } - } + import dataAccess.driver.api._ + + // identify all the entities mentioned in entityUpdates + val allMentionedEntities: Set[AttributeEntityReference] = + entityUpdates.map(eu => AttributeEntityReference(eu.entityType, eu.name)).toSet + + // retrieve all of ${allMentionedEntities} in one query and validate existence if these are not upserts + dataAccess.jsonEntityQuery.getEntities(workspaceId, allMentionedEntities) flatMap { existingEntities => + if (!upsert && existingEntities.size != allMentionedEntities.size) { + throw new RuntimeException( + s"Expected all entities being updated to exist; missing ${allMentionedEntities.size - existingEntities.size}" + ) + } + + // build map of (entityType, name) -> JsonEntityRecord for efficient lookup + val existingEntityMap: Map[(String, String), JsonEntityRecord] = + existingEntities.map(rec => (rec.entityType, rec.name) -> rec).toMap + + // iterate through the desired updates and apply them + val tableRecords: Seq[JsonEntitySlickRecord] = entityUpdates.map { entityUpdate => + // attempt to retrieve an existing entity + val existingRecordOption = existingEntityMap.get((entityUpdate.entityType, entityUpdate.name)) + + // this shouldn't happen because we validated above, but we're being defensive + if (!upsert && existingRecordOption.isEmpty) { + throw new RuntimeException("Expected all entities being updated to exist") + } + + // TODO AJ-2008/AJ-2009: Re-retrieve the existing entity if we are updating the same entity multiple times + // see AJ-2009; the existing code does the wrong thing and this code should do better + val baseEntity: Entity = + existingRecordOption + .map(_.toEntity) + .getOrElse(Entity(entityUpdate.name, entityUpdate.entityType, Map())) + + // TODO AJ-2008: collect all the apply errors instead of handling them one-by-one? 
+ val updatedEntity: Entity = applyOperationsToEntity(baseEntity, entityUpdate.operations) + + // TODO AJ-2008: handle references + + // translate back to a JsonEntitySlickRecord for later insert/update + // TODO AJ-2008: so far we retrieved a JsonEntityRecord, translated it to an Entity, and are now + // translating it to JsonEntitySlickRecord; we could do better + JsonEntitySlickRecord( + id = existingRecordOption.map(_.id).getOrElse(0), + name = updatedEntity.name, + entityType = updatedEntity.entityType, + workspaceId = workspaceId, + recordVersion = existingRecordOption.map(_.recordVersion).getOrElse(0), + deleted = false, + deletedDate = None, + attributes = Some(updatedEntity.attributes.toJson.compactPrint) + ) + } + + // separate the records-to-be-saved into inserts and updates + // we identify inserts as those having recordVersion 0; we could also look for id 0 + val (inserts, updates) = tableRecords.partition(_.recordVersion == 0) + + // perform the inserts, then perform the updates + val insertResult = dataAccess.jsonEntitySlickQuery ++= inserts + + val updateActions = updates.map { upd => + dataAccess.jsonEntityQuery.updateEntity(workspaceId, upd.toEntity, upd.recordVersion) map { + updatedCount => + if (updatedCount == 0) { + throw new RuntimeException("Update failed. Concurrent modifications?") + } + } } + + insertResult.flatMap(_ => slick.dbio.DBIO.sequence(updateActions)) } - DBIO.sequence(queries) } - .map(_ => Seq()) // TODO AJ-2008: return entities, not nothing + .map(_ => Seq()) // TODO AJ-2008: return entities, not nothing. What does the current impl return? 
} // end trace } @@ -247,29 +286,29 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // short-circuit if (refs.isEmpty) { Future.successful(Map()) - } - - // validate all refs - val allRefs: Set[AttributeEntityReference] = refs.values.flatten.toSet - - dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.validateRefs(workspaceId, allRefs) map { foundRefs => - if (foundRefs.size != allRefs.size) { - throw new RuntimeException("Did not find all references") - } - // convert the foundRefs to a map for easier lookup - val foundMap: Map[(String, String), JsonEntityRefRecord] = foundRefs.map { foundRef => - ((foundRef.entityType, foundRef.name), foundRef) - }.toMap - - // return all the references found in this entity, mapped to the ids they are referencing - refs.map { case (name: AttributeName, refs: Seq[AttributeEntityReference]) => - val refRecords: Seq[JsonEntityRefRecord] = refs.map(ref => - foundMap.getOrElse((ref.entityType, ref.entityName), - throw new RuntimeException("unexpected; couldn't find ref") + } else { + // validate all refs + val allRefs: Set[AttributeEntityReference] = refs.values.flatten.toSet + + dataSource.inTransaction { dataAccess => + dataAccess.jsonEntityQuery.getEntities(workspaceId, allRefs) map { foundRefs => + if (foundRefs.size != allRefs.size) { + throw new RuntimeException("Did not find all references") + } + // convert the foundRefs to a map for easier lookup + val foundMap: Map[(String, String), JsonEntityRefRecord] = foundRefs.map { foundRef => + ((foundRef.entityType, foundRef.name), JsonEntityRefRecord(foundRef.id, foundRef.name, foundRef.entityType)) + }.toMap + + // return all the references found in this entity, mapped to the ids they are referencing + refs.map { case (name: AttributeName, refs: Seq[AttributeEntityReference]) => + val refRecords: Seq[JsonEntityRefRecord] = refs.map(ref => + foundMap.getOrElse((ref.entityType, ref.entityName), + throw new RuntimeException("unexpected; 
couldn't find ref") + ) ) - ) - (name, refRecords) + (name, refRecords) + } } } } From 6727cb3b38efe3737176856a2b79294b7c2d5bdd Mon Sep 17 00:00:00 2001 From: David An Date: Tue, 17 Sep 2024 21:23:34 -0400 Subject: [PATCH 14/24] debug logging --- .../entities/json/JsonEntityProvider.scala | 29 +++++++++++++++---- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 76166fcd21..9e3bce576e 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -158,7 +158,11 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = true) def batchUpdateEntitiesImpl(entityUpdates: Seq[EntityUpdateDefinition], upsert: Boolean): Future[Iterable[Entity]] = { - val batchSize = 500 // arbitrary; choose a good value here; perhaps even adapt the value to the size of incoming + + val numUpdates = entityUpdates.size + val numOperations = entityUpdates.flatMap(_.operations).size + + logger.info(s"***** batchUpdateEntitiesImpl processing $numUpdates updates with $numOperations operations") // find all attribute names mentioned val namesToCheck = for { @@ -190,6 +194,8 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, val allMentionedEntities: Set[AttributeEntityReference] = entityUpdates.map(eu => AttributeEntityReference(eu.entityType, eu.name)).toSet + logger.info(s"***** the $numUpdates updates target ${allMentionedEntities.size} distinct entities.") + // retrieve all of ${allMentionedEntities} in one query and validate existence if these are not upserts dataAccess.jsonEntityQuery.getEntities(workspaceId, allMentionedEntities) flatMap { 
existingEntities => if (!upsert && existingEntities.size != allMentionedEntities.size) { @@ -198,6 +204,10 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, ) } + logger.info( + s"***** of the ${allMentionedEntities.size} distinct entities being updated, ${existingEntities.size} already exist." + ) + // build map of (entityType, name) -> JsonEntityRecord for efficient lookup val existingEntityMap: Map[(String, String), JsonEntityRecord] = existingEntities.map(rec => (rec.entityType, rec.name) -> rec).toMap @@ -240,8 +250,10 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } // separate the records-to-be-saved into inserts and updates - // we identify inserts as those having recordVersion 0; we could also look for id 0 - val (inserts, updates) = tableRecords.partition(_.recordVersion == 0) + // we identify inserts as those having id 0 + val (inserts, updates) = tableRecords.partition(_.id == 0) + + logger.info(s"***** all updates have been prepared: ${inserts.size} inserts, ${updates.size} updates.") // perform the inserts, then perform the updates val insertResult = dataAccess.jsonEntitySlickQuery ++= inserts @@ -255,10 +267,17 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } - insertResult.flatMap(_ => slick.dbio.DBIO.sequence(updateActions)) + logger.info(s"***** performing inserts ...") + insertResult.flatMap { _ => + logger.info(s"***** performing updates ...") + slick.dbio.DBIO.sequence(updateActions) + } } } - .map(_ => Seq()) // TODO AJ-2008: return entities, not nothing. What does the current impl return? + .map { _ => + logger.info(s"***** all inserts and updates completed.") + Seq() + } // TODO AJ-2008: return entities, not nothing. What does the current impl return? 
} // end trace } From 36ab8c0e44f514cca05d2788977d6ebf88333fe7 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 18 Sep 2024 14:37:57 -0400 Subject: [PATCH 15/24] two steps forward, one step back --- .../20240910_entity_json_support.xml | 14 ++-- .../slick/JsonEntityComponent.scala | 77 ++++++++++++----- .../entities/json/JsonEntityProvider.scala | 83 ++++++++++++++++++- 3 files changed, 142 insertions(+), 32 deletions(-) diff --git a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml index affc23f1ec..d344e808a3 100644 --- a/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml +++ b/core/src/main/resources/org/broadinstitute/dsde/rawls/liquibase/changesets/20240910_entity_json_support.xml @@ -85,16 +85,18 @@ - + - + + + + + + + diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index cf9da3e800..9b9767cbf7 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -5,7 +5,6 @@ import org.broadinstitute.dsde.rawls.model.Attributable.AttributeMap import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import org.broadinstitute.dsde.rawls.model._ import slick.jdbc._ -import slick.lifted.ProvenShape import spray.json.DefaultJsonProtocol._ import spray.json._ @@ -13,6 +12,9 @@ import java.sql.Timestamp import java.util.UUID import scala.language.postfixOps +/** + * model class for rows in the ENTITY table, used for high-level Slick operations + */ case class JsonEntitySlickRecord(id: Long, name: String, entityType: String, @@ -27,7 +29,7 @@ case 
class JsonEntitySlickRecord(id: Long, } /** - * model class for rows in the ENTITY table + * model class for rows in the ENTITY table, used for low-level raw SQL operations */ // TODO AJ-2008: handle the all_attribute_values column // TODO AJ-2008: probably don't need deletedDate here @@ -54,8 +56,16 @@ case class JsonEntityRecord(id: Long, ) } +/** + * abbreviated model for rows in the ENTITY table when we don't need all the columns + */ case class JsonEntityRefRecord(id: Long, name: String, entityType: String) +/** + * model class for rows in the ENTITY_REFS table + */ +case class RefPointerRecord(fromId: Long, toId: Long) + /** * companion object for constants, etc. */ @@ -75,6 +85,7 @@ trait JsonEntityComponent { // json codec for entity attributes implicit val attributeFormat: AttributeFormat = new AttributeFormat with PlainArrayAttributeListSerializer + /** high-level Slick table for ENTITY */ class JsonEntityTable(tag: Tag) extends Table[JsonEntitySlickRecord](tag, "ENTITY") { def id = column[Long]("id", O.PrimaryKey, O.AutoInc) def name = column[String]("name", O.Length(254)) @@ -94,11 +105,22 @@ trait JsonEntityComponent { ) } + /** high-level Slick table for ENTITY_REFS */ + class JsonEntityRefTable(tag: Tag) extends Table[RefPointerRecord](tag, "ENTITY_REFS") { + def fromId = column[Long]("from_id") + def toId = column[Long]("to_id") + + def * = + (fromId, toId) <> (RefPointerRecord.tupled, RefPointerRecord.unapply) + } + + /** high-level Slick query object for ENTITY */ + object jsonEntityRefSlickQuery extends TableQuery(new JsonEntityRefTable(_)) {} + + /** high-level Slick query object for ENTITY_REFS */ object jsonEntitySlickQuery extends TableQuery(new JsonEntityTable(_)) {} - /** - * SQL queries for working with the ENTITY table - */ + /** low-level raw SQL queries for ENTITY */ object jsonEntityQuery extends RawSqlQuery { val driver: JdbcProfile = JsonEntityComponent.this.driver @@ -261,23 +283,34 @@ trait JsonEntityComponent { 
unionQuery.as[JsonEntityRecord](getJsonEntityRecord) } - def replaceReferences(fromId: Long, refs: Map[AttributeName, Seq[JsonEntityRefRecord]]): ReadWriteAction[Int] = - // TODO AJ-2008: instead of delete and insert all, find the intersections and only delete/insert where needed? - // alternately, do insert ... on conflict do nothing? - sqlu"delete from ENTITY_REFS where from_id = $fromId" flatMap { _ => - // reduce the references to a set to remove any duplicates - val toIds: Set[Long] = refs.values.flatten.map(_.id).toSet - // short-circuit - if (toIds.isEmpty) { - DBIO.successful(0) - } else { - // generate bulk-insert SQL - val insertValues: Seq[SQLActionBuilder] = toIds.toSeq.map(toId => sql"($fromId, $toId)") - - val allInsertValues: SQLActionBuilder = reduceSqlActionsWithDelim(insertValues, sql",") - - concatSqlActions(sql"insert into ENTITY_REFS(from_id, to_id) values ", allInsertValues).asUpdate - } +// def replaceReferences(fromId: Long, refs: Map[AttributeName, Seq[JsonEntityRefRecord]]): ReadWriteAction[Int] = +// // TODO AJ-2008: instead of delete and insert all, find the intersections and only delete/insert where needed? +// // alternately, do insert ... on conflict do nothing? 
+// sqlu"delete from ENTITY_REFS where from_id = $fromId" flatMap { _ => +// // reduce the references to a set to remove any duplicates +// val toIds: Set[Long] = refs.values.flatten.map(_.id).toSet +// // short-circuit +// if (toIds.isEmpty) { +// DBIO.successful(0) +// } else { +// // generate bulk-insert SQL +// val insertValues: Seq[SQLActionBuilder] = toIds.toSeq.map(toId => sql"($fromId, $toId)") +// +// val allInsertValues: SQLActionBuilder = reduceSqlActionsWithDelim(insertValues, sql",") +// +// concatSqlActions(sql"insert into ENTITY_REFS(from_id, to_id) values ", allInsertValues).asUpdate +// } +// } + + def bulkInsertReferences(fromId: Long, toIds: Set[Long]): ReadWriteAction[Int] = + // short-circuit + if (toIds.isEmpty) { + DBIO.successful(0) + } else { + // generate bulk-insert SQL + val insertValues: Seq[SQLActionBuilder] = toIds.toSeq.map(toId => sql"($fromId, $toId)") + val allInsertValues: SQLActionBuilder = reduceSqlActionsWithDelim(insertValues, sql",") + concatSqlActions(sql"insert into ENTITY_REFS(from_id, to_id) values ", allInsertValues).asUpdate } } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 9e3bce576e..86000715ec 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -33,7 +33,8 @@ import org.broadinstitute.dsde.rawls.dataaccess.slick.{ JsonEntityRecord, JsonEntityRefRecord, JsonEntitySlickRecord, - ReadAction + ReadAction, + RefPointerRecord } import org.broadinstitute.dsde.rawls.entities.exceptions.DataEntityException import org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition @@ -41,6 +42,7 @@ import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import 
org.broadinstitute.dsde.rawls.util.AttributeSupport import org.broadinstitute.dsde.rawls.util.TracingUtils.{setTraceSpanAttribute, traceFutureWithParent} import slick.dbio.DBIO +import slick.jdbc.TransactionIsolation import java.time.Duration import java.util.UUID @@ -74,12 +76,18 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // save the entity _ <- dataAccess.jsonEntityQuery.createEntity(workspaceId, entity) - // did it save correctly? + // did it save correctly? get its id, we need that id. savedEntityRecordOption <- dataAccess.jsonEntityQuery.getEntity(workspaceId, entity.entityType, entity.name) savedEntityRecord = savedEntityRecordOption.getOrElse(throw new RuntimeException("Could not save entity")) // save all references from this entity to other entities - _ <- dataAccess.jsonEntityQuery.replaceReferences(savedEntityRecord.id, referenceTargets) + _ <- DBIO.from(replaceReferences(savedEntityRecord.id, referenceTargets)) } yield savedEntityRecord.toEntity + // } yield (savedEntityRecord, referenceTargets) + // } flatMap { case (savedEntityRecord, referenceTargets) => + // // something in the transaction above causes replaceReferences to deadlock. 
For now do it in a separate + // // transaction, but that's wrong + // replaceReferences(savedEntityRecord.id, referenceTargets) map { _ => savedEntityRecord.toEntity } + // } } } @@ -258,6 +266,10 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // perform the inserts, then perform the updates val insertResult = dataAccess.jsonEntitySlickQuery ++= inserts + val insertRefFutures: Seq[Future[_]] = inserts.map { ins => + synchronizeReferences(ins.id, ins.toEntity) + } + val updateActions = updates.map { upd => dataAccess.jsonEntityQuery.updateEntity(workspaceId, upd.toEntity, upd.recordVersion) map { updatedCount => @@ -267,17 +279,31 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } + val updateRefFutures: Seq[Future[_]] = updates.map { upd => + synchronizeReferences(upd.id, upd.toEntity) + } + logger.info(s"***** performing inserts ...") insertResult.flatMap { _ => +// logger.info(s"***** adding references for inserts ...") +// slick.dbio.DBIO.from(Future.sequence(insertRefFutures)) flatMap { _ => logger.info(s"***** performing updates ...") slick.dbio.DBIO.sequence(updateActions) +// flatMap { _ => +// logger.info(s"***** adding references for updates ...") +// slick.dbio.DBIO.from(Future.sequence(updateRefFutures)) +// } +// } } } } .map { _ => logger.info(s"***** all inserts and updates completed.") + // returns nothing. EntityApiService explicitly returns a 204 with no response body; so we don't bother + // returning anything at all from here. + // TODO AJ-2008: does this have any compatibility issues elsewhere? LocalEntityProvider does return entities. Seq() - } // TODO AJ-2008: return entities, not nothing. What does the current impl return? 
+ } } // end trace } @@ -310,6 +336,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, val allRefs: Set[AttributeEntityReference] = refs.values.flatten.toSet dataSource.inTransaction { dataAccess => + // TODO AJ-2008: this should only return ids; it doesn't need to return everything about the entities dataAccess.jsonEntityQuery.getEntities(workspaceId, allRefs) map { foundRefs => if (foundRefs.size != allRefs.size) { throw new RuntimeException("Did not find all references") @@ -343,4 +370,52 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, .flatten .toSeq .groupMap(_._1)(_._2) + + private def replaceReferences(fromId: Long, foundRefs: Map[AttributeName, Seq[JsonEntityRefRecord]]) = + dataSource.inTransaction { dataAccess => + import dataAccess.driver.api._ + // we don't actually care about the referencing attribute name or referenced type&name; reduce to just the referenced ids. + val currentEntityRefTargets: Set[Long] = foundRefs.values.flatten.map(_.id).toSet + logger.trace(s"~~~~~ found ${currentEntityRefTargets.size} ref targets in entity") + for { + // retrieve all existing refs in ENTITY_REFS for this entity; create a set of the target ids + existingRowsSeq <- dataAccess.jsonEntityRefSlickQuery.filter(_.fromId === fromId).map(_.toId).result + existingRefTargets = existingRowsSeq.toSet + + _ = logger.trace(s"~~~~~ found ${existingRefTargets.size} ref targets in db for this entity") + // find all target ids in the db that are not in the current entity + deletes = existingRefTargets diff currentEntityRefTargets + // find all target ids in the current entity that are not in the db + inserts = currentEntityRefTargets diff existingRefTargets + insertPairs = inserts.map(toId => (fromId, toId)) + insertRecords = inserts.map(toId => RefPointerRecord(fromId, toId)) + _ = logger.trace(s"~~~~~ prepared ${inserts.size} inserts and ${deletes.size} deletes to perform") + _ = logger.trace(s"~~~~~ inserts: $insertPairs") + // insert 
what needs to be inserted + insertResult <- + if (inserts.nonEmpty) { dataAccess.jsonEntityRefSlickQuery.map(r => (r.fromId, r.toId)) ++= insertPairs } + else { slick.dbio.DBIO.successful(0) } +// insertResult <- dataAccess.jsonEntityQuery.bulkInsertReferences(fromId, inserts) + _ = logger.trace(s"~~~~~ actually inserted ${insertResult} rows") + // delete what needs to be deleted + deleteResult <- + if (deletes.nonEmpty) { + dataAccess.jsonEntityRefSlickQuery + .filter(x => x.fromId === fromId && x.toId.inSetBind(deletes)) + .delete + } else { slick.dbio.DBIO.successful(0) } + _ = logger.trace(s"~~~~~ actually deleted ${deleteResult} rows") + } yield foundRefs + } + + private def synchronizeReferences(fromId: Long, + entity: Entity + ): Future[Map[AttributeName, Seq[JsonEntityRefRecord]]] = dataSource.inTransaction { _ => + for { + // find and validate all references in this entity. This returns the target internal ids for each reference. + foundRefs <- DBIO.from(validateReferences(entity)) + // + _ <- DBIO.from(replaceReferences(fromId, foundRefs)) + } yield foundRefs + } } From d2297414e2787d64dd3c8ac4c9b2ae4efe6e0790 Mon Sep 17 00:00:00 2001 From: David An Date: Tue, 24 Sep 2024 16:58:54 -0400 Subject: [PATCH 16/24] more batchUpsert work --- .../slick/JsonEntityComponent.scala | 107 ++++++++----- .../entities/json/JsonEntityProvider.scala | 145 ++++++++++++------ 2 files changed, 167 insertions(+), 85 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index 9b9767cbf7..b5d3f54bab 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -1,5 +1,6 @@ package org.broadinstitute.dsde.rawls.dataaccess.slick +import com.typesafe.scalalogging.LazyLogging import 
org.broadinstitute.dsde.rawls.RawlsException import org.broadinstitute.dsde.rawls.model.Attributable.AttributeMap import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ @@ -77,7 +78,7 @@ object JsonEntityComponent { /** * Slick component for reading/writing JSON-based entities */ -trait JsonEntityComponent { +trait JsonEntityComponent extends LazyLogging { this: DriverComponent => import slick.jdbc.MySQLProfile.api._ @@ -255,52 +256,76 @@ trait JsonEntityComponent { sql"where workspace_id = $workspaceId and entity_type = $entityType and deleted = 0" /** Given a set of entity references, retrieve those entities */ - def getEntities(workspaceId: UUID, refs: Set[AttributeEntityReference]): ReadAction[Seq[JsonEntityRecord]] = { - // group the entity type/name pairs by type - val groupedReferences: Map[String, Set[String]] = refs.groupMap(_.entityType)(_.entityName) - - // build select statements for each type - val queryParts: Iterable[SQLActionBuilder] = groupedReferences.map { - case (entityType: String, entityNames: Set[String]) => - // build the "IN" clause values - val entityNamesSql = reduceSqlActionsWithDelim(entityNames.map(name => sql"$name").toSeq, sql",") - - // TODO AJ-2008: check query plan for this and make sure it is properly using indexes - // TODO AJ-2008: include `where deleted=0`? Make that an argument? 
- concatSqlActions( - sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes + def getEntities(workspaceId: UUID, refs: Set[AttributeEntityReference]): ReadAction[Seq[JsonEntityRecord]] = + // short-circuit + if (refs.isEmpty) { + DBIO.successful(Seq.empty[JsonEntityRecord]) + } else { + // group the entity type/name pairs by type + val groupedReferences: Map[String, Set[String]] = refs.groupMap(_.entityType)(_.entityName) + + // build select statements for each type + val queryParts: Iterable[SQLActionBuilder] = groupedReferences.map { + case (entityType: String, entityNames: Set[String]) => + // build the "IN" clause values + val entityNamesSql = reduceSqlActionsWithDelim(entityNames.map(name => sql"$name").toSeq, sql",") + + // TODO AJ-2008: check query plan for this and make sure it is properly using indexes + // UNION query does use indexes for each select; but it also requires a temporary table to + // combine the results, and we can probably do better. `where (entity_type, name) in ((?, ?), (?, ?)) + // looks like it works well + // TODO AJ-2008: include `where deleted=0`? Make that an argument? 
+ concatSqlActions( + sql"""select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and name in (""", - entityNamesSql, - sql")" - ) + entityNamesSql, + sql")" + ) + } + + // union the select statements together + val unionQuery = reduceSqlActionsWithDelim(queryParts.toSeq, sql" union ") + + // execute + unionQuery.as[JsonEntityRecord](getJsonEntityRecord) } - // union the select statements together - val unionQuery = reduceSqlActionsWithDelim(queryParts.toSeq, sql" union ") + /** Given a set of entity references, retrieve those entities */ + def getEntityRefs(workspaceId: UUID, refs: Set[AttributeEntityReference]): ReadAction[Seq[JsonEntityRefRecord]] = + // short-circuit + if (refs.isEmpty) { + DBIO.successful(Seq.empty[JsonEntityRefRecord]) + } else { + // group the entity type/name pairs by type + val groupedReferences: Map[String, Set[String]] = refs.groupMap(_.entityType)(_.entityName) + + // build select statements for each type + val queryParts: Iterable[SQLActionBuilder] = groupedReferences.map { + case (entityType: String, entityNames: Set[String]) => + // build the "IN" clause values + val entityNamesSql = reduceSqlActionsWithDelim(entityNames.map(name => sql"$name").toSeq, sql",") + + // TODO AJ-2008: check query plan for this and make sure it is properly using indexes + // UNION query does use indexes for each select; but it also requires a temporary table to + // combine the results, and we can probably do better. `where (entity_type, name) in ((?, ?), (?, ?)) + // looks like it works well + // TODO AJ-2008: include `where deleted=0`? Make that an argument? 
+ concatSqlActions( + sql"""select id, name, entity_type + from ENTITY where workspace_id = $workspaceId and entity_type = $entityType + and name in (""", + entityNamesSql, + sql")" + ) + } - // execute - unionQuery.as[JsonEntityRecord](getJsonEntityRecord) - } + // union the select statements together + val unionQuery = reduceSqlActionsWithDelim(queryParts.toSeq, sql" union ") -// def replaceReferences(fromId: Long, refs: Map[AttributeName, Seq[JsonEntityRefRecord]]): ReadWriteAction[Int] = -// // TODO AJ-2008: instead of delete and insert all, find the intersections and only delete/insert where needed? -// // alternately, do insert ... on conflict do nothing? -// sqlu"delete from ENTITY_REFS where from_id = $fromId" flatMap { _ => -// // reduce the references to a set to remove any duplicates -// val toIds: Set[Long] = refs.values.flatten.map(_.id).toSet -// // short-circuit -// if (toIds.isEmpty) { -// DBIO.successful(0) -// } else { -// // generate bulk-insert SQL -// val insertValues: Seq[SQLActionBuilder] = toIds.toSeq.map(toId => sql"($fromId, $toId)") -// -// val allInsertValues: SQLActionBuilder = reduceSqlActionsWithDelim(insertValues, sql",") -// -// concatSqlActions(sql"insert into ENTITY_REFS(from_id, to_id) values ", allInsertValues).asUpdate -// } -// } + // execute + unionQuery.as[JsonEntityRefRecord](getJsonEntityRefRecord) + } def bulkInsertReferences(fromId: Long, toIds: Set[Long]): ReadWriteAction[Int] = // short-circuit diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 86000715ec..96eefc87eb 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -40,7 +40,11 @@ import org.broadinstitute.dsde.rawls.entities.exceptions.DataEntityException import 
org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import org.broadinstitute.dsde.rawls.util.AttributeSupport -import org.broadinstitute.dsde.rawls.util.TracingUtils.{setTraceSpanAttribute, traceFutureWithParent} +import org.broadinstitute.dsde.rawls.util.TracingUtils.{ + setTraceSpanAttribute, + traceDBIOWithParent, + traceFutureWithParent +} import slick.dbio.DBIO import slick.jdbc.TransactionIsolation @@ -221,7 +225,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, existingEntities.map(rec => (rec.entityType, rec.name) -> rec).toMap // iterate through the desired updates and apply them - val tableRecords: Seq[JsonEntitySlickRecord] = entityUpdates.map { entityUpdate => + val tableRecords: Seq[Option[JsonEntitySlickRecord]] = entityUpdates.map { entityUpdate => // attempt to retrieve an existing entity val existingRecordOption = existingEntityMap.get((entityUpdate.entityType, entityUpdate.name)) @@ -240,35 +244,49 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // TODO AJ-2008: collect all the apply errors instead of handling them one-by-one? 
val updatedEntity: Entity = applyOperationsToEntity(baseEntity, entityUpdate.operations) - // TODO AJ-2008: handle references - - // translate back to a JsonEntitySlickRecord for later insert/update - // TODO AJ-2008: so far we retrieved a JsonEntityRecord, translated it to an Entity, and are now - // translating it to JsonEntitySlickRecord; we could do better - JsonEntitySlickRecord( - id = existingRecordOption.map(_.id).getOrElse(0), - name = updatedEntity.name, - entityType = updatedEntity.entityType, - workspaceId = workspaceId, - recordVersion = existingRecordOption.map(_.recordVersion).getOrElse(0), - deleted = false, - deletedDate = None, - attributes = Some(updatedEntity.attributes.toJson.compactPrint) - ) + // TODO AJ-2008: if the entity hasn't changed, skip it + if (existingRecordOption.nonEmpty && baseEntity.attributes == updatedEntity.attributes) { + Option.empty[JsonEntitySlickRecord] + } else { + // TODO AJ-2008: handle references + // translate back to a JsonEntitySlickRecord for later insert/update + // TODO AJ-2008: so far we retrieved a JsonEntityRecord, translated it to an Entity, and are now + // translating it to JsonEntitySlickRecord; we could do better + Some( + JsonEntitySlickRecord( + id = existingRecordOption.map(_.id).getOrElse(0), + name = updatedEntity.name, + entityType = updatedEntity.entityType, + workspaceId = workspaceId, + recordVersion = existingRecordOption.map(_.recordVersion).getOrElse(0), + deleted = false, + deletedDate = None, + attributes = Some(updatedEntity.attributes.toJson.compactPrint) + ) + ) + } } + // for logging purposes, count the noops + val noopCount = tableRecords.count(_.isEmpty) + // separate the records-to-be-saved into inserts and updates // we identify inserts as those having id 0 - val (inserts, updates) = tableRecords.partition(_.id == 0) + val (inserts, updates) = tableRecords.flatten.partition(_.id == 0) - logger.info(s"***** all updates have been prepared: ${inserts.size} inserts, ${updates.size} 
updates.") + logger.info( + s"***** all updates have been prepared: ${inserts.size} inserts, ${updates.size} updates, ${noopCount} noop updates." + ) // perform the inserts, then perform the updates + + // do NOT use the "returning" syntax above, as it forces individual insert statements for each entity. + // instead, we insert using non-returning syntax, then perform a second query to get the ids val insertResult = dataAccess.jsonEntitySlickQuery ++= inserts - val insertRefFutures: Seq[Future[_]] = inserts.map { ins => - synchronizeReferences(ins.id, ins.toEntity) - } +// val insertRefFutures: Seq[Future[_]] = inserts.map { ins => +// synchronizeReferences(ins.id, ins.toEntity) +// } val updateActions = updates.map { upd => dataAccess.jsonEntityQuery.updateEntity(workspaceId, upd.toEntity, upd.recordVersion) map { @@ -283,17 +301,43 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, synchronizeReferences(upd.id, upd.toEntity) } + // TODO AJ-2008: can we bulk/batch the ENTITY_REFS work? 
logger.info(s"***** performing inserts ...") insertResult.flatMap { _ => -// logger.info(s"***** adding references for inserts ...") -// slick.dbio.DBIO.from(Future.sequence(insertRefFutures)) flatMap { _ => - logger.info(s"***** performing updates ...") - slick.dbio.DBIO.sequence(updateActions) -// flatMap { _ => -// logger.info(s"***** adding references for updates ...") -// slick.dbio.DBIO.from(Future.sequence(updateRefFutures)) -// } -// } + // skip any inserts that have zero references + val insertsWithReferences = + inserts.flatMap(ins => + if (findAllReferences(ins.toEntity).isEmpty) { None } + else { Some(ins) } + ) + logger.info(s"***** adding references for ${insertsWithReferences.size} inserts ...") + + // retrieve the ids for the inserts that do have references + dataAccess.jsonEntityQuery.getEntityRefs( + workspaceId, + insertsWithReferences.map(x => AttributeEntityReference(x.entityType, x.name)).toSet + ) flatMap { inserted => + // map the inserted ids back to the full entities that were inserted + val insertedIds = inserted.map(x => (x.entityType, x.name) -> x.id).toMap + slick.dbio.DBIO.sequence( + insertsWithReferences + .map { ins => + val id = insertedIds.getOrElse((ins.entityType, ins.name), + throw new RuntimeException("couldn't find inserted id") + ) + slick.dbio.DBIO.from(synchronizeReferences(id, ins.toEntity, isInsert = true)) + } + ) flatMap { _ => + logger.info(s"***** performing updates ...") + slick.dbio.DBIO.sequence(updateActions) flatMap { _ => + logger.info(s"***** adding references for updates ...") + slick.dbio.DBIO.sequence(updateRefFutures.map(x => slick.dbio.DBIO.from(x))) flatMap { _ => + logger.info(s"***** all writes complete.") + slick.dbio.DBIO.successful(()) + } + } + } + } } } } @@ -336,14 +380,13 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, val allRefs: Set[AttributeEntityReference] = refs.values.flatten.toSet dataSource.inTransaction { dataAccess => - // TODO AJ-2008: this should only return 
ids; it doesn't need to return everything about the entities - dataAccess.jsonEntityQuery.getEntities(workspaceId, allRefs) map { foundRefs => + dataAccess.jsonEntityQuery.getEntityRefs(workspaceId, allRefs) map { foundRefs => if (foundRefs.size != allRefs.size) { throw new RuntimeException("Did not find all references") } // convert the foundRefs to a map for easier lookup val foundMap: Map[(String, String), JsonEntityRefRecord] = foundRefs.map { foundRef => - ((foundRef.entityType, foundRef.name), JsonEntityRefRecord(foundRef.id, foundRef.name, foundRef.entityType)) + ((foundRef.entityType, foundRef.name), foundRef) }.toMap // return all the references found in this entity, mapped to the ids they are referencing @@ -371,32 +414,45 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, .toSeq .groupMap(_._1)(_._2) - private def replaceReferences(fromId: Long, foundRefs: Map[AttributeName, Seq[JsonEntityRefRecord]]) = + private def replaceReferences(fromId: Long, + foundRefs: Map[AttributeName, Seq[JsonEntityRefRecord]], + isInsert: Boolean = false + ) = dataSource.inTransaction { dataAccess => import dataAccess.driver.api._ // we don't actually care about the referencing attribute name or referenced type&name; reduce to just the referenced ids. 
val currentEntityRefTargets: Set[Long] = foundRefs.values.flatten.map(_.id).toSet - logger.trace(s"~~~~~ found ${currentEntityRefTargets.size} ref targets in entity") + logger.trace(s"~~~~~ found ${currentEntityRefTargets.size} ref targets in entity $fromId") for { + // TODO AJ-2008: instead of (retrieve all, then calculate diffs, then execute diffs), try doing it all in the db: + // - delete from ENTITY_REFS where from_id = $fromId and to_id not in ($currentEntityRefTargets) + // - insert into ENTITY_REFS (from_id, to_id) values ($fromId, $currentEntityRefTargets:_*) on duplicate key update from_id=from_id (noop) // retrieve all existing refs in ENTITY_REFS for this entity; create a set of the target ids - existingRowsSeq <- dataAccess.jsonEntityRefSlickQuery.filter(_.fromId === fromId).map(_.toId).result + existingRowsSeq <- + if (isInsert) { + slick.dbio.DBIO.successful(Seq.empty[Long]) + } else { + dataAccess.jsonEntityRefSlickQuery.filter(_.fromId === fromId).map(_.toId).result + } existingRefTargets = existingRowsSeq.toSet - _ = logger.trace(s"~~~~~ found ${existingRefTargets.size} ref targets in db for this entity") + _ = logger.trace(s"~~~~~ found ${existingRefTargets.size} ref targets in db for entity $fromId") // find all target ids in the db that are not in the current entity deletes = existingRefTargets diff currentEntityRefTargets // find all target ids in the current entity that are not in the db inserts = currentEntityRefTargets diff existingRefTargets insertPairs = inserts.map(toId => (fromId, toId)) insertRecords = inserts.map(toId => RefPointerRecord(fromId, toId)) - _ = logger.trace(s"~~~~~ prepared ${inserts.size} inserts and ${deletes.size} deletes to perform") - _ = logger.trace(s"~~~~~ inserts: $insertPairs") + _ = logger.trace( + s"~~~~~ prepared ${inserts.size} inserts and ${deletes.size} deletes to perform for entity $fromId" + ) + _ = logger.trace(s"~~~~~ inserts: $insertPairs for entity $fromId") // insert what needs to be inserted 
insertResult <- if (inserts.nonEmpty) { dataAccess.jsonEntityRefSlickQuery.map(r => (r.fromId, r.toId)) ++= insertPairs } else { slick.dbio.DBIO.successful(0) } // insertResult <- dataAccess.jsonEntityQuery.bulkInsertReferences(fromId, inserts) - _ = logger.trace(s"~~~~~ actually inserted ${insertResult} rows") + _ = logger.trace(s"~~~~~ actually inserted ${insertResult} rows for entity $fromId") // delete what needs to be deleted deleteResult <- if (deletes.nonEmpty) { @@ -404,18 +460,19 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, .filter(x => x.fromId === fromId && x.toId.inSetBind(deletes)) .delete } else { slick.dbio.DBIO.successful(0) } - _ = logger.trace(s"~~~~~ actually deleted ${deleteResult} rows") + _ = logger.trace(s"~~~~~ actually deleted ${deleteResult} rows for entity $fromId") } yield foundRefs } private def synchronizeReferences(fromId: Long, - entity: Entity + entity: Entity, + isInsert: Boolean = false ): Future[Map[AttributeName, Seq[JsonEntityRefRecord]]] = dataSource.inTransaction { _ => for { // find and validate all references in this entity. This returns the target internal ids for each reference. 
foundRefs <- DBIO.from(validateReferences(entity)) // - _ <- DBIO.from(replaceReferences(fromId, foundRefs)) + _ <- DBIO.from(replaceReferences(fromId, foundRefs, isInsert)) } yield foundRefs } } From cfb6f86e2d64fcd4e28ae48ed15d898139a399e5 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 25 Sep 2024 09:22:37 -0400 Subject: [PATCH 17/24] createEntity is an insert --- .../dsde/rawls/entities/json/JsonEntityProvider.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 96eefc87eb..7e4a9ea850 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -81,10 +81,11 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // save the entity _ <- dataAccess.jsonEntityQuery.createEntity(workspaceId, entity) // did it save correctly? get its id, we need that id. 
+ // TODO AJ-2008: return just the id; we don't need the whole record savedEntityRecordOption <- dataAccess.jsonEntityQuery.getEntity(workspaceId, entity.entityType, entity.name) savedEntityRecord = savedEntityRecordOption.getOrElse(throw new RuntimeException("Could not save entity")) // save all references from this entity to other entities - _ <- DBIO.from(replaceReferences(savedEntityRecord.id, referenceTargets)) + _ <- DBIO.from(replaceReferences(savedEntityRecord.id, referenceTargets, isInsert = true)) } yield savedEntityRecord.toEntity // } yield (savedEntityRecord, referenceTargets) // } flatMap { case (savedEntityRecord, referenceTargets) => From 26b4e6f944d405220c194c98ad859f57a4d42351 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 25 Sep 2024 11:21:07 -0400 Subject: [PATCH 18/24] cleanups --- .../slick/JsonEntityComponent.scala | 19 +---- .../entities/json/JsonEntityProvider.scala | 74 ++++++++++++------- 2 files changed, 53 insertions(+), 40 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index b5d3f54bab..9fc070b6d0 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -32,7 +32,7 @@ case class JsonEntitySlickRecord(id: Long, /** * model class for rows in the ENTITY table, used for low-level raw SQL operations */ -// TODO AJ-2008: handle the all_attribute_values column +// TODO AJ-2008: handle the all_attribute_values column? 
// TODO AJ-2008: probably don't need deletedDate here case class JsonEntityRecord(id: Long, name: String, @@ -97,6 +97,7 @@ trait JsonEntityComponent extends LazyLogging { def deletedDate = column[Option[Timestamp]]("deleted_date") def attributes = column[Option[String]]("attributes") + // TODO AJ-2008: are these useful? // def workspace = foreignKey("FK_ENTITY_WORKSPACE", workspaceId, workspaceQuery)(_.id) // def uniqueTypeName = index("idx_entity_type_name", (workspaceId, entityType, name), unique = true) @@ -190,6 +191,7 @@ trait JsonEntityComponent extends LazyLogging { * The ENTITY_KEYS table is automatically populated via triggers on the ENTITY table; see the db * to understand those triggers. */ + // TODO AJ-2008: assess performance of ENTITY_KEYS.attribute_keys vs JSON_KEYS(ENTITY.attributes) def typesAndAttributes(workspaceId: UUID): ReadAction[Seq[(String, String)]] = sql"""SELECT DISTINCT entity_type, json_key FROM ENTITY_KEYS, JSON_TABLE(attribute_keys, '$$[*]' COLUMNS(json_key VARCHAR(256) PATH '$$')) t @@ -212,9 +214,7 @@ trait JsonEntityComponent extends LazyLogging { // TODO AJ-2008: full-table text search // TODO AJ-2008: filter by column - // TODO AJ-2008: arbitrary sorting // TODO AJ-2008: result projection - // TODO AJ-2008: total/filtered counts val query = concatSqlActions( sql"select id, name, entity_type, workspace_id, record_version, deleted, deleted_date, attributes from ENTITY ", @@ -292,6 +292,7 @@ trait JsonEntityComponent extends LazyLogging { } /** Given a set of entity references, retrieve those entities */ + // TODO AJ-2008: address lots of copy/paste between getEntities and getEntityRefs def getEntityRefs(workspaceId: UUID, refs: Set[AttributeEntityReference]): ReadAction[Seq[JsonEntityRefRecord]] = // short-circuit if (refs.isEmpty) { @@ -326,18 +327,6 @@ trait JsonEntityComponent extends LazyLogging { // execute unionQuery.as[JsonEntityRefRecord](getJsonEntityRefRecord) } - - def bulkInsertReferences(fromId: Long, toIds: 
Set[Long]): ReadWriteAction[Int] = - // short-circuit - if (toIds.isEmpty) { - DBIO.successful(0) - } else { - // generate bulk-insert SQL - val insertValues: Seq[SQLActionBuilder] = toIds.toSeq.map(toId => sql"($fromId, $toId)") - val allInsertValues: SQLActionBuilder = reduceSqlActionsWithDelim(insertValues, sql",") - concatSqlActions(sql"insert into ENTITY_REFS(from_id, to_id) values ", allInsertValues).asUpdate - } - } private def singleResult[V](results: ReadAction[Seq[V]]): ReadAction[V] = diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 7e4a9ea850..a59777f55a 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -53,7 +53,11 @@ import java.util.UUID import scala.concurrent.{ExecutionContext, Future} import scala.util.{Failure, Try} -// TODO AJ-2008: tracing +// TODO AJ-2008: +// - tracing +// - mark transactions as read-only where possible (does this actually help?) +// - error-handling +// - logging class JsonEntityProvider(requestArguments: EntityRequestArguments, implicit protected val dataSource: SlickDataSource, cacheEnabled: Boolean, @@ -71,8 +75,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, /** * Insert a single entity to the db */ - override def createEntity(entity: Entity): Future[Entity] = { - logger.info(s"creating entity $entity") + override def createEntity(entity: Entity): Future[Entity] = dataSource.inTransaction { dataAccess => for { // find and validate all references in the entity-to-be-saved @@ -80,35 +83,33 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // save the entity _ <- dataAccess.jsonEntityQuery.createEntity(workspaceId, entity) - // did it save correctly? get its id, we need that id. 
- // TODO AJ-2008: return just the id; we don't need the whole record + // did it save correctly? re-retrieve it. By re-retrieving it, we can 1) get its id, and 2) get the actual, + // normalized JSON that was persisted to the db. When we return the entity to the user, we return the + // normalized version. savedEntityRecordOption <- dataAccess.jsonEntityQuery.getEntity(workspaceId, entity.entityType, entity.name) savedEntityRecord = savedEntityRecordOption.getOrElse(throw new RuntimeException("Could not save entity")) // save all references from this entity to other entities _ <- DBIO.from(replaceReferences(savedEntityRecord.id, referenceTargets, isInsert = true)) } yield savedEntityRecord.toEntity - // } yield (savedEntityRecord, referenceTargets) - // } flatMap { case (savedEntityRecord, referenceTargets) => - // // something in the transaction above causes replaceReferences to deadlock. For now do it in a separate - // // transaction, but that's wrong - // replaceReferences(savedEntityRecord.id, referenceTargets) map { _ => savedEntityRecord.toEntity } - // } } - } /** * Read a single entity from the db */ - // TODO AJ-2008: mark transaction as read-only override def getEntity(entityType: String, entityName: String): Future[Entity] = dataSource.inTransaction { dataAccess => dataAccess.jsonEntityQuery.getEntity(workspaceId, entityType, entityName) } map { result => result.map(_.toEntity).get } + /** + * Soft-delete specified entities + */ override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = ??? 
- // TODO AJ-2008: mark transaction as read-only - // TODO AJ-2008: probably needs caching for the attribute calculations + /** + * Return type/count/attribute metadata + * TODO AJ-2008: assess performance and add caching if necessary + */ override def entityTypeMetadata(useCache: Boolean): Future[Map[String, EntityTypeMetadata]] = dataSource.inTransaction { dataAccess => // get the types and counts @@ -131,6 +132,9 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } + /** + * stream a page of entities + */ override def queryEntitiesSource(entityType: String, entityQuery: EntityQuery, parentContext: RawlsRequestContext @@ -140,6 +144,9 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, (queryResponse.resultMetadata, Source.apply(queryResponse.results)) } + /** + * return a page of entities + */ override def queryEntities(entityType: String, entityQuery: EntityQuery, parentContext: RawlsRequestContext @@ -162,14 +169,23 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } + /** + * update multiple entities; they must pre-exist + */ override def batchUpdateEntities( entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = false) + /** + * upsert multiple entities; will create if they do not pre-exist + */ override def batchUpsertEntities( entityUpdates: Seq[AttributeUpdateOperations.EntityUpdateDefinition] ): Future[Iterable[Entity]] = batchUpdateEntitiesImpl(entityUpdates, upsert = true) + /** + * internal implementation for both batchUpsert and batchUpdate + */ def batchUpdateEntitiesImpl(entityUpdates: Seq[EntityUpdateDefinition], upsert: Boolean): Future[Iterable[Entity]] = { val numUpdates = entityUpdates.size @@ -245,11 +261,10 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // TODO AJ-2008: collect all the apply errors instead of handling them one-by-one? 
val updatedEntity: Entity = applyOperationsToEntity(baseEntity, entityUpdate.operations) - // TODO AJ-2008: if the entity hasn't changed, skip it + // if the entity hasn't changed, skip it if (existingRecordOption.nonEmpty && baseEntity.attributes == updatedEntity.attributes) { Option.empty[JsonEntitySlickRecord] } else { - // TODO AJ-2008: handle references // translate back to a JsonEntitySlickRecord for later insert/update // TODO AJ-2008: so far we retrieved a JsonEntityRecord, translated it to an Entity, and are now // translating it to JsonEntitySlickRecord; we could do better @@ -279,16 +294,11 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, s"***** all updates have been prepared: ${inserts.size} inserts, ${updates.size} updates, ${noopCount} noop updates." ) - // perform the inserts, then perform the updates - // do NOT use the "returning" syntax above, as it forces individual insert statements for each entity. // instead, we insert using non-returning syntax, then perform a second query to get the ids val insertResult = dataAccess.jsonEntitySlickQuery ++= inserts -// val insertRefFutures: Seq[Future[_]] = inserts.map { ins => -// synchronizeReferences(ins.id, ins.toEntity) -// } - + // TODO AJ-2008: don't eagerly kick these off; can cause parallelism problems val updateActions = updates.map { upd => dataAccess.jsonEntityQuery.updateEntity(workspaceId, upd.toEntity, upd.recordVersion) map { updatedCount => @@ -298,6 +308,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } + // TODO AJ-2008: don't eagerly kick these off; can cause parallelism problems val updateRefFutures: Seq[Future[_]] = updates.map { upd => synchronizeReferences(upd.id, upd.toEntity) } @@ -369,6 +380,12 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def expressionValidator: ExpressionValidator = ??? 
+ // ==================================================================================================== + // helper methods + // ==================================================================================================== + + // given potential references from an entity, verify that the reference targets all exist, + // and return their ids. private def validateReferences(entity: Entity): Future[Map[AttributeName, Seq[JsonEntityRefRecord]]] = { // find all refs in the entity val refs: Map[AttributeName, Seq[AttributeEntityReference]] = findAllReferences(entity) @@ -415,10 +432,16 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, .toSeq .groupMap(_._1)(_._2) + // given already-validated references, including target ids, update the ENTITY_REFS table for a given source + // entity private def replaceReferences(fromId: Long, foundRefs: Map[AttributeName, Seq[JsonEntityRefRecord]], isInsert: Boolean = false - ) = + ): Future[Map[AttributeName, Seq[JsonEntityRefRecord]]] = { + // short-circuit + if (isInsert && foundRefs.isEmpty) { + return Future.successful(Map()) + } dataSource.inTransaction { dataAccess => import dataAccess.driver.api._ // we don't actually care about the referencing attribute name or referenced type&name; reduce to just the referenced ids. 
@@ -443,7 +466,6 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, // find all target ids in the current entity that are not in the db inserts = currentEntityRefTargets diff existingRefTargets insertPairs = inserts.map(toId => (fromId, toId)) - insertRecords = inserts.map(toId => RefPointerRecord(fromId, toId)) _ = logger.trace( s"~~~~~ prepared ${inserts.size} inserts and ${deletes.size} deletes to perform for entity $fromId" ) @@ -464,7 +486,9 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, _ = logger.trace(s"~~~~~ actually deleted ${deleteResult} rows for entity $fromId") } yield foundRefs } + } + // helper to call validateReferences followed by replaceReferences private def synchronizeReferences(fromId: Long, entity: Entity, isInsert: Boolean = false From e6c782c74eea345284edfdcec580a8073f05e7e4 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 25 Sep 2024 13:01:25 -0400 Subject: [PATCH 19/24] delete entities --- .../slick/JsonEntityComponent.scala | 69 ++++++++++++++++++- .../entities/json/JsonEntityProvider.scala | 18 ++++- 2 files changed, 84 insertions(+), 3 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index 9fc070b6d0..312bc74c69 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -10,7 +10,7 @@ import spray.json.DefaultJsonProtocol._ import spray.json._ import java.sql.Timestamp -import java.util.UUID +import java.util.{Date, UUID} import scala.language.postfixOps /** @@ -327,6 +327,73 @@ trait JsonEntityComponent extends LazyLogging { // execute unionQuery.as[JsonEntityRefRecord](getJsonEntityRefRecord) } + + /** + * Returns the set of entities which directly reference the supplied targets + 
*/ + def getReferrers(workspaceId: UUID, targets: Set[AttributeEntityReference]) = { + val inFragment = refsInFragment(targets) + + val baseSql = sql"""select referrer.id, referrer.name, referrer.entity_type + from ENTITY referrer, ENTITY_REFS refs, ENTITY target + where target.id = refs.to_id + and referrer.id = refs.from_id + and target.workspace_id = $workspaceId + and (target.entity_type, target.name) in """ + + concatSqlActions(baseSql, inFragment, sql";").as[JsonEntityRefRecord] + } + + /** + * Returns the set of entities which directly AND RECURSIVELY reference the supplied targets + */ + def getRecursiveReferrers(workspaceId: UUID, targets: Set[AttributeEntityReference]) = { + val startSql = + sql"""WITH RECURSIVE ancestor AS ( + select r.from_id, r.to_id + from ENTITY_REFS r, ENTITY e + where e.id = r.to_id + and e.workspace_id = $workspaceId + and (e.entity_type, e.name) in """ + + val inFragment = refsInFragment(targets) + + val endSql = + sql""" UNION ALL + select r.from_id, r.to_id + from ancestor, ENTITY_REFS r + where ancestor.from_id = r.to_id) + select a.from_id, e.entity_type, e.name from ancestor a, ENTITY e + where a.from_id = e.id;""" + + concatSqlActions(startSql, inFragment, endSql).as[JsonEntityRefRecord] + + } + + def softDelete(workspaceId: UUID, targets: Set[AttributeEntityReference]): ReadWriteAction[Int] = { + // short-circuit + if (targets.isEmpty) { + return DBIO.successful(0) + } + val renameSuffix = "_" + getSufficientlyRandomSuffix(1000000000) + val deletedDate = new Timestamp(new Date().getTime) + + val inFragment = refsInFragment(targets) + + val baseSql = sql"""update ENTITY set deleted=1, deleted_date=$deletedDate, name=CONCAT(name, $renameSuffix) + where deleted=0 AND workspace_id=$workspaceId and (entity_type, name) in """ + + concatSqlActions(baseSql, inFragment, sql";").asUpdate + } + + private def refsInFragment(refs: Set[AttributeEntityReference]) = { + // build select statements for each type + val pairs = refs.map { ref 
=> + sql"""(${ref.entityType}, ${ref.entityName})""" + } + concatSqlActions(sql"(", reduceSqlActionsWithDelim(pairs.toSeq, sql","), sql")") + } + } private def singleResult[V](results: ReadAction[Seq[V]]): ReadAction[V] = diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index a59777f55a..7349bcc025 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -27,6 +27,7 @@ import org.broadinstitute.dsde.rawls.model.{ import spray.json._ import DefaultJsonProtocol._ import akka.http.scaladsl.model.StatusCodes +import bio.terra.common.exception.NotImplementedException import io.opentelemetry.api.common.AttributeKey import org.broadinstitute.dsde.rawls.RawlsException import org.broadinstitute.dsde.rawls.dataaccess.slick.{ @@ -36,7 +37,7 @@ import org.broadinstitute.dsde.rawls.dataaccess.slick.{ ReadAction, RefPointerRecord } -import org.broadinstitute.dsde.rawls.entities.exceptions.DataEntityException +import org.broadinstitute.dsde.rawls.entities.exceptions.{DataEntityException, DeleteEntitiesConflictException} import org.broadinstitute.dsde.rawls.model.AttributeUpdateOperations.EntityUpdateDefinition import org.broadinstitute.dsde.rawls.model.WorkspaceJsonSupport._ import org.broadinstitute.dsde.rawls.util.AttributeSupport @@ -104,7 +105,20 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, /** * Soft-delete specified entities */ - override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = ??? 
+ override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = { + val deleteTargets = entityRefs.toSet + dataSource.inTransaction { dataAccess => + dataAccess.jsonEntityQuery.getRecursiveReferrers(workspaceId, deleteTargets) + } flatMap { referrers => + val referringSet = referrers.map(x => AttributeEntityReference(x.entityType, x.name)).toSet + if (referringSet != deleteTargets) + throw new DeleteEntitiesConflictException(referringSet) + else + dataSource.inTransaction { dataAccess => + dataAccess.jsonEntityQuery.softDelete(workspaceId, deleteTargets) + } + } + } /** * Return type/count/attribute metadata From b0d0fdf9845d5643f05accadffc1e49094dd51f1 Mon Sep 17 00:00:00 2001 From: David An Date: Wed, 25 Sep 2024 15:35:13 -0400 Subject: [PATCH 20/24] recursion limit; stopwatch timing --- .../slick/JsonEntityComponent.scala | 19 +++++- .../entities/json/JsonEntityProvider.scala | 62 +++++++++++++------ .../local/EntityStatisticsCacheSupport.scala | 11 +++- 3 files changed, 71 insertions(+), 21 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index 312bc74c69..9e60d139ef 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -198,6 +198,12 @@ trait JsonEntityComponent extends LazyLogging { where workspace_id = $workspaceId;""" .as[(String, String)] + def typesAndAttributesV2(workspaceId: UUID): ReadAction[Seq[(String, String)]] = + sql"""SELECT DISTINCT entity_type, json_key FROM ENTITY, + JSON_TABLE(json_keys(attributes), '$$[*]' COLUMNS(json_key VARCHAR(256) PATH '$$')) t + where workspace_id = $workspaceId;""" + .as[(String, String)] + def queryEntities(workspaceId: UUID, entityType: String, queryParams: EntityQuery): 
ReadAction[Seq[Entity]] = { val offset = queryParams.pageSize * (queryParams.page - 1) @@ -348,6 +354,16 @@ trait JsonEntityComponent extends LazyLogging { * Returns the set of entities which directly AND RECURSIVELY reference the supplied targets */ def getRecursiveReferrers(workspaceId: UUID, targets: Set[AttributeEntityReference]) = { + + // max number of rows to consider in the recursive query. + // this function will never return more than this many results. When called to validate delete requests, + // it means that we will never return more than 10000 referrers for any given set of entities to be deleted. + val recursionLimit = 10000 + + // a recursive SQL query to retrieve all entities that refer to the ${targets} entities, plus all entities that + // refer to the referring entities, plus all entities that refer to those, plus ... + // + // recursive SQL: https://dev.mysql.com/doc/refman/8.4/en/with.html#common-table-expressions-recursive val startSql = sql"""WITH RECURSIVE ancestor AS ( select r.from_id, r.to_id @@ -362,7 +378,8 @@ trait JsonEntityComponent extends LazyLogging { sql""" UNION ALL select r.from_id, r.to_id from ancestor, ENTITY_REFS r - where ancestor.from_id = r.to_id) + where ancestor.from_id = r.to_id + limit #$recursionLimit) select a.from_id, e.entity_type, e.name from ancestor a, ENTITY e where a.from_id = e.id;""" diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 7349bcc025..698654120a 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -29,8 +29,10 @@ import DefaultJsonProtocol._ import akka.http.scaladsl.model.StatusCodes import bio.terra.common.exception.NotImplementedException import io.opentelemetry.api.common.AttributeKey +import 
org.apache.commons.lang3.time.StopWatch import org.broadinstitute.dsde.rawls.RawlsException import org.broadinstitute.dsde.rawls.dataaccess.slick.{ + DataAccess, JsonEntityRecord, JsonEntityRefRecord, JsonEntitySlickRecord, @@ -124,12 +126,33 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, * Return type/count/attribute metadata * TODO AJ-2008: assess performance and add caching if necessary */ - override def entityTypeMetadata(useCache: Boolean): Future[Map[String, EntityTypeMetadata]] = + override def entityTypeMetadata(useCache: Boolean): Future[Map[String, EntityTypeMetadata]] = { + + def attrsV1(dataAccess: DataAccess) = { + val stopwatch = StopWatch.createStarted() + dataAccess.jsonEntityQuery.typesAndAttributes(workspaceId) map { result => + stopwatch.stop() + logger.info(s"***** attrsV1 complete in ${stopwatch.getTime}ms") + result + } + } + + def attrsV2(dataAccess: DataAccess) = { + val stopwatch = StopWatch.createStarted() + dataAccess.jsonEntityQuery.typesAndAttributesV2(workspaceId) map { result => + stopwatch.stop() + logger.info(s"***** attrsV2 complete in ${stopwatch.getTime}ms") + result + } + } + + val stopwatch = StopWatch.create() dataSource.inTransaction { dataAccess => // get the types and counts for { typesAndCounts <- dataAccess.jsonEntityQuery.typesAndCounts(workspaceId) - typesAndAttributes <- dataAccess.jsonEntityQuery.typesAndAttributes(workspaceId) + typesAndAttributes <- attrsV1(dataAccess) + typesAndAttributesV2 <- attrsV2(dataAccess) } yield { // group attribute names by entity type val groupedAttributeNames: Map[String, Seq[String]] = @@ -145,6 +168,7 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, }.toMap } } + } /** * stream a page of entities @@ -201,21 +225,6 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, * internal implementation for both batchUpsert and batchUpdate */ def batchUpdateEntitiesImpl(entityUpdates: Seq[EntityUpdateDefinition], upsert: Boolean): 
Future[Iterable[Entity]] = { - - val numUpdates = entityUpdates.size - val numOperations = entityUpdates.flatMap(_.operations).size - - logger.info(s"***** batchUpdateEntitiesImpl processing $numUpdates updates with $numOperations operations") - - // find all attribute names mentioned - val namesToCheck = for { - update <- entityUpdates - operation <- update.operations - } yield operation.name - - // validate all attribute names - withAttributeNamespaceCheck(namesToCheck)(() => ()) - // start tracing traceFutureWithParent("JsonEntityProvider.batchUpdateEntitiesImpl", requestArguments.ctx) { localContext => setTraceSpanAttribute(localContext, AttributeKey.stringKey("workspaceId"), workspaceId.toString) @@ -229,6 +238,22 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, java.lang.Long.valueOf(entityUpdates.map(_.operations.length).sum) ) + val stopwatch = StopWatch.createStarted() + + val numUpdates = entityUpdates.size + val numOperations = entityUpdates.flatMap(_.operations).size + + logger.info(s"***** batchUpdateEntitiesImpl processing $numUpdates updates with $numOperations operations") + + // find all attribute names mentioned + val namesToCheck = for { + update <- entityUpdates + operation <- update.operations + } yield operation.name + + // validate all attribute names + withAttributeNamespaceCheck(namesToCheck)(() => ()) + dataSource .inTransaction { dataAccess => import dataAccess.driver.api._ @@ -358,7 +383,8 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, slick.dbio.DBIO.sequence(updateActions) flatMap { _ => logger.info(s"***** adding references for updates ...") slick.dbio.DBIO.sequence(updateRefFutures.map(x => slick.dbio.DBIO.from(x))) flatMap { _ => - logger.info(s"***** all writes complete.") + stopwatch.stop() + logger.info(s"***** all writes complete in ${stopwatch.getTime}ms") slick.dbio.DBIO.successful(()) } } diff --git 
a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/EntityStatisticsCacheSupport.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/EntityStatisticsCacheSupport.scala index 81804029f4..d101ab01e1 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/EntityStatisticsCacheSupport.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/EntityStatisticsCacheSupport.scala @@ -1,6 +1,7 @@ package org.broadinstitute.dsde.rawls.entities.local import com.typesafe.scalalogging.LazyLogging +import org.apache.commons.lang3.time.StopWatch import org.broadinstitute.dsde.rawls.dataaccess.SlickDataSource import org.broadinstitute.dsde.rawls.dataaccess.slick.{DataAccess, ReadAction, ReadWriteAction} import org.broadinstitute.dsde.rawls.metrics.RawlsInstrumented @@ -161,9 +162,15 @@ trait EntityStatisticsCacheSupport extends LazyLogging with RawlsInstrumented { /** wrapper for uncached type-attributes lookup, includes performance tracing */ def uncachedTypeAttributes(dataAccess: DataAccess, parentContext: RawlsRequestContext - ): ReadAction[Map[String, Seq[AttributeName]]] = + ): ReadAction[Map[String, Seq[AttributeName]]] = { + val stopwatch = StopWatch.createStarted() traceDBIOWithParent("getAttrNamesAndEntityTypes", parentContext) { _ => - dataAccess.entityQuery.getAttrNamesAndEntityTypes(workspaceContext.workspaceIdAsUUID) + dataAccess.entityQuery.getAttrNamesAndEntityTypes(workspaceContext.workspaceIdAsUUID) map { result => + stopwatch.stop() + logger.info(s"***** getAttrNamesAndEntityTypes complete in ${stopwatch.getTime}ms") + result + } } + } } From 6cfd9933525cb60ba09d992b59191f7828952be3 Mon Sep 17 00:00:00 2001 From: David An Date: Thu, 26 Sep 2024 12:15:13 -0400 Subject: [PATCH 21/24] timing for perf tests --- .../entities/json/JsonEntityProvider.scala | 24 ++++++++++---- .../entities/local/LocalEntityProvider.scala | 33 +++++++++++++++---- 2 files changed, 44 insertions(+), 13 deletions(-) 
diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 698654120a..a994108266 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -108,16 +108,23 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, * Soft-delete specified entities */ override def deleteEntities(entityRefs: Seq[AttributeEntityReference]): Future[Int] = { + val stopwatch = StopWatch.createStarted() val deleteTargets = entityRefs.toSet dataSource.inTransaction { dataAccess => dataAccess.jsonEntityQuery.getRecursiveReferrers(workspaceId, deleteTargets) } flatMap { referrers => val referringSet = referrers.map(x => AttributeEntityReference(x.entityType, x.name)).toSet - if (referringSet != deleteTargets) + if (referringSet != deleteTargets) { + stopwatch.stop() + logger.info(s"***** deleteEntities complete in ${stopwatch.getTime}ms") throw new DeleteEntitiesConflictException(referringSet) - else + } else dataSource.inTransaction { dataAccess => - dataAccess.jsonEntityQuery.softDelete(workspaceId, deleteTargets) + dataAccess.jsonEntityQuery.softDelete(workspaceId, deleteTargets) map { result => + stopwatch.stop() + logger.info(s"***** deleteEntities complete in ${stopwatch.getTime}ms") + result + } } } } @@ -146,13 +153,12 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } - val stopwatch = StopWatch.create() dataSource.inTransaction { dataAccess => // get the types and counts for { typesAndCounts <- dataAccess.jsonEntityQuery.typesAndCounts(workspaceId) - typesAndAttributes <- attrsV1(dataAccess) - typesAndAttributesV2 <- attrsV2(dataAccess) + // typesAndAttributes <- attrsV1(dataAccess) + typesAndAttributes <- attrsV2(dataAccess) } yield { // group attribute names by entity 
type val groupedAttributeNames: Map[String, Seq[String]] = @@ -176,11 +182,15 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def queryEntitiesSource(entityType: String, entityQuery: EntityQuery, parentContext: RawlsRequestContext - ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = + ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = { + val stopwatch = StopWatch.createStarted() queryEntities(entityType, entityQuery, parentContext).map { queryResponse => + stopwatch.stop() + logger.info(s"***** queryEntitiesSource complete in ${stopwatch.getTime}ms") // TODO AJ-2008: actually stream! (queryResponse.resultMetadata, Source.apply(queryResponse.results)) } + } /** * return a page of entities diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala index 8c95b29038..9f2919b1c7 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala @@ -7,6 +7,7 @@ import akka.stream.scaladsl.{Sink, Source} import com.typesafe.scalalogging.LazyLogging import io.opencensus.trace.{AttributeValue => OpenCensusAttributeValue} import io.opentelemetry.api.common.AttributeKey +import org.apache.commons.lang3.time.StopWatch import org.broadinstitute.dsde.rawls.RawlsExceptionWithErrorReport import org.broadinstitute.dsde.rawls.dataaccess.slick.{ DataAccess, @@ -181,7 +182,8 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, } // EntityApiServiceSpec has good test coverage for this api - override def deleteEntities(entRefs: Seq[AttributeEntityReference]): Future[Int] = + override def deleteEntities(entRefs: Seq[AttributeEntityReference]): Future[Int] = { + val stopwatch = StopWatch.createStarted() dataSource.inTransaction { dataAccess => // 
withAllEntityRefs throws exception if some entities not found; passes through if all ok traceDBIOWithParent("LocalEntityProvider.deleteEntities", requestArguments.ctx) { localContext => @@ -191,13 +193,19 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, traceDBIOWithParent("entityQuery.getAllReferringEntities", localContext)(innerSpan => dataAccess.entityQuery.getAllReferringEntities(workspaceContext, entRefs.toSet) flatMap { referringEntities => - if (referringEntities != entRefs.toSet) + if (referringEntities != entRefs.toSet) { + stopwatch.stop() + logger.info(s"***** deleteEntities complete in ${stopwatch.getTime}ms") throw new DeleteEntitiesConflictException(referringEntities) - else { + } else { traceDBIOWithParent("entityQuery.hide", innerSpan)(_ => dataAccess.entityQuery .hide(workspaceContext, entRefs) - .withStatementParameters(statementInit = _.setQueryTimeout(queryTimeoutSeconds)) + .withStatementParameters(statementInit = _.setQueryTimeout(queryTimeoutSeconds)) map { result => + stopwatch.stop() + logger.info(s"***** deleteEntities complete in ${stopwatch.getTime}ms") + result + } ) } } @@ -205,6 +213,7 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, } } } + } override def deleteEntitiesOfType(entityType: String): Future[Int] = dataSource.inTransaction { dataAccess => @@ -310,6 +319,7 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, query: EntityQuery, parentContext: RawlsRequestContext = requestArguments.ctx ): Future[(EntityQueryResultMetadata, Source[Entity, _])] = { + val stopwatch = StopWatch.createStarted() // look for a columnFilter that specifies the primary key for this entityType; // such a columnFilter means we are filtering by name and can greatly simplify the underlying query. 
val nameFilter: Option[String] = query.columnFilter match { @@ -353,7 +363,10 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, for { metadata <- queryForMetadata(entityType, query, childContext) entitySource = queryForResultSource(entityType, query, childContext) - } yield (metadata, entitySource) + } yield { + logger.info(s"***** queryEntitiesSource complete in ${stopwatch.getTime}ms") + (metadata, entitySource) + } } } } @@ -457,6 +470,8 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, java.lang.Long.valueOf(entityUpdates.map(_.operations.length).sum) ) + val stopwatch = StopWatch.createStarted() + withAttributeNamespaceCheck(namesToCheck) { dataSource.inTransactionWithAttrTempTable(Set(AttributeTempTableType.Entity)) { dataAccess => val updateTrialsAction = traceDBIOWithParent("getActiveEntities", localContext)(_ => @@ -503,7 +518,13 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, } } - traceDBIOWithParent("saveAction", localContext)(_ => saveAction) + traceDBIOWithParent("saveAction", localContext)(_ => + saveAction map { result => + stopwatch.stop() + logger.info(s"***** all writes complete in ${stopwatch.getTime}ms") + result + } + ) } recover { case icve: java.sql.SQLIntegrityConstraintViolationException => val userMessage = From c7728d2f6765f5125dea62af41e88bb7f9cd6653 Mon Sep 17 00:00:00 2001 From: David An Date: Thu, 26 Sep 2024 15:55:49 -0400 Subject: [PATCH 22/24] handle references in bulk --- .../entities/json/JsonEntityProvider.scala | 115 +++++++++++++----- 1 file changed, 87 insertions(+), 28 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index a994108266..70f03438ca 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ 
b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -348,21 +348,10 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, val insertResult = dataAccess.jsonEntitySlickQuery ++= inserts // TODO AJ-2008: don't eagerly kick these off; can cause parallelism problems - val updateActions = updates.map { upd => - dataAccess.jsonEntityQuery.updateEntity(workspaceId, upd.toEntity, upd.recordVersion) map { - updatedCount => - if (updatedCount == 0) { - throw new RuntimeException("Update failed. Concurrent modifications?") - } - } - } - - // TODO AJ-2008: don't eagerly kick these off; can cause parallelism problems - val updateRefFutures: Seq[Future[_]] = updates.map { upd => - synchronizeReferences(upd.id, upd.toEntity) - } +// val updateRefFutures: Seq[Future[_]] = updates.map { upd => +// synchronizeReferences(upd.id, upd.toEntity) +// } - // TODO AJ-2008: can we bulk/batch the ENTITY_REFS work? logger.info(s"***** performing inserts ...") insertResult.flatMap { _ => // skip any inserts that have zero references @@ -380,22 +369,36 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, ) flatMap { inserted => // map the inserted ids back to the full entities that were inserted val insertedIds = inserted.map(x => (x.entityType, x.name) -> x.id).toMap - slick.dbio.DBIO.sequence( - insertsWithReferences - .map { ins => - val id = insertedIds.getOrElse((ins.entityType, ins.name), - throw new RuntimeException("couldn't find inserted id") - ) - slick.dbio.DBIO.from(synchronizeReferences(id, ins.toEntity, isInsert = true)) - } - ) flatMap { _ => + + val insertsWithReferencesAndIds: Seq[JsonEntitySlickRecord] = insertsWithReferences.map { ins => + val id = insertedIds.getOrElse((ins.entityType, ins.name), + throw new RuntimeException("couldn't find inserted id") + ) + ins.copy(id = id) + } + + slick.dbio.DBIO.from(synchronizeInsertedReferences(insertsWithReferencesAndIds)) flatMap { _ => logger.info(s"***** 
performing updates ...") - slick.dbio.DBIO.sequence(updateActions) flatMap { _ => + val idsBeingUpdated: Seq[Long] = updates.map(_.id) + + slick.dbio.DBIO.sequence(updates.map { upd => + dataAccess.jsonEntityQuery.updateEntity(workspaceId, upd.toEntity, upd.recordVersion) map { + updatedCount => + if (updatedCount == 0) { + throw new RuntimeException("Update failed. Concurrent modifications?") + } + } + }) flatMap { _ => logger.info(s"***** adding references for updates ...") - slick.dbio.DBIO.sequence(updateRefFutures.map(x => slick.dbio.DBIO.from(x))) flatMap { _ => - stopwatch.stop() - logger.info(s"***** all writes complete in ${stopwatch.getTime}ms") - slick.dbio.DBIO.successful(()) + // delete all from ENTITY_REFS where from_id in (entities being updated) + dataAccess.jsonEntityRefSlickQuery.filter(_.fromId inSetBind idsBeingUpdated).delete flatMap { _ => + // insert all references for the records being updated + // TODO AJ-2008: instead of delete all/insert all, can we optimize? + slick.dbio.DBIO.from(synchronizeInsertedReferences(updates)) flatMap { _ => + stopwatch.stop() + logger.info(s"***** all writes complete in ${stopwatch.getTime}ms") + slick.dbio.DBIO.successful(()) + } } } } @@ -471,6 +474,15 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } + private def findReferences(entity: Entity): Seq[AttributeEntityReference] = + entity.attributes + .collect { + case (_: AttributeName, aer: AttributeEntityReference) => Seq(aer) + case (_: AttributeName, aerl: AttributeEntityReferenceList) => aerl.list + } + .flatten + .toSeq + // given an entity, finds all references in that entity, grouped by their attribute names private def findAllReferences(entity: Entity): Map[AttributeName, Seq[AttributeEntityReference]] = entity.attributes @@ -538,6 +550,53 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, } } + private def synchronizeInsertedReferences(inserted: Seq[JsonEntitySlickRecord]): Future[Int] = { + // find all 
references for all records + val referenceRequestsByEntityId: Map[Long, Seq[AttributeEntityReference]] = + inserted.map(ins => ins.id -> findReferences(ins.toEntity)).toMap + + // validate all references for all records + val uniqueReferences: Set[AttributeEntityReference] = referenceRequestsByEntityId.values.flatten.toSet + + // short-circuit + if (uniqueReferences.isEmpty) { + return Future.successful(0) + } + + dataSource.inTransaction { dataAccess => + import dataAccess.driver.api._ + + dataAccess.jsonEntityQuery.getEntityRefs(workspaceId, uniqueReferences) flatMap { foundRefs => + if (foundRefs.size != uniqueReferences.size) { + throw new RuntimeException("Did not find all references") + } + + // convert the foundRefs to a map for easier lookup + val foundMap: Map[(String, String), Long] = foundRefs.map { foundRef => + ((foundRef.entityType, foundRef.name), foundRef.id) + }.toMap + + // generate the (from_id, to_id) pairs to insert into ENTITY_REFS + val targetIdsByFromId: Map[Long, Seq[Long]] = referenceRequestsByEntityId.map { + case (fromId, desiredReferences) => + val targetIds = desiredReferences.map { desiredRef => + foundMap.getOrElse((desiredRef.entityType, desiredRef.entityName), + throw new RuntimeException("this shouldn't happen") + ) + } + (fromId, targetIds) + } + val pairsToInsert: Seq[RefPointerRecord] = targetIdsByFromId.flatMap { case (fromId, toIds) => + toIds.map(toId => RefPointerRecord(fromId, toId)) + }.toSeq + + // perform the insert + (dataAccess.jsonEntityRefSlickQuery ++= pairsToInsert).map(x => x.sum) + + } + } + } + // helper to call validateReferences followed by replaceReferences private def synchronizeReferences(fromId: Long, entity: Entity, From 2f0af75f669163a6b758bf496912e3c4a3a2c74d Mon Sep 17 00:00:00 2001 From: David An Date: Fri, 27 Sep 2024 09:04:52 -0400 Subject: [PATCH 23/24] code comment --- .../dsde/rawls/dataaccess/slick/JsonEntityComponent.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index 9e60d139ef..42c96b024e 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -158,6 +158,8 @@ trait JsonEntityComponent extends LazyLogging { * Update a single entity in the db */ // TODO AJ-2008: return Entity instead of JsonEntityRecord? + // TODO AJ-2008: can this use INSERT ... ON DUPLICATE KEY UPDATE instead? That would allow batching multiple updates + // into a single statement. But, how would that work with record_version checking? def updateEntity(workspaceId: UUID, entity: Entity, recordVersion: Long): ReadWriteAction[Int] = { val attributesJson: JsValue = entity.attributes.toJson From 438013875dfe39b8b93453c4e8d4f80c1e9f7ab9 Mon Sep 17 00:00:00 2001 From: David An Date: Fri, 27 Sep 2024 13:54:16 -0400 Subject: [PATCH 24/24] rename entity --- .../slick/JsonEntityComponent.scala | 39 +++++++++++++++++++ .../dsde/rawls/entities/EntityService.scala | 28 ++++++------- .../rawls/entities/base/EntityProvider.scala | 5 ++- .../datarepo/DataRepoEntityProvider.scala | 3 ++ .../entities/json/JsonEntityProvider.scala | 36 +++++++++++++++++ .../entities/local/LocalEntityProvider.scala | 14 +++++++ .../rawls/webservice/EntityApiService.scala | 4 +- .../entities/local/CaseSensitivitySpec.scala | 2 +- 8 files changed, 115 insertions(+), 16 deletions(-) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala index 42c96b024e..433c200e1f 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala +++ 
b/core/src/main/scala/org/broadinstitute/dsde/rawls/dataaccess/slick/JsonEntityComponent.scala @@ -181,6 +181,21 @@ trait JsonEntityComponent extends LazyLogging { uniqueResult(selectStatement.as[JsonEntityRecord]) } + /** + * Read a single entity's reference record (id, name, entity_type) from the db + */ + // TODO AJ-2008: return AttributeEntityReference instead of JsonEntityRefRecord? + def getEntityRef(workspaceId: UUID, + entityType: String, + entityName: String + ): ReadAction[Option[JsonEntityRefRecord]] = { + val selectStatement: SQLActionBuilder = + sql"""select id, name, entity_type + from ENTITY where workspace_id = $workspaceId and entity_type = $entityType and name = $entityName""" + + uniqueResult(selectStatement.as[JsonEntityRefRecord]) + } + /** * All entity types for the given workspace, with their counts of active entities */ @@ -405,6 +420,12 @@ trait JsonEntityComponent extends LazyLogging { concatSqlActions(baseSql, inFragment, sql";").asUpdate } + def renameSingleEntity(workspaceId: UUID, entity: AttributeEntityReference, newName: String): ReadWriteAction[Int] = + sql"""update ENTITY set name = $newName + where workspace_id = $workspaceId + and entity_type = ${entity.entityType} + and name = ${entity.entityName};""".asUpdate + private def refsInFragment(refs: Set[AttributeEntityReference]) = { // build select statements for each type val pairs = refs.map { ref => @@ -413,6 +434,24 @@ trait JsonEntityComponent extends LazyLogging { concatSqlActions(sql"(", reduceSqlActionsWithDelim(pairs.toSeq, sql","), sql")") } + def renameEmbeddedReferences(workspaceId: UUID, + toId: Long, + oldReference: AttributeEntityReference, + newReference: AttributeEntityReference + ): ReadWriteAction[Int] = { + // build string to be replaced + val oldStr = s"""{"entityName": "${oldReference.entityName}", "entityType": "${oldReference.entityType}"}""" + // build string to be the replacement + val newStr = s"""{"entityName": "${newReference.entityName}", "entityType": "${newReference.entityType}"}""" + + // perform replacements + 
sql"""update ENTITY set attributes = REPLACE(attributes, $oldStr, $newStr) + where workspace_id = $workspaceId and id in ( + select from_id from ENTITY_REFS er + where er.to_id = $toId + )""".asUpdate + } + } private def singleResult[V](results: ReadAction[Seq[V]]): ReadAction[V] = diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala index a2b3ecce7b..e63068b4c1 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/EntityService.scala @@ -226,23 +226,25 @@ class EntityService(protected val ctx: RawlsRequestContext, ) // TODO AJ-2008: move to EntityProvider - def renameEntity(workspaceName: WorkspaceName, entityType: String, entityName: String, newName: String): Future[Int] = + def renameEntity(workspaceName: WorkspaceName, + entityType: String, + entityName: String, + newName: String, + dataReference: Option[DataReferenceName], + billingProject: Option[GoogleProjectId] + ): Future[Int] = (getV2WorkspaceContextAndPermissions(workspaceName, SamWorkspaceActions.write, Some(WorkspaceAttributeSpecs(all = false)) ) flatMap { workspaceContext => - dataSource.inTransaction { dataAccess => - withEntity(workspaceContext, entityType, entityName, dataAccess) { entity => - dataAccess.entityQuery.get(workspaceContext, entity.entityType, newName) flatMap { - case None => dataAccess.entityQuery.rename(workspaceContext, entity.entityType, entity.name, newName) - case Some(_) => - throw new RawlsExceptionWithErrorReport( - errorReport = - ErrorReport(StatusCodes.Conflict, s"Destination ${entity.entityType} ${newName} already exists") - ) - } - } - } + val entityRequestArguments = EntityRequestArguments(workspaceContext, ctx, dataReference, billingProject) + for { + entityProvider <- entityManager.resolveProviderFuture(entityRequestArguments) + 
numberOfEntitiesRenamed <- entityProvider.renameEntity(AttributeEntityReference(entityType, entityName), + newName + ) + } yield numberOfEntitiesRenamed + }).recover( sqlLoggingRecover(s"renameEntity: $workspaceName $entityType $entityName") ) diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/base/EntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/base/EntityProvider.scala index 2d7ceba197..4a532e669c 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/base/EntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/base/EntityProvider.scala @@ -16,7 +16,8 @@ import org.broadinstitute.dsde.rawls.model.{ EntityTypeMetadata, RawlsRequestContext, SubmissionValidationEntityInputs, - Workspace + Workspace, + WorkspaceName } import scala.concurrent.Future @@ -88,4 +89,6 @@ trait EntityProvider { linkExistingEntities: Boolean, parentContext: RawlsRequestContext ): Future[EntityCopyResponse] + + def renameEntity(entity: AttributeEntityReference, newName: String): Future[Int] } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/datarepo/DataRepoEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/datarepo/DataRepoEntityProvider.scala index cc00113260..d8f566315a 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/datarepo/DataRepoEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/datarepo/DataRepoEntityProvider.scala @@ -547,4 +547,7 @@ class DataRepoEntityProvider(snapshotModel: SnapshotModel, parentContext: RawlsRequestContext ): Future[EntityCopyResponse] = throw new UnsupportedEntityOperationException("copy entities not supported by this provider.") + + override def renameEntity(entity: AttributeEntityReference, newName: String): Future[Int] = + throw new UnsupportedEntityOperationException("renameEntity not supported by this provider.") } diff --git 
a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala index 70f03438ca..6c23c6c866 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/json/JsonEntityProvider.scala @@ -433,6 +433,42 @@ class JsonEntityProvider(requestArguments: EntityRequestArguments, override def expressionValidator: ExpressionValidator = ??? + override def renameEntity(entity: AttributeEntityReference, newName: String): Future[Int] = + dataSource.inTransaction { dataAccess => + import dataAccess.driver.api._ + + // get the entity. This validates it exists, as well as retrieves its id which we will need later + dataAccess.jsonEntityQuery.getEntityRef(workspaceId, entity.entityType, entity.entityName) flatMap { + existingOption => + val existing = existingOption.getOrElse(throw new DataEntityException("Entity not found")) + // rename the specific entity + dataAccess.jsonEntityQuery.renameSingleEntity(workspaceId, entity, newName) flatMap { numRenamed => + if (numRenamed == 0) { + // this shouldn't happen, since we just verified its existence + throw new DataEntityException("Entity not renamed") + } else if (numRenamed > 1) { + // this shouldn't happen, since the db enforces uniqueness of workspaceId+entityType+name + throw new DataEntityException( + "Unexpected error; found more than one entity to rename" + ) + } else { + // replace the reference in all referrers + // TODO AJ-2008(review): string-based REPLACE assumes the stored attribute JSON exactly matches this key order/spacing — confirm against MySQL's normalized JSON output. Also consider rejecting the rename with 409 Conflict when an entity of this type named newName already exists, matching LocalEntityProvider.renameEntity.
+ dataAccess.jsonEntityQuery.renameEmbeddedReferences(workspaceId, + existing.id, + entity, + entity.copy(entityName = newName) + ) map { embeddedUpdates => + logger.info(s"***** renameEntity updated $embeddedUpdates embedded references") + // return the number of entities renamed, which should be one + numRenamed + } + } + } + } + + } + // ==================================================================================================== // helper methods // ==================================================================================================== diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala index 9f2919b1c7..d43495f4c1 100644 --- a/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/entities/local/LocalEntityProvider.scala @@ -569,4 +569,18 @@ class LocalEntityProvider(requestArguments: EntityRequestArguments, ) } ) + + override def renameEntity(entity: AttributeEntityReference, newName: String): Future[Int] = + dataSource.inTransaction { dataAccess => + withEntity(workspaceContext, entity.entityType, entity.entityName, dataAccess) { entity => + dataAccess.entityQuery.get(workspaceContext, entity.entityType, newName) flatMap { + case None => dataAccess.entityQuery.rename(workspaceContext, entity.entityType, entity.name, newName) + case Some(_) => + throw new RawlsExceptionWithErrorReport( + errorReport = + ErrorReport(StatusCodes.Conflict, s"Destination ${entity.entityType} ${newName} already exists") + ) + } + } + } } diff --git a/core/src/main/scala/org/broadinstitute/dsde/rawls/webservice/EntityApiService.scala b/core/src/main/scala/org/broadinstitute/dsde/rawls/webservice/EntityApiService.scala index 2eecb67d81..2588ae0a39 100644 --- 
a/core/src/main/scala/org/broadinstitute/dsde/rawls/webservice/EntityApiService.scala +++ b/core/src/main/scala/org/broadinstitute/dsde/rawls/webservice/EntityApiService.scala @@ -231,7 +231,9 @@ trait EntityApiService extends UserInfoDirectives { .renameEntity(WorkspaceName(workspaceNamespace, workspaceName), entityType, entityName, - newEntityName.name + newEntityName.name, + dataReference, + billingProject ) .map(_ => StatusCodes.NoContent) } diff --git a/core/src/test/scala/org/broadinstitute/dsde/rawls/entities/local/CaseSensitivitySpec.scala b/core/src/test/scala/org/broadinstitute/dsde/rawls/entities/local/CaseSensitivitySpec.scala index e9184e5d4c..59df9724ee 100644 --- a/core/src/test/scala/org/broadinstitute/dsde/rawls/entities/local/CaseSensitivitySpec.scala +++ b/core/src/test/scala/org/broadinstitute/dsde/rawls/entities/local/CaseSensitivitySpec.scala @@ -453,7 +453,7 @@ class CaseSensitivitySpec extends AnyFreeSpec with Matchers with TestDriverCompo // rename entity of target type services.entityService - .renameEntity(testWorkspace.workspace.toWorkspaceName, typeUnderTest, "003", "my-new-name") + .renameEntity(testWorkspace.workspace.toWorkspaceName, typeUnderTest, "003", "my-new-name", None, None) .futureValue // get actual entities