From 8406fc9c4ac497422d682c1cba6343479e1a7a7c Mon Sep 17 00:00:00 2001 From: Lucas Burson Date: Sat, 23 Dec 2023 12:18:31 -0600 Subject: [PATCH] Improve 'updateLocations' Task for Database Efficiency (#1096) Modified the 'updateLocations' task to enhance database efficiency. The recurring task now uses a transaction per iteration instead of one for the entire task, eliminating manual commits. Cache removals are also now done with each transaction, avoiding extra database lookups. --- app/org/maproulette/jobs/SchedulerActor.scala | 58 +++++++++---------- 1 file changed, 27 insertions(+), 31 deletions(-) diff --git a/app/org/maproulette/jobs/SchedulerActor.scala b/app/org/maproulette/jobs/SchedulerActor.scala index fc049051..87ab0a0d 100644 --- a/app/org/maproulette/jobs/SchedulerActor.scala +++ b/app/org/maproulette/jobs/SchedulerActor.scala @@ -5,7 +5,6 @@ package org.maproulette.jobs import akka.actor.{Actor, Props} -import anorm.JodaParameterMetaData._ import anorm.SqlParser._ import anorm._ @@ -152,47 +151,44 @@ class SchedulerActor @Inject() ( def updateLocations(action: String): Unit = { logger.info(s"Scheduled Task '$action': Starting run") val start = System.currentTimeMillis - val currentTime = DateTime.now() val challengeFilter = "deleted = false AND is_archived = false AND enabled = true"; - val staleChallengeIds = db.withTransaction { implicit c => + val staleChallengeIds = db.withConnection { implicit c => SQL( s"SELECT id FROM challenges WHERE ${challengeFilter} AND (modified > last_updated OR last_updated IS NULL)" ).as(SqlParser.long("id").*) } + logger.info(s"Updating locations and bounding boxes for ${staleChallengeIds.length} challenges") + // For each "stale" challenge, update the fields: location, bounding box, and last_updated time. + // Previously the below code was written as a single database transaction, which is much faster, but when a single + // challenge had bad data for the location/box columns it would fail the entire transaction, reverting any changes. + // See https://github.com/maproulette/maproulette3/issues/567 for more details. staleChallengeIds.foreach(id => { - db.withTransaction { implicit c => - try { - val query = - s"""UPDATE challenges SET - location = (SELECT ST_Centroid(ST_Collect(ST_Makevalid(location))) - FROM tasks - WHERE parent_id = ${id}), - bounding = (SELECT ST_Envelope(ST_Buffer((ST_SetSRID(ST_Extent(location), 4326))::geography,2)::geometry) - FROM tasks - WHERE parent_id = ${id}), - last_updated = NOW() - WHERE id = ${id};""" - SQL(query).executeUpdate() - c.commit() - } catch { - case e: Exception => { - logger.error("Unable to update location on challenge " + id, e) - } + try { + db.withTransaction { + implicit c => + val query = + s"""UPDATE challenges SET + location = (SELECT ST_Centroid(ST_Collect(ST_Makevalid(location))) + FROM tasks + WHERE parent_id = ${id}), + bounding = (SELECT ST_Envelope(ST_Buffer((ST_SetSRID(ST_Extent(location), 4326))::geography,2)::geometry) + FROM tasks + WHERE parent_id = ${id}), + last_updated = NOW() + WHERE id = ${id};""" + SQL(query).executeUpdate() + } + // The above query will not update the cache, so remove the id from the cache in case it is there + logger.debug(s"Flushing challenge cache of challenge with id $id") + this.dALManager.challenge.cacheManager.cache.remove(id) + } catch { + case e: Exception => { + logger.error("Unable to update location on challenge " + id, e) } } }) - db.withTransaction { implicit c => - SQL(s"SELECT id FROM challenges WHERE ${challengeFilter} AND (last_updated > {currentTime})") - .on(Symbol("currentTime") -> ToParameterValue.apply[DateTime].apply(currentTime)) - .as(SqlParser.long("id").*) - .foreach(id => { - logger.debug(s"Flushing challenge cache of challenge with id $id") - this.dALManager.challenge.cacheManager.cache.remove(id) - }) - } - val totalTime = System.currentTimeMillis - start logger.info( s"Scheduled Task '$action': Finished run. Time spent: ${String.format("%1d", totalTime)}ms"