Skip to content

Commit

Permalink
Improve 'updateLocations' Task for Database Efficiency (#1096)
Browse files Browse the repository at this point in the history
Modified the 'updateLocations' task to enhance database efficiency. The recurring task now uses a transaction per iteration instead of one for the entire task, eliminating manual commits. Cache removals are also now done with each transaction, avoiding extra database lookups.
  • Loading branch information
ljdelight authored Dec 23, 2023
1 parent 4d945f2 commit 8406fc9
Showing 1 changed file with 27 additions and 31 deletions.
58 changes: 27 additions & 31 deletions app/org/maproulette/jobs/SchedulerActor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
package org.maproulette.jobs

import akka.actor.{Actor, Props}
import anorm.JodaParameterMetaData._
import anorm.SqlParser._
import anorm._

Expand Down Expand Up @@ -152,47 +151,44 @@ class SchedulerActor @Inject() (
def updateLocations(action: String): Unit = {
logger.info(s"Scheduled Task '$action': Starting run")
val start = System.currentTimeMillis
val currentTime = DateTime.now()
val challengeFilter = "deleted = false AND is_archived = false AND enabled = true";
val staleChallengeIds = db.withTransaction { implicit c =>
val staleChallengeIds = db.withConnection { implicit c =>
SQL(
s"SELECT id FROM challenges WHERE ${challengeFilter} AND (modified > last_updated OR last_updated IS NULL)"
).as(SqlParser.long("id").*)
}
logger.info(s"Updating locations and bounding boxes for ${staleChallengeIds.length} challenges")

// For each "stale" challenge, update the fields: location, bounding box, and last_updated time.
// Previously the below code was written as a single database transaction, which is much faster, but when a single
// challenge had bad data for the location/box columns it would fail the entire transaction, reverting any changes.
// See https://github.com/maproulette/maproulette3/issues/567 for more details.
staleChallengeIds.foreach(id => {
db.withTransaction { implicit c =>
try {
val query =
s"""UPDATE challenges SET
location = (SELECT ST_Centroid(ST_Collect(ST_Makevalid(location)))
FROM tasks
WHERE parent_id = ${id}),
bounding = (SELECT ST_Envelope(ST_Buffer((ST_SetSRID(ST_Extent(location), 4326))::geography,2)::geometry)
FROM tasks
WHERE parent_id = ${id}),
last_updated = NOW()
WHERE id = ${id};"""
SQL(query).executeUpdate()
c.commit()
} catch {
case e: Exception => {
logger.error("Unable to update location on challenge " + id, e)
}
try {
db.withTransaction {
implicit c =>
val query =
s"""UPDATE challenges SET
location = (SELECT ST_Centroid(ST_Collect(ST_Makevalid(location)))
FROM tasks
WHERE parent_id = ${id}),
bounding = (SELECT ST_Envelope(ST_Buffer((ST_SetSRID(ST_Extent(location), 4326))::geography,2)::geometry)
FROM tasks
WHERE parent_id = ${id}),
last_updated = NOW()
WHERE id = ${id};"""
SQL(query).executeUpdate()
}
// The above query will not update the cache, so remove the id from the cache in case it is there
logger.debug(s"Flushing challenge cache of challenge with id $id")
this.dALManager.challenge.cacheManager.cache.remove(id)
} catch {
case e: Exception => {
logger.error("Unable to update location on challenge " + id, e)
}
}
})

db.withTransaction { implicit c =>
SQL(s"SELECT id FROM challenges WHERE ${challengeFilter} AND (last_updated > {currentTime})")
.on(Symbol("currentTime") -> ToParameterValue.apply[DateTime].apply(currentTime))
.as(SqlParser.long("id").*)
.foreach(id => {
logger.debug(s"Flushing challenge cache of challenge with id $id")
this.dALManager.challenge.cacheManager.cache.remove(id)
})
}

val totalTime = System.currentTimeMillis - start
logger.info(
s"Scheduled Task '$action': Finished run. Time spent: ${String.format("%1d", totalTime)}ms"
Expand Down

0 comments on commit 8406fc9

Please sign in to comment.