From b2b76e9e1587ef2ae3836d59f2e1f4c6fb680810 Mon Sep 17 00:00:00 2001 From: Albert Louis Rossi Date: Mon, 2 Oct 2023 08:32:46 -0500 Subject: [PATCH] dcache-bulk: add admin command and query to reset all requests with failed targets Motivation: There exists an admin command to reset requests to be rerun. The command takes various options based on the fields in the bulk request table. However, it is currently not possible to rerun only the requests with failed targets that have completed. This may sometimes be useful to the admin. (Note that for safety we do not automatically do this on service restart/reload, where only incomplete requests are reset). Modifications: Add the command and a few extra pieces of support in the dao/store layers. Result: We can now retry completed requests which have failed targets. Target: master Request: 9.2 Patch: https://rb.dcache.org/r/14117/ Requires-notes: yes Acked-by: Tigran --- .../services/bulk/BulkServiceCommands.java | 20 +++++++++++++++++++ .../services/bulk/store/BulkRequestStore.java | 7 +++++++ .../jdbc/request/JdbcBulkRequestStore.java | 16 +++++++++++++-- .../jdbc/rtarget/JdbcRequestTargetDao.java | 8 ++++++++ 4 files changed, 49 insertions(+), 2 deletions(-) diff --git a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/BulkServiceCommands.java b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/BulkServiceCommands.java index 339675c7d4c..d4fc5b82026 100644 --- a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/BulkServiceCommands.java +++ b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/BulkServiceCommands.java @@ -1062,6 +1062,26 @@ public String call() throws Exception { } } + @Command(name = "request retry failed", + hint = "Retry only those requests which have failed targets.", + description = "Calls reset on the requests matching this criterion.") + class RequestRetryFailed implements Callable { + /* Runs requestStore.retryFailed() as a task submitted to the executor, so call() returns without waiting for the reset to finish. */ + @Override + public String call() throws Exception { + executor.submit(()-> 
{ + try { + int count = requestStore.retryFailed(); + LOGGER.info("{} requests with failed targets have been reset.", count); + } catch (BulkStorageException e) { + LOGGER.error("could not reset failed: {}.", e.toString()); + } + }); + /* The submitted task is not awaited: its outcome (count or error) is reported only through the log. */ + return "Resetting requests with failed targets."; + } + } + @Command(name = "request submit", hint = "Launch a bulk request.", description = "Command-line version of the RESTful request.") diff --git a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/BulkRequestStore.java b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/BulkRequestStore.java index 39eb96b532a..2e5b8281723 100644 --- a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/BulkRequestStore.java +++ b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/BulkRequestStore.java @@ -277,6 +277,13 @@ List next(Optional sortedBy, Optional reverse, lon */ void reset(String uid) throws BulkStorageException; + /** + * Retry all requests that have FAILED targets. + * @return the number of requests that were reset. + * @throws BulkStorageException + */ + int retryFailed() throws BulkStorageException; + /** * Store the request. 
* diff --git a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/request/JdbcBulkRequestStore.java b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/request/JdbcBulkRequestStore.java index f36708d5633..3880281260f 100644 --- a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/request/JdbcBulkRequestStore.java +++ b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/request/JdbcBulkRequestStore.java @@ -71,6 +71,7 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING import static org.dcache.services.bulk.util.BulkRequestTarget.PLACEHOLDER_PNFSID; import static org.dcache.services.bulk.util.BulkRequestTarget.ROOT_REQUEST_PATH; import static org.dcache.services.bulk.util.BulkRequestTarget.State.CREATED; +import static org.dcache.services.bulk.util.BulkRequestTarget.State.FAILED; import com.google.common.base.Strings; import com.google.common.base.Throwables; @@ -93,6 +94,7 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; import java.util.function.Consumer; import java.util.stream.Collectors; import javax.security.auth.Subject; @@ -120,7 +122,6 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING import org.dcache.services.bulk.util.BulkRequestFilter; import org.dcache.services.bulk.util.BulkRequestTarget; import org.dcache.services.bulk.util.BulkRequestTarget.PID; -import org.dcache.services.bulk.util.BulkRequestTarget.State; import org.dcache.services.bulk.util.BulkRequestTargetBuilder; import org.dcache.services.bulk.util.BulkServiceStatistics; import org.dcache.vehicles.FileAttributes; @@ -547,6 +548,17 @@ public void reset(String uid) throws BulkStorageException { } } + @Override + public int retryFailed() throws BulkStorageException { /* Calls reset(uid) for every uid returned by requestTargetDao.getRequestsOfFailed() and returns how many calls completed; an exception from reset is not caught here, so it ends the loop and propagates. NOTE(review): count is local to this method, so a plain int would presumably suffice. */ + AtomicInteger count = new 
AtomicInteger(0); + List uids = requestTargetDao.getRequestsOfFailed(); + for (String uid: uids) { + reset(uid); + count.incrementAndGet(); + } + return count.get(); + } + @Required public void setArchiveDao(JdbcBulkArchiveDao archiveDao) { this.archiveDao = archiveDao; @@ -783,7 +795,7 @@ private String checkRequestPermissions(Subject subject, String uid) private void conditionallyClearTerminalRequest(BulkRequest stored) { Long rid = stored.getId(); - if (requestTargetDao.count(requestTargetDao.where().rid(rid).state(State.FAILED)) > 0) { + if (requestTargetDao.count(requestTargetDao.where().rid(rid).state(FAILED)) > 0) { if (stored.isClearOnFailure()) { clear(stored.getUid()); } diff --git a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/rtarget/JdbcRequestTargetDao.java b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/rtarget/JdbcRequestTargetDao.java index b08fd6c632b..2e251b05d88 100644 --- a/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/rtarget/JdbcRequestTargetDao.java +++ b/modules/dcache-bulk/src/main/java/org/dcache/services/bulk/store/jdbc/rtarget/JdbcRequestTargetDao.java @@ -117,6 +117,10 @@ static class TargetPlaceholder { static final String JOINED_TABLE_NAMES_FOR_SELECT = SECONDARY_TABLE_NAME + ", " + TABLE_NAME; + /* Selects the uid of each bulk_request row with status COMPLETED that has at least one request_target row in state FAILED. */ static final String UIDS_OF_FAILED + = "SELECT r.uid FROM bulk_request r WHERE r.status = 'COMPLETED' AND EXISTS " + + "(SELECT * FROM request_target t WHERE r.id = t.rid AND t.state = 'FAILED')"; + static final ParameterizedPreparedStatementSetter SETTER = (ps, target) -> { Instant now = Instant.now(); ps.setInt(1, PID.INITIAL.ordinal()); @@ -164,6 +168,10 @@ public List get(JdbcRequestTargetCriterion criterion, int lim this, criterion.isJoined() ? 
this::toFullRequestTarget : this::toRequestTarget); } + /** Runs the UIDS_OF_FAILED query and returns the selected request uids as strings. */ public List getRequestsOfFailed() { + return getJdbcTemplate().queryForList(UIDS_OF_FAILED, String.class); + } + public Optional insert(JdbcRequestTargetUpdate update) { return utils.insert(update, TABLE_NAME, this); }