Skip to content

Commit

Permalink
pnfsmanager: introduce limit on number of concurrent listings of the sa…
Browse files Browse the repository at this point in the history
…me directory

Motivation
----------
When many jobs run listings of large directories in parallel,
thread starvation occurs: the list queue fills up, the message
queue fills up, and PnfsManager becomes unresponsive.

Modification:
-------------
Before dispatching a list request to a queue, count how many
list requests for the same directory are already in that list queue.
Add property:

pnfsmanager.limits.number-of-concurrent-dir-listings = infinity

that controls the number of list requests for the same directory
allowed in the PnfsManager list queue. If this number is exceeded,
PnfsManager refuses to serve the request.

Result:
------
PnfsManager is more resilient against a flood of concurrent list requests.

Patch: https://rb.dcache.org/r/14098/
Acked-by: Albert Rossi
Target: trunk
Request: 9.2
Require-notes: yes
  • Loading branch information
DmitryLitvintsev committed Sep 19, 2023
1 parent c1e3728 commit 67508e9
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
<property name="logSlowThreshold" value="${pnfsmanager.limits.log-slow-threshold}"/>
<property name="folding" value="${pnfsmanager.enable.folding}"/>
<property name="useParallelListing" value="${pnfsmanager.enable.parallel-listing}"/>
<property name="maxListRequestsInQueue"
value="#{T(org.dcache.util.Strings).parseInt('${pnfsmanager.limits.number-of-concurrent-dir-listings}')}"/>
<property name="directoryListLimit" value="${pnfsmanager.limits.list-chunk-size}"/>
<property name="permissionHandler" ref="permission-handler"/>
<property name="queueMaxSize" value="${pnfsmanager.limits.queue-length}"/>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,7 @@ public class PnfsManagerV3

private boolean useParentHashOnCreate;
private boolean useParallelListing;
private int maxListRequestsInQueue;

/**
* Whether to use folding.
Expand Down Expand Up @@ -343,6 +344,11 @@ public void setUseParallelListing(boolean useParallelListing) {
this.useParallelListing = useParallelListing;
}

/**
 * Sets the limit used by the PnfsListDirectoryMessage handler when it
 * counts queued list requests that carry the same path as an incoming
 * request.
 *
 * The handler only performs that count when this value is below
 * {@code Integer.MAX_VALUE}; with {@code Integer.MAX_VALUE} the
 * per-directory check is skipped altogether.
 *
 * @param maxListRequestsInQueue threshold compared against the number of
 *        queued list requests for the same path
 */
@Required
public void setMaxListRequestsInQueue(int maxListRequestsInQueue) {
this.maxListRequestsInQueue = maxListRequestsInQueue;
}

@Required
public void setScheduledExecutor(ScheduledExecutorService executor) {
scheduledExecutor = executor;
Expand Down Expand Up @@ -2810,21 +2816,40 @@ public void messageArrived(CellMessage envelope, PnfsListDirectoryMessage messag
throws CacheException {

String path = message.getPnfsPath();

if (path == null) {
throw new InvalidMessageCacheException("Missing PNFS id and path");
}

/**
* when useParallelListing is true, we only have 1 queue in the
* list of queues below
*/
int index = 0;

if (!useParallelListing) {
index = (int)(Math.abs((long)Objects.hashCode(path.toString())) % _listThreads);
}
BlockingQueue<CellMessage> queue = _listQueues[index];

/**
* when useParallelListing is true, we only have 1 queue in the
* list of queues below
*/
if (!_listQueues[index].offer(envelope)) {
/**
* Do counts only if maxListRequestsInQueue is enabled
*/
if (maxListRequestsInQueue < Integer.MAX_VALUE) {
int counter = 0;
for (CellMessage i : queue) {
PnfsListDirectoryMessage msg = (PnfsListDirectoryMessage)i.getMessageObject();
if (msg.getPnfsPath().equals(path)) {
if (counter > maxListRequestsInQueue) {
LOGGER.warn("Too many list requests for the same directory {} in PnfsManager queue", path);
throw new MissingResourceCacheException("Too many list requests for the same directory in PndsManager queue");
}
counter += 1;
}
}
}

if (!queue.offer(envelope)) {
throw new MissingResourceCacheException("PnfsManager queue limit exceeded");
}
}
Expand Down
22 changes: 15 additions & 7 deletions skel/share/defaults/pnfsmanager.properties
Original file line number Diff line number Diff line change
Expand Up @@ -75,14 +75,14 @@ pnfsmanager.limits.threads = ${pnfsmanager.limits.threads-per-group}
# ---- Thread dispatch mechanisms
#
# Experimental feature. Normally message processing is dispatched
# to the same thread in the thread pool associated with pnfsid (or path)
# of the namespace entry contained in the message. On massive
# uploads (create entries) to a single directory we observed
# to the same thread in the thread pool associated with pnfsid (or path)
# of the namespace entry contained in the message. On massive
# uploads (create entries) to a single directory we observed
# performance degradation caused by underlying db back-end
# synchronization when updating mtime and link count of the target
# directory. This leads to all available threads being busy/hanging
# processing create entry messages denying other users from
# accessing the namespace. The switch below, if enabled, would cause
# directory. This leads to all available threads being busy/hanging
# processing create entry messages denying other users from
# accessing the namespace. The switch below, if enabled, would cause
# the create messages to be dispatched to a thread associated
# with that entry's parent (that is the target directory).
#
Expand Down Expand Up @@ -126,7 +126,15 @@ pnfsmanager.limits.list-chunk-size = 100

(one-of?true|false)pnfsmanager.enable.parallel-listing = false


# ---- Determines how many simultaneous same directory listings to allow
#
# If the number of simultaneous listings of the same directory
# reaches the limit defined below, PnfsManager will reject
# new listing requests for that directory until the number of already
# queued and active listing requests for that directory drops
# below this number.
#
pnfsmanager.limits.number-of-concurrent-dir-listings = infinity

# ---- Threshold for when to log slow requests
#
Expand Down
1 change: 1 addition & 0 deletions skel/share/services/pnfsmanager.batch
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ check -strong pnfsmanager.enable.acl
check -strong pnfsmanager.default-retention-policy
check -strong pnfsmanager.default-access-latency
check -strong pnfsmanager.enable.parallel-listing
check -strong pnfsmanager.limits.number-of-concurrent-dir-listings
check pnfsmanager.destination.flush-notification
check pnfsmanager.destination.cache-notification
check pnfsmanager.destination.cancel-upload-notification
Expand Down

0 comments on commit 67508e9

Please sign in to comment.