Mem free #91

Open · wants to merge 6 commits into base: master

7 changes: 4 additions & 3 deletions README.md
@@ -128,14 +128,15 @@ EXTRA_NODES_PERIOD=60
* IDLE_TIME is related to the CLUES_Scheduler_PowOff_IDLE scheduler and is the time (in seconds) during which a working node has to be idle before it is considered for being powered off.
* RECONSIDER_JOB_TIME is related to the CLUES_Scheduler_Reconsider_Jobs scheduler and states the time (in seconds) that a job has to stay in the queue before its resources are reconsidered.
* EXTRA_SLOTS_FREE is related to the CLUES_Scheduler_PowOn_Free scheduler and states how many slots should be kept free in the platform.
* EXTRA_MEM_FREE is also related to CLUES_Scheduler_PowOn_Free and states the amount of memory (in bytes) that should be kept free on at least one single node.
* EXTRA_NODES_PERIOD is also related to CLUES_Scheduler_PowOn_Free and states how often (in seconds) the scheduler is run. It is not executed continuously, to avoid reacting to transient allocations. A consolidated configuration snippet with these variables is shown right after this list.
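
Putting these variables together, the relevant fragment of the configuration file could look like the following sketch (the values are illustrative, not recommendations; EXTRA_MEM_FREE is the variable added by this PR and is expressed in bytes):

```
IDLE_TIME=1800
RECONSIDER_JOB_TIME=60
EXTRA_SLOTS_FREE=0
EXTRA_NODES_FREE=0
EXTRA_MEM_FREE=0
EXTRA_NODES_PERIOD=60
```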

Once this file is configured, we can use the templates in the /etc/clues2/conf.d folder to configure the SLURM and IPMI plugins by creating the proper files:

```
$ cd /etc/clues2/conf.d/
$ cp plugin-slurm.cfg-example plugin-slurm.cfg
$ cp plugin-ipmi.cfg-example plugin-ipmi.cfg
```

You should check the variables in the ```/etc/clues2/conf.d/plugin-slurm.cfg``` file to match your platform, although the default values may be suitable for you. The expected settings include the commands used to get the nodes, queues, jobs, etc.; a quick sanity check of the underlying SLURM commands is shown below.
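
Assuming a standard SLURM installation, you can verify from the CLUES host that this information is actually retrievable with the stock SLURM tools (these are plain SLURM commands, not part of CLUES):

```
$ sinfo -N -l               # nodes and their states
$ squeue -l                 # jobs currently in the queues
$ scontrol show partition   # queue (partition) definitions
```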
35 changes: 24 additions & 11 deletions clueslib/schedulers.py
@@ -544,11 +544,12 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of

class CLUES_Scheduler_PowOn_Free(CLUES_Scheduler):
    def __init__(self):
        CLUES_Scheduler.__init__(self, "Power On extra nodes to maintain a set of slots, memory or nodes free")
        cpyutils.config.read_config("scheduling",
            {
                "EXTRA_SLOTS_FREE": 0,
                "EXTRA_NODES_FREE": 0,
                "EXTRA_MEM_FREE": 0,
                "EXTRA_NODES_PERIOD": 30
            },
            self)

@@ -569,6 +570,7 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of

        # WARNING: this algorithm may be improved for better performance, but in this way it is easier to understand

        max_nodes_mem_free = 0
        slots_free = 0
        nodes_free = 0
        slots_powon = 0

@@ -582,26 +584,33 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of

        for node in nodelist:
            if node.state in [ Node.IDLE, Node.USED, Node.ON_ERR, Node.POW_ON ]:
                # In these states, the free slots and memory are usable
                node_slots_free = max(0, node.slots_free_original) # Negative resource counts are understood as unknown
                node_memory_free = max(0, node.memory_free_original)

                if node.name in candidates_off:
                    nodes_that_can_be_poweron_on.append((node_slots_free, node_memory_free, node.name))
                else:
                    slots_free += node_slots_free
                    # Keep track of the maximum memory free in a single powered-on node
                    if node_memory_free > max_nodes_mem_free:
                        max_nodes_mem_free = node_memory_free
                    if node.state == Node.IDLE:
                        nodes_free += 1

            elif node.state in [ Node.OFF ]:
                node_slots_free = max(0, node.slots_count) # Negative resource counts are understood as unknown
                node_memory_free = max(0, node.memory_total)
                nodes_that_can_be_poweron_off.append((node_slots_free, node_memory_free, node.name))
            elif node.state in [ Node.OFF_ERR ]:
                # TODO: check if the cooldown of nodes is fulfilled
                if node.power_on_operation_failed < config_scheduling.RETRIES_POWER_ON:
                    node_slots_free = max(0, node.slots_count) # Negative resource counts are understood as unknown
                    node_memory_free = max(0, node.memory_total)
                    nodes_that_can_be_poweron_off.append((node_slots_free, node_memory_free, node.name))

        mem_to_power_on = 0
        slots_to_power_on = 0
        nodes_to_power_on = 0
        if slots_free < self.EXTRA_SLOTS_FREE:

@@ -610,33 +619,37 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of

        if nodes_free < self.EXTRA_NODES_FREE:
            nodes_to_power_on = self.EXTRA_NODES_FREE - nodes_free

        if max_nodes_mem_free < self.EXTRA_MEM_FREE:
            # At least one node must end up with EXTRA_MEM_FREE bytes free
            mem_to_power_on = self.EXTRA_MEM_FREE

        nodes_that_can_be_poweron_off.sort(key=lambda tup: tup[1])
        nodes_that_can_be_poweron_on.sort(key=lambda tup: tup[1])
        nodes_that_can_be_poweron = nodes_that_can_be_poweron_on + nodes_that_can_be_poweron_off

        local_poweron = []
        while (len(nodes_that_can_be_poweron) > 0) and ((slots_to_power_on > 0) or (nodes_to_power_on > 0) or (mem_to_power_on > 0)):
            (slots_count, memory, nname) = nodes_that_can_be_poweron.pop(0)
            node = nodelist.get_node(nname)

            slots_to_power_on -= slots_count
            # Power on nodes until one of them has enough free memory
            if memory >= mem_to_power_on:
                mem_to_power_on = 0
            if node.state in [ Node.IDLE, Node.POW_ON, Node.OFF, Node.OFF_ERR ]:
                nodes_to_power_on -= 1

            local_poweron.append(nname)

        # _LOGGER.debug("would poweron %s; poweroff: %s" % (str(local_poweron), str(candidates_off)))

        if len(local_poweron) > 0:
            log = False
            for node in local_poweron:
                if node in candidates_off:
                    candidates_off.remove(node)
                else:
                    if node not in candidates_on:
                        candidates_on[node] = []
                        log = True # at least one new node will be powered on
            if log:
                _LOGGER.debug("will power on %s; still need %d slots, %d bytes of memory and %d nodes" % (str(local_poweron), slots_to_power_on, mem_to_power_on, nodes_to_power_on))
        else:
            if (slots_to_power_on > 0) or (nodes_to_power_on > 0) or (mem_to_power_on > 0):
                _LOGGER.debug("cannot power on any node but still need %d slots, %d bytes of memory and %d nodes" % (slots_to_power_on, mem_to_power_on, nodes_to_power_on))
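
To make the new selection rule concrete, here is a minimal standalone sketch (hypothetical node data and simplified state handling, not the CLUES API) of how the slot, node and memory goals drive the power-on loop:

```
# Candidate nodes as (slots_free, memory_free_in_bytes, name) tuples,
# the same shape the scheduler builds above (the data is made up).
candidates = [
    (2, 4 * 1024 ** 3, "wn1"),
    (4, 16 * 1024 ** 3, "wn2"),
    (8, 32 * 1024 ** 3, "wn3"),
]
candidates.sort(key=lambda tup: tup[1])  # smallest free memory first

slots_to_power_on = 4               # EXTRA_SLOTS_FREE not yet covered
nodes_to_power_on = 1               # EXTRA_NODES_FREE not yet covered
mem_to_power_on = 8 * 1024 ** 3     # no powered-on node has 8 GiB free

poweron = []
while candidates and (slots_to_power_on > 0 or nodes_to_power_on > 0 or mem_to_power_on > 0):
    slots, memory, name = candidates.pop(0)
    slots_to_power_on -= slots
    if memory >= mem_to_power_on:
        mem_to_power_on = 0         # a single node now covers the memory goal
    nodes_to_power_on -= 1          # simplification: every candidate counts as a node
    poweron.append(name)

print(poweron)   # ['wn1', 'wn2']: wn2 satisfies both the slot and the memory goals
```

Because the candidates are sorted by free memory in ascending order, the smallest nodes are consumed first and a large-memory node is only powered on when the memory goal requires it.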
3 changes: 3 additions & 0 deletions etc/clues2.cfg-example
@@ -149,5 +149,8 @@ EXTRA_SLOTS_FREE=0
# Number of nodes that should be kept free (CLUES will power on some nodes)
EXTRA_NODES_FREE=0

# Amount of memory (in bytes) that should be kept free on at least one node (CLUES will power on some nodes to get the extra memory)
EXTRA_MEM_FREE=0

# Frequency (in seconds) to run the extra slots, memory or nodes scheduler
EXTRA_NODES_PERIOD=30
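
Since EXTRA_MEM_FREE is expressed in bytes, keeping for instance 4 GiB free on at least one node means setting it to 4 × 1024³:

```
EXTRA_MEM_FREE=4294967296
```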
3 changes: 3 additions & 0 deletions etc/clues2.cfg-full-example
@@ -155,6 +155,9 @@ EXTRA_SLOTS_FREE=0
# Number of nodes that should be kept free (CLUES will power on some nodes)
EXTRA_NODES_FREE=0

# Amount of memory (in bytes) that should be kept free on at least one node (CLUES will power on some nodes to get the extra memory)
EXTRA_MEM_FREE=0

# Frequency (in seconds) to run the extra slots, memory or nodes scheduler
EXTRA_NODES_PERIOD=30