diff --git a/README.md b/README.md
index a8b4f92..e75301c 100644
--- a/README.md
+++ b/README.md
@@ -128,14 +128,15 @@ EXTRA_NODES_PERIOD=60
 * IDLE_TIME is related to the CLUES_Scheduler_PowOff_IDLE and is the time during which a working node has to be idle to be considered to be powered off.
 * RECONSIDER_JOB_TIME is related to the CLUES_Scheduler_Reconsider_Jobs scheduler, and states the frequency (in seconds) that a job has to be in the queue before its resources are reconsidered.
 * EXTRA_SLOTS_FREE is related to the CLUES_Scheduler_PowOn_Free scheduler and states how many slots should be free in the platform.
+* EXTRA_MEM_FREE is also related to CLUES_Scheduler_PowOn_Free and states the amount of memory (in bytes) that should be free on a single node.
 * EXTRA_NODES_PERIOD=60 is also related to CLUES_Scheduler_PowOn_Free and states the frequency of the scheduler. It is not executed all the time to try to avoid transient allocations.
- 
+
 Once this file is configured, we can use the templates in the /etc/clues2/conf.d folder to configure the SLURM and IPMI plugins. So we are creating the proper files:
 
 ```
 $ cd /etc/clues2/conf.d/
-$ cp plugin-slurm.cfg-example plugin-slurm.cfg
-$ cp plugin-ipmi.cfg-example plugin-ipmi.cfg
+$ cp plugin-slurm.cfg-example plugin-slurm.cfg
+$ cp plugin-ipmi.cfg-example plugin-ipmi.cfg
 ```
 
 You should check the variables in the ```/etc/clues2/conf.d/plugin-slurm.cfg``` file to match your platform, but the default values may be suitable for you. The expected commands include getting the nodes, queues, jobs, etc.
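As a quick illustration of the bullets above, this is roughly how the new option would sit next to the existing ones in the scheduling section of the CLUES configuration. The file path, the `[scheduling]` section name (taken from the `read_config("scheduling", ...)` call in the scheduler below) and the non-zero values are only an assumed example, not the shipped defaults:

```
# e.g. /etc/clues2/clues2.cfg (illustrative values, not the shipped defaults)
[scheduling]
EXTRA_SLOTS_FREE=4
EXTRA_NODES_FREE=1
# 4294967296 bytes = 4 GiB kept free on at least one node
EXTRA_MEM_FREE=4294967296
EXTRA_NODES_PERIOD=60
```

With values like these, CLUES would keep powering on candidate nodes until at least 4 slots, one idle node and one node with 4 GiB of free memory are available.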
diff --git a/clueslib/schedulers.py b/clueslib/schedulers.py
index a315192..b530e7a 100644
--- a/clueslib/schedulers.py
+++ b/clueslib/schedulers.py
@@ -544,11 +544,12 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of
 
 class CLUES_Scheduler_PowOn_Free(CLUES_Scheduler):
     def __init__(self):
-        CLUES_Scheduler.__init__(self, "Power On extra nodes to maintain a set of slots or nodes free")
+        CLUES_Scheduler.__init__(self, "Power On extra nodes to maintain a set of slots, memory or nodes free")
         cpyutils.config.read_config("scheduling",
             {
                 "EXTRA_SLOTS_FREE": 0,
                 "EXTRA_NODES_FREE": 0,
+                "EXTRA_MEM_FREE": 0,
                 "EXTRA_NODES_PERIOD": 30
             },
             self)
@@ -569,6 +570,7 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of
 
 
         # WARNING: this algorithm may be improved for better performance, but in this way it is easier to understand
+        max_nodes_mem_free = 0
         slots_free = 0
         nodes_free = 0
         slots_powon = 0
@@ -582,26 +584,33 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of
 
         for node in nodelist:
             if node.state in [ Node.IDLE, Node.USED, Node.ON_ERR, Node.POW_ON ]:
-                # In these states, the free slots are usable
+                # In these states, the free slots and memory are usable
                 node_slots_free = max(0, node.slots_free_original)    # When the resources are negative they are commited to be understood as unknown
+                node_memory_free = max(0, node.memory_free_original)
                 if node.name in candidates_off:
-                    nodes_that_can_be_poweron_on.append((node_slots_free, node.name))
+                    nodes_that_can_be_poweron_on.append((node_slots_free, node_memory_free, node.name))
                 else:
                     slots_free += node_slots_free
+                    # Get the max memory free in a single node
+                    if node_memory_free > max_nodes_mem_free:
+                        max_nodes_mem_free = node_memory_free
                     if node.state == Node.IDLE:
                         nodes_free += 1
 
             elif node.state in [ Node.OFF ]:
                 node_slots_free = max(0, node.slots_count)    # When the resources are negative they are commited to be understood as unknown
-                nodes_that_can_be_poweron_off.append((node_slots_free, node.name))
+                node_memory_free = max(0, node.memory_total)
+                nodes_that_can_be_poweron_off.append((node_slots_free, node_memory_free, node.name))
 
             elif node.state in [ Node.OFF_ERR ]:
                 # TODO: check if the cooldown of nodes is fulfilled
                 if node.power_on_operation_failed < config_scheduling.RETRIES_POWER_ON:
                     node_slots_free = max(0, node.slots_count)    # When the resources are negative they are commited to be understood as unknown
-                    nodes_that_can_be_poweron_off.append((node_slots_free, node.name))
+                    node_memory_free = max(0, node.memory_total)
+                    nodes_that_can_be_poweron_off.append((node_slots_free, node_memory_free, node.name))
 
+        mem_to_power_on = 0
         slots_to_power_on = 0
         nodes_to_power_on = 0
         if slots_free < self.EXTRA_SLOTS_FREE:
@@ -610,33 +619,37 @@ def schedule(self, requests_queue, monitoring_info, candidates_on, candidates_of
         if nodes_free < self.EXTRA_NODES_FREE:
             nodes_to_power_on = self.EXTRA_NODES_FREE - nodes_free
 
+        if max_nodes_mem_free < self.EXTRA_MEM_FREE:
+            mem_to_power_on = self.EXTRA_MEM_FREE
+
         nodes_that_can_be_poweron_off.sort(key=lambda tup:tup[1])
         nodes_that_can_be_poweron_on.sort(key=lambda tup:tup[1])
         nodes_that_can_be_poweron = nodes_that_can_be_poweron_on + nodes_that_can_be_poweron_off
 
         local_poweron = []
-        while ((len(nodes_that_can_be_poweron) > 0) and ((slots_to_power_on > 0) or (nodes_to_power_on >0))):
-            (slots_count, nname) = nodes_that_can_be_poweron.pop(0)
+        while ((len(nodes_that_can_be_poweron) > 0) and ((slots_to_power_on > 0) or (nodes_to_power_on > 0) or mem_to_power_on > 0)):
+            (slots_count, memory, nname) = nodes_that_can_be_poweron.pop(0)
             node = nodelist.get_node(nname)
             slots_to_power_on -= slots_count
+            # Power on nodes until one of them has enough free memory
+            if memory >= mem_to_power_on:
+                mem_to_power_on = 0
             if node.state in [ Node.IDLE, Node.POW_ON, Node.OFF, Node.OFF_ERR ]:
                 nodes_to_power_on -= 1
- 
+
             local_poweron.append(nname)
 
         # _LOGGER.debug("would poweron %s; poweroff: %s" % (str(local_poweron), str(candidates_off)))
 
         if len(local_poweron) > 0:
-            log = False
             for node in local_poweron:
                 if node in candidates_off:
                     candidates_off.remove(node)
                 else:
                     if node not in candidates_on:
                         candidates_on[node] = []
-            if log:
-                _LOGGER.debug("will power on %s; still need %d slots and %d nodes" % (str(local_poweron), extra_slotcount, extra_nodecount))
+            _LOGGER.debug("will power on %s; still need %d slots, %d bytes of memory and %d nodes" % (str(local_poweron), slots_to_power_on, mem_to_power_on, nodes_to_power_on))
         else:
             if (slots_to_power_on > 0) or (nodes_to_power_on > 0):
                 _LOGGER.debug("cannot power on any node but still need %d slots and %d nodes" % (slots_to_power_on, nodes_to_power_on))
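For readers who find the diff above hard to follow, here is a small self-contained sketch of the selection logic that CLUES_Scheduler_PowOn_Free applies after this change. It is not CLUES code: the function name, the tuple layout and the example figures are made up for illustration, and the node-state checks of the real scheduler are left out.

```
# Standalone sketch (plain Python) of the memory-aware power-on decision.
# Every candidate node is summarised as a (slots_free, mem_free, name) tuple,
# mirroring the tuples built by the scheduler above.

def nodes_to_power_on(candidates, slots_free, nodes_free, max_node_mem_free,
                      extra_slots=0, extra_nodes=0, extra_mem=0):
    """Names of the candidates to power on so that the platform keeps
    extra_slots slots, extra_nodes idle nodes and at least one node with
    extra_mem bytes of free memory."""
    missing_slots = max(0, extra_slots - slots_free)
    missing_nodes = max(0, extra_nodes - nodes_free)
    # Memory is only requested when no powered-on node already satisfies it
    missing_mem = extra_mem if max_node_mem_free < extra_mem else 0

    # Candidates with less free memory are tried first (the scheduler sorts by tup[1])
    pending = sorted(candidates, key=lambda tup: tup[1])
    selected = []
    while pending and (missing_slots > 0 or missing_nodes > 0 or missing_mem > 0):
        slots, mem, name = pending.pop(0)
        missing_slots -= slots
        missing_nodes -= 1       # the real scheduler also checks the node state here
        if mem >= missing_mem:
            missing_mem = 0      # one node with enough free memory is enough
        selected.append(name)
    return selected

# No extra slots or idle nodes are requested, but no powered-on node has 8 GiB
# free (the best one has 2 GiB), so a candidate with 16 GiB gets powered on.
print(nodes_to_power_on([(8, 16 * 2**30, "wn3")],
                        slots_free=6, nodes_free=1,
                        max_node_mem_free=2 * 2**30,
                        extra_mem=8 * 2**30))    # -> ['wn3']
```

Unlike EXTRA_SLOTS_FREE, which is accumulated across all powered-on nodes, EXTRA_MEM_FREE is satisfied by a single node, which is why only the maximum per-node free memory is tracked.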
diff --git a/etc/clues2.cfg-example b/etc/clues2.cfg-example
index bc2ea85..cf25215 100644
--- a/etc/clues2.cfg-example
+++ b/etc/clues2.cfg-example
@@ -149,5 +149,8 @@ EXTRA_SLOTS_FREE=0
 # Number of nodes that are wanted to be free (CLUES will power on some nodes)
 EXTRA_NODES_FREE=0
 
+# Amount of memory (in bytes) that is wanted to be free (CLUES will power on some nodes to get the extra memory)
+EXTRA_MEM_FREE=0
+
 # Frequence to run the extra slots or node scheduler
 EXTRA_NODES_PERIOD=30
\ No newline at end of file
diff --git a/etc/clues2.cfg-full-example b/etc/clues2.cfg-full-example
index ce123aa..7e25900 100644
--- a/etc/clues2.cfg-full-example
+++ b/etc/clues2.cfg-full-example
@@ -155,6 +155,9 @@ EXTRA_SLOTS_FREE=0
 # Number of nodes that are wanted to be free (CLUES will power on some nodes)
 EXTRA_NODES_FREE=0
 
+# Amount of memory (in bytes) that is wanted to be free (CLUES will power on some nodes to get the extra memory)
+EXTRA_MEM_FREE=0
+
 # Frequence to run the extra slots or node scheduler
 EXTRA_NODES_PERIOD=30