ciao-launcher: Limit the number of parallel starts

This commit limits the number of parallel starts to a function of the number of CPUs present in the node. There really isn't much point in allowing 1000 instances to be started on the same node at the same time. Doing so won't improve the start times much and will increase the likelihood of failure due to the resource exhaustion caused by the heavy demands of instance startup. Fixes ciao-project#8 Signed-off-by: Mark Ryan <[email protected]>
markdryan · Jul 6, 2016 · 3fbd92d · 3fbd92d · tpepper · Jul 6, 2016
1 parent b68ed47
commit 3fbd92d
Showing 1 changed file with 15 additions and 0 deletions.
diff --git a/ciao-launcher/instance.go b/ciao-launcher/instance.go
@@ -18,6 +18,7 @@ package main
 
 import (
 	"path"
+	"runtime"
 	"sync"
 	"time"
 
@@ -61,6 +62,12 @@ type insDeleteCmd struct {
 type insStopCmd struct{}
 type insMonitorCmd struct{}
 
+var startSemaphore chan struct{} // counting semaphore: startCommand sends to take a slot before processStart and receives to free it afterwards
+
+func init() {
+	startSemaphore = make(chan struct{}, runtime.NumCPU()*2) // buffer size caps concurrent processStart calls at twice runtime.NumCPU()
+}
+
 /*
 This function asks the server loop to kill the instance.  An instance
 needs to request that the server loop kill it if Start fails completely.
@@ -103,7 +110,15 @@ func (id *instanceData) startCommand(cmd *insStartCmd) {
 		startErr.send(id.ac.conn, id.instance)
 		return
 	}
+
+	select {
+	case startSemaphore <- struct{}{}:
+	case <-id.doneCh:
+		glog.Warningf("Abandoning instance %s start due to shutdown", id.instance)
+		return
+	}
 	st, startErr := processStart(cmd, id.instanceDir, id.vm, id.ac.conn)
+	_ = <-startSemaphore
 	if startErr != nil {
 		glog.Errorf("Unable to start instance[%s]: %v", string(startErr.code), startErr.err)
 		startErr.send(id.ac.conn, id.instance)