From 88c7df9d5f848c425634befa811446b18ed2632c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosme=20Dom=C3=ADnguez=20D=C3=ADaz?= Date: Sat, 29 May 2021 23:19:46 +0200 Subject: [PATCH 1/3] Add ClusterVMIDNodeGet() to support all nodes in the cluster --- proxmox.go | 26 +++++++++++++++ proxmoxdriver.go | 84 +++++++++++++++++++++++++++++++++++------------- 2 files changed, 88 insertions(+), 22 deletions(-) diff --git a/proxmox.go b/proxmox.go index 16ae42f..f024203 100644 --- a/proxmox.go +++ b/proxmox.go @@ -283,6 +283,32 @@ func (p ProxmoxVE) NodesNodeStorageStorageContentPost(node string, storage strin return diskname, err } +type VMResourcesReturnParameter struct { + Data []struct { + Node string `json:"node"` + Vmid int `json:"vmid"` + } `json:"data"` +} + +func (p ProxmoxVE) ClusterVMIDNodeGet(vmid string) (node string, err error) { + path := "/cluster/resources" + + response, err := p.client.R().SetQueryParams(map[string]string{"type": "vm"}).Get(p.getURL(path)) + + id, _ := strconv.Atoi(vmid) + + var r VMResourcesReturnParameter + err = json.Unmarshal([]byte(response.String()), &r) + + for _, vm := range r.Data { + if vm.Vmid == id { + return vm.Node, err + } + } + + return p.Node, err +} + // ClusterNextIDGet Get next free VMID. If you pass an VMID it will raise an error if the ID is already used. func (p ProxmoxVE) ClusterNextIDGet(id int) (vmid string, err error) { path := "/cluster/nextid" diff --git a/proxmoxdriver.go b/proxmoxdriver.go index bc39d0d..e324de2 100644 --- a/proxmoxdriver.go +++ b/proxmoxdriver.go @@ -333,8 +333,13 @@ func (d *Driver) ping() bool { return false } + node, nodeErr := d.driver.ClusterVMIDNodeGet(d.VMID) + if nodeErr != nil { + return false + } + command := NodesNodeQemuVMIDAgentPostParameter{Command: "ping"} - err := d.driver.NodesNodeQemuVMIDAgentPost(d.Node, d.VMID, &command) + err := d.driver.NodesNodeQemuVMIDAgentPost(node, d.VMID, &command) if err != nil { d.debug(err) @@ -444,7 +449,12 @@ func (d *Driver) GetNetVlanTag() int { func (d *Driver) GetIP() (string, error) { d.connectAPI() - ip, err := d.driver.GetEth0IPv4(d.Node, d.VMID) + node, err := d.driver.ClusterVMIDNodeGet(d.VMID) + if err != nil { + return "", err + } + + ip, err := d.driver.GetEth0IPv4(node, d.VMID) if err != nil { // TODO: should we return the cached IP here? return ip, err @@ -485,8 +495,13 @@ func (d *Driver) GetState() (state.State, error) { return state.Error, err } + node, err := d.driver.ClusterVMIDNodeGet(d.VMID) + if err != nil { + return state.Error, err + } + // sanity check the UUID - config, err := d.driver.GetConfig(d.Node, d.VMID) + config, err := d.driver.GetConfig(node, d.VMID) if err != nil { return state.Error, err } @@ -514,7 +529,7 @@ func (d *Driver) GetState() (state.State, error) { return state.Starting, nil } - pveState, err := d.driver.NodesNodeQemuVMIDStatusCurrentGet(d.Node, d.VMID) + pveState, err := d.driver.NodesNodeQemuVMIDStatusCurrentGet(node, d.VMID) if err != nil { return state.Error, err } @@ -906,7 +921,7 @@ func (d *Driver) Create() error { } // wait for network to come up - err = d.waitForNetwork() + err = d.waitForNetwork(d.Node, d.VMID) // set the IPAddress _, err = d.GetIP() @@ -939,7 +954,7 @@ func (d *Driver) waitForQemuGuestAgent() error { return nil } -func (d *Driver) waitForNetwork() error { +func (d *Driver) waitForNetwork(node string, vmid string) error { d.debugf("waiting for VM network to start") d.connectAPI() @@ -948,7 +963,7 @@ func (d *Driver) waitForNetwork() error { var err error for !up { - ip, err = d.driver.GetEth0IPv4(d.Node, d.VMID) + ip, err = d.driver.GetEth0IPv4(node, vmid) if err != nil { d.debugf("waiting for VM network to start") time.Sleep(5 * time.Second) @@ -1047,8 +1062,13 @@ func (d *Driver) Start() error { return err } + node, err := d.driver.ClusterVMIDNodeGet(d.VMID) + if err != nil { + return err + } + // sanity check the UUID - config, err := d.driver.GetConfig(d.Node, d.VMID) + config, err := d.driver.GetConfig(node, d.VMID) if err != nil { return err } @@ -1058,12 +1078,12 @@ func (d *Driver) Start() error { return fmt.Errorf("UUID mismatch - %s (stored) vs %s (current)", d.VMUUID, cVMMUUID) } - taskid, err := d.driver.NodesNodeQemuVMIDStatusStartPost(d.Node, d.VMID) + taskid, err := d.driver.NodesNodeQemuVMIDStatusStartPost(node, d.VMID) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) return err } @@ -1079,8 +1099,13 @@ func (d *Driver) Stop() error { return err } + node, err := d.driver.ClusterVMIDNodeGet(d.VMID) + if err != nil { + return err + } + // sanity check the UUID - config, err := d.driver.GetConfig(d.Node, d.VMID) + config, err := d.driver.GetConfig(node, d.VMID) if err != nil { return err } @@ -1091,12 +1116,12 @@ func (d *Driver) Stop() error { } // shutdown - taskid, err := d.driver.NodesNodeQemuVMIDStatusShutdownPost(d.Node, d.VMID) + taskid, err := d.driver.NodesNodeQemuVMIDStatusShutdownPost(node, d.VMID) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) return err } @@ -1112,8 +1137,13 @@ func (d *Driver) Restart() error { return err } + node, err := d.driver.ClusterVMIDNodeGet(d.VMID) + if err != nil { + return err + } + // sanity check the UUID - config, err := d.driver.GetConfig(d.Node, d.VMID) + config, err := d.driver.GetConfig(node, d.VMID) if err != nil { return err } @@ -1124,12 +1154,12 @@ func (d *Driver) Restart() error { } // reboot - taskid, err := d.driver.NodesNodeQemuVMIDStatusRebootPost(d.Node, d.VMID) + taskid, err := d.driver.NodesNodeQemuVMIDStatusRebootPost(node, d.VMID) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) return err } @@ -1145,8 +1175,13 @@ func (d *Driver) Kill() error { return err } + node, err := d.driver.ClusterVMIDNodeGet(d.VMID) + if err != nil { + return err + } + // sanity check the UUID - config, err := d.driver.GetConfig(d.Node, d.VMID) + config, err := d.driver.GetConfig(node, d.VMID) if err != nil { return err } @@ -1157,12 +1192,12 @@ func (d *Driver) Kill() error { } // stop - taskid, err := d.driver.NodesNodeQemuVMIDStatusStopPost(d.Node, d.VMID) + taskid, err := d.driver.NodesNodeQemuVMIDStatusStopPost(node, d.VMID) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) return err } @@ -1178,8 +1213,13 @@ func (d *Driver) Remove() error { return err } + node, err := d.driver.ClusterVMIDNodeGet(d.VMID) + if err != nil { + return err + } + // sanity check the UUID - config, err := d.driver.GetConfig(d.Node, d.VMID) + config, err := d.driver.GetConfig(node, d.VMID) if err != nil { return err } @@ -1195,12 +1235,12 @@ func (d *Driver) Remove() error { return err } - taskid, err := d.driver.NodesNodeQemuVMIDDelete(d.Node, d.VMID) + taskid, err := d.driver.NodesNodeQemuVMIDDelete(node, d.VMID) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) return err } From dd168b513c9e0b1e0345de727c07ea7d775272d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosme=20Dom=C3=ADnguez=20D=C3=ADaz?= Date: Mon, 31 May 2021 00:36:44 +0200 Subject: [PATCH 2/3] Use random cluster nodes for VM creation. A rudimentary way of trying to distribute the machines in the cluster. --- proxmox.go | 29 +++++++++++++++++++++++++++++ proxmoxdriver.go | 33 +++++++++++++++++++-------------- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/proxmox.go b/proxmox.go index f024203..2802f50 100644 --- a/proxmox.go +++ b/proxmox.go @@ -14,6 +14,7 @@ import ( "strconv" "strings" "time" + "math/rand" "github.com/labstack/gommon/log" resty "gopkg.in/resty.v1" @@ -326,6 +327,34 @@ func (p ProxmoxVE) ClusterNextIDGet(id int) (vmid string, err error) { return vmid, err } +type NodeReturnParameter struct { + Data []struct { + Node string `json:"node"` + Status string `json:"status"` + } `json:"data"` +} + +func (p ProxmoxVE) ClusterNodeGet() (node string, err error) { + path := "/cluster/resources" + + response, err := p.client.R().SetQueryParams(map[string]string{"type": "node"}).Get(p.getURL(path)) + + var r NodeReturnParameter + var nodeList []string + err = json.Unmarshal([]byte(response.String()), &r) + + for _, n := range r.Data { + if n.Status == "online" { + nodeList = append(nodeList, n.Node) + } + } + + rand.Seed(time.Now().Unix()) + n := rand.Int() % len(nodeList) + + return nodeList[n], err +} + // NodesNodeQemuPostParameter represents the input data for /nodes/{node}/qemu // Original Description: // Create or restore a virtual machine. diff --git a/proxmoxdriver.go b/proxmoxdriver.go index e324de2..7a3af33 100644 --- a/proxmoxdriver.go +++ b/proxmoxdriver.go @@ -684,6 +684,11 @@ func (d *Driver) Create() error { d.debugf("Next ID is '%s'", id) d.VMID = id + node, err := d.driver.ClusterNodeGet() + if err != nil { + return err + } + switch d.ProvisionStrategy { case "cdrom": @@ -697,7 +702,7 @@ func (d *Driver) Create() error { } d.debugf("Creating disk volume '%s' with size '%s'", volume.Filename, volume.Size) - diskname, err := d.driver.NodesNodeStorageStorageContentPost(d.Node, d.Storage, &volume) + diskname, err := d.driver.NodesNodeStorageStorageContentPost(node, d.Storage, &volume) if err != nil { return err } @@ -754,12 +759,12 @@ func (d *Driver) Create() error { } } d.debugf("Creating VM '%s' with '%d' of memory", npp.VMID, npp.Memory) - taskid, err := d.driver.NodesNodeQemuPost(d.Node, &npp) + taskid, err := d.driver.NodesNodeQemuPost(node, &npp) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) if err != nil { return err } @@ -767,12 +772,12 @@ func (d *Driver) Create() error { if d.CiEnabled == "1" { // specially handle setting sshkeys // https://forum.proxmox.com/threads/how-to-use-pvesh-set-vms-sshkeys.52570/ - taskid, err = d.driver.NodesNodeQemuVMIDConfigSetSSHKeys(d.Node, d.VMID, key) + taskid, err = d.driver.NodesNodeQemuVMIDConfigSetSSHKeys(node, d.VMID, key) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) if err != nil { return err } @@ -805,12 +810,12 @@ func (d *Driver) Create() error { d.debugf("cloning template id '%s' as vmid '%s'", d.CloneVMID, clone.Newid) - taskid, err := d.driver.NodesNodeQemuVMIDClonePost(d.Node, d.CloneVMID, &clone) + taskid, err := d.driver.NodesNodeQemuVMIDClonePost(node, d.CloneVMID, &clone) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) if err != nil { return err } @@ -822,7 +827,7 @@ func (d *Driver) Create() error { } d.debugf("resizing disk '%s' on vmid '%s' to '%s'", resize.Disk, d.VMID, resize.Size) - err = d.driver.NodesNodeQemuVMIDResizePut(d.Node, d.VMID, &resize) + err = d.driver.NodesNodeQemuVMIDResizePut(node, d.VMID, &resize) if err != nil { return err } @@ -853,14 +858,14 @@ func (d *Driver) Create() error { npp.CPU = d.CPU } - taskid, err = d.driver.NodesNodeQemuVMIDConfigPost(d.Node, d.VMID, &npp) + taskid, err = d.driver.NodesNodeQemuVMIDConfigPost(node, d.VMID, &npp) if err != nil { return err } // append newly minted ssh key to existing (if any) d.debugf("retrieving existing cloud-init sshkeys from vmid '%s'", d.VMID) - config, err := d.driver.GetConfig(d.Node, d.CloneVMID) + config, err := d.driver.GetConfig(node, d.CloneVMID) if err != nil { return err } @@ -882,12 +887,12 @@ func (d *Driver) Create() error { // specially handle setting sshkeys // https://forum.proxmox.com/threads/how-to-use-pvesh-set-vms-sshkeys.52570/ - taskid, err = d.driver.NodesNodeQemuVMIDConfigSetSSHKeys(d.Node, d.VMID, SSHKeys) + taskid, err = d.driver.NodesNodeQemuVMIDConfigSetSSHKeys(node, d.VMID, SSHKeys) if err != nil { return err } - err = d.driver.WaitForTaskToComplete(d.Node, taskid) + err = d.driver.WaitForTaskToComplete(node, taskid) if err != nil { return err } @@ -897,7 +902,7 @@ func (d *Driver) Create() error { } // Set the newly minted UUID - config, err := d.driver.GetConfig(d.Node, d.VMID) + config, err := d.driver.GetConfig(node, d.VMID) if err != nil { return err } @@ -921,7 +926,7 @@ func (d *Driver) Create() error { } // wait for network to come up - err = d.waitForNetwork(d.Node, d.VMID) + err = d.waitForNetwork(node, d.VMID) // set the IPAddress _, err = d.GetIP() From ad744026a1d457c9e2baf3a15076f437a42f94d9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cosme=20Dom=C3=ADnguez=20D=C3=ADaz?= Date: Sun, 30 May 2021 19:34:45 +0200 Subject: [PATCH 3/3] Increased sleep values. Avoids problems in large clusters. --- proxmox.go | 4 ++-- proxmoxdriver.go | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/proxmox.go b/proxmox.go index 2802f50..b464188 100644 --- a/proxmox.go +++ b/proxmox.go @@ -834,8 +834,8 @@ func (p ProxmoxVE) WaitForTaskToComplete(node string, taskid string) error { log.Infof("exiting with %s", tsr.Exitstatus) return nil } - log.Info("still running, waiting 500ms") - time.Sleep(500 * time.Millisecond) + log.Info("still running, waiting 1s") + time.Sleep(time.Second) } // unreachable code return nil diff --git a/proxmoxdriver.go b/proxmoxdriver.go index 7a3af33..2969164 100644 --- a/proxmoxdriver.go +++ b/proxmoxdriver.go @@ -635,9 +635,11 @@ func (d *Driver) Create() error { // add some random wait time here to help with race conditions in the proxmox api // the app could be getting invoked several times in rapid succession so some small waits may be helpful mrand.Seed(time.Now().UnixNano()) // Seed the random number generator using the current time (nanoseconds since epoch) - r := mrand.Intn(5000) - d.debugf("sleeping %d milliseconds before retrieving next ID", r) - time.Sleep(time.Duration(r) * time.Millisecond) + min := 5 + max := 15 + r := mrand.Intn(max - min + 1) + min + d.debugf("sleeping %d seconds before retrieving next ID", r) + time.Sleep(time.Duration(r) * time.Second) // get next available VMID // NOTE: we want to lock in the ID as quickly as possible after retrieving (ie: invoke QemuPost or Clone ASAP to avoid race conditions with other instances) @@ -916,8 +918,8 @@ func (d *Driver) Create() error { } // let VM start a settle a little - d.debugf("waiting for VM to start, wait 10 seconds") - time.Sleep(10 * time.Second) + d.debugf("waiting for VM to start, wait 15 seconds") + time.Sleep(15 * time.Second) // wait for qemu-guest-agent err = d.waitForQemuGuestAgent() @@ -953,7 +955,7 @@ func (d *Driver) waitForQemuGuestAgent() error { d.connectAPI() for !d.ping() { d.debugf("waiting for VM qemu-guest-agent to start") - time.Sleep(5 * time.Second) + time.Sleep(10 * time.Second) } return nil @@ -971,14 +973,14 @@ func (d *Driver) waitForNetwork(node string, vmid string) error { ip, err = d.driver.GetEth0IPv4(node, vmid) if err != nil { d.debugf("waiting for VM network to start") - time.Sleep(5 * time.Second) + time.Sleep(10 * time.Second) } else { if len(ip) > 0 { up = true d.debugf("VM network started with ip: %s", ip) } else { d.debugf("waiting for VM network to start") - time.Sleep(5 * time.Second) + time.Sleep(10 * time.Second) } } }