Skip to content
This repository has been archived by the owner on Jul 16, 2020. It is now read-only.

ciao-launcher: Fix unit test deadlocks #469

Merged
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
92 changes: 49 additions & 43 deletions ciao-launcher/instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -267,9 +267,10 @@ func (v *instanceTestState) ClusterConfiguration() (payloads.Configure, error) {
return payloads.Configure{}, nil
}

func cleanupShutdownFail(t *testing.T, instance string, doneCh chan struct{}, ovsCh chan interface{}) {
func cleanupShutdownFail(t *testing.T, instance string, doneCh chan struct{}, ovsCh chan interface{}, wg *sync.WaitGroup) {
_ = os.RemoveAll(path.Join(instancesDir, instance))
shutdownInstanceLoop(doneCh, ovsCh)

shutdownInstanceLoop(doneCh, ovsCh, wg, t)
t.FailNow()
}

Expand Down Expand Up @@ -383,16 +384,26 @@ func (v *instanceTestState) restartInstance(t *testing.T, ovsCh chan interface{}
}
}

func shutdownInstanceLoop(doneCh chan struct{}, ovsCh chan interface{}) {
func shutdownInstanceLoop(doneCh chan struct{}, ovsCh chan interface{}, wg *sync.WaitGroup,
t *testing.T) {
close(doneCh)

timeout := time.After(time.Second * 5)
DONE:
for {
select {
case _, ok := <-ovsCh:
if !ok {
break DONE
}
default:
case <-ovsCh:
case <-timeout:
t.Error("Timedout waiting for instance loop to exit")
break DONE
case <-func() chan struct{} {
ch := make(chan struct{})
go func() {
wg.Wait()
close(ch)
}()
return ch
}():
break DONE
}
}
Expand All @@ -418,11 +429,10 @@ func TestStartInstanceLoop(t *testing.T) {
ac := &agentClient{conn: state, cmdCh: cmdWrapCh}
_ = startInstanceWithVM(state.instance, cfg, &wg, doneCh, ac, ovsCh, state, &storage.NoopDriver{})
ok := state.expectStatsUpdate(t, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
if !ok {
t.FailNow()
}
wg.Wait()
}

// Checks an instance loop can be deleted before an instance is launched.
Expand All @@ -449,12 +459,12 @@ func TestDeleteInstanceLoop(t *testing.T) {

ok := state.expectStatsUpdate(t, ovsCh)
if !ok {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.FailNow()
}

if !state.deleteInstance(t, ovsCh, cmdCh) {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.FailNow()
}
wg.Wait()
Expand Down Expand Up @@ -483,22 +493,22 @@ func TestStopNotRunning(t *testing.T) {

ok := state.expectStatsUpdate(t, ovsCh)
if !ok {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.FailNow()
}

select {
case cmdCh <- &insStopCmd{}:
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timed out sending Stop command")
}

select {
case <-state.errorCh:
state.errorCh = nil
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timed out waiting for error channel")
}

Expand All @@ -508,7 +518,7 @@ func TestStopNotRunning(t *testing.T) {
}

if !state.deleteInstance(t, ovsCh, cmdCh) {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.FailNow()
}
wg.Wait()
Expand All @@ -527,12 +537,12 @@ func startVMWithCFG(t *testing.T, wg *sync.WaitGroup, cfg *vmConfig, connect boo
state.ac = &agentClient{conn: state, cmdCh: make(chan *cmdWrapper)}
cmdCh := startInstanceWithVM(state.instance, cfg, wg, doneCh, state.ac, ovsCh, state, &storage.NoopDriver{})
if !state.expectStatsUpdate(t, ovsCh) {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, wg, t)
t.FailNow()
}

if !state.startInstance(t, ovsCh, cmdCh, cfg, errorOk) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, wg)
}
return state, ovsCh, cmdCh, doneCh
}
Expand All @@ -552,7 +562,7 @@ func TestStartNotRunning(t *testing.T) {
state, ovsCh, cmdCh, doneCh := startVMWithCFG(t, &wg, &cfg, true, false)

if !state.deleteInstance(t, ovsCh, cmdCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

wg.Wait()
Expand Down Expand Up @@ -593,14 +603,12 @@ func TestLoopShutdownWithRunningInstance(t *testing.T) {
cfg := standardCfg
_, ovsCh, _, doneCh := startVMWithCFG(t, &wg, &cfg, true, false)

shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)

// We need to remove the instance manually to have a clean setup for the
// subsequent tests.

_ = os.RemoveAll(path.Join(instancesDir, cfg.Instance))

wg.Wait()
}

// Check we can restart an instance
Expand Down Expand Up @@ -628,17 +636,16 @@ func TestRestart(t *testing.T) {
cmdCh := startInstanceWithVM(state.instance, &cfg, &wg, doneCh, ac, ovsCh, state, &storage.NoopDriver{})
ok := state.expectStatsUpdate(t, ovsCh)
if !ok {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.FailNow()
}

if !state.restartInstance(t, ovsCh, cmdCh, false) {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.FailNow()
}

shutdownInstanceLoop(doneCh, ovsCh)
wg.Wait()
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
}

// Check we can handle a restart error
Expand Down Expand Up @@ -666,7 +673,7 @@ func TestRestartFail(t *testing.T) {
cmdCh := startInstanceWithVM(state.instance, &cfg, &wg, doneCh, ac, ovsCh, state, &storage.NoopDriver{})
ok := state.expectStatsUpdate(t, ovsCh)
if !ok {
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.FailNow()
}

Expand All @@ -679,8 +686,7 @@ func TestRestartFail(t *testing.T) {
state.rf.Reason, payloads.RestartLaunchFailure)
}

shutdownInstanceLoop(doneCh, ovsCh)
wg.Wait()
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
}

// Check we get an error when starting an instance with an invalid image
Expand Down Expand Up @@ -710,11 +716,11 @@ func TestStartBadImage(t *testing.T) {
select {
case cmdCh <- acCmd.cmd:
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timed out sending suicide command")
}
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timedout waiting from suicide command")
}
wg.Wait()
Expand Down Expand Up @@ -742,30 +748,30 @@ func sendCommandDuringSuicide(t *testing.T, testCmd interface{}) *instanceTestSt
select {
case acCmd = <-state.ac.cmdCh:
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timedout waiting from suicide command")
}

state.errorCh = make(chan struct{})
select {
case cmdCh <- testCmd:
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timed out sending command during suicide")
}

select {
case <-state.errorCh:
state.errorCh = nil
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timed out waiting on error channel")
}

select {
case cmdCh <- acCmd.cmd:
case <-time.After(time.Second):
shutdownInstanceLoop(doneCh, ovsCh)
shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
t.Fatal("Timed out sending suicide command")
}

Expand Down Expand Up @@ -855,15 +861,15 @@ func TestLostInstance(t *testing.T) {
close(state.monitorClosedCh)

if !waitForStateChange(t, ovsStopped, ovsCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

// This gets closed by the instanceLoop and so will become available
// in the deleteInstance select loop if we don't set it to nil.
state.monitorCh = nil

if !state.deleteInstance(t, ovsCh, cmdCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

wg.Wait()
Expand All @@ -884,7 +890,7 @@ func TestStartRunningInstance(t *testing.T) {
state, ovsCh, cmdCh, doneCh := startVMWithCFG(t, &wg, &cfg, true, false)

if !state.startInstance(t, ovsCh, cmdCh, &cfg, true) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

if state.stf.Reason != payloads.AlreadyRunning {
Expand All @@ -893,7 +899,7 @@ func TestStartRunningInstance(t *testing.T) {
}

if !state.deleteInstance(t, ovsCh, cmdCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

wg.Wait()
Expand Down Expand Up @@ -927,7 +933,7 @@ func TestAttachVolumeToInstance(t *testing.T) {
}

if !state.deleteInstance(t, ovsCh, cmdCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

wg.Wait()
Expand Down Expand Up @@ -979,7 +985,7 @@ func TestAttachExistingVolumeToInstance(t *testing.T) {
}

if !state.deleteInstance(t, ovsCh, cmdCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

wg.Wait()
Expand Down Expand Up @@ -1027,7 +1033,7 @@ func TestDetachVolumeFromInstance(t *testing.T) {
}

if !state.deleteInstance(t, ovsCh, cmdCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

wg.Wait()
Expand Down Expand Up @@ -1063,7 +1069,7 @@ func TestDetachNonexistingVolumeFromInstance(t *testing.T) {
}

if !state.deleteInstance(t, ovsCh, cmdCh) {
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
}

wg.Wait()
Expand Down