This repository has been archived by the owner on Jul 16, 2020. It is now read-only.

ciao-launcher: Fix unit test deadlocks
The code in instance_test.go that waited for the instance loop to close
down was incorrect.  There was a possibility of deadlock if the instance
loop was sending some stats down the overseer channel at the same time
as the test was trying to shut down the instance.  There is similar code
in the overseer which actually shuts down the instance loop correctly.
This commit simply ports the good overseer code over to instance_test.go.

Partial fix for #434

Signed-off-by: Mark Ryan <[email protected]>
Mark Ryan committed Aug 17, 2016
1 parent 2abcb88 commit b764708
Showing 1 changed file with 49 additions and 43 deletions.
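
The deadlock and its fix come down to a standard Go shutdown pattern: if the test stops draining a channel too early, it can strand the instance loop mid-send while the test blocks in wg.Wait(). Below is a minimal, self-contained sketch of the pattern the commit ports from the overseer; the names (worker, statsCh, exited) are invented for illustration and are not the launcher's actual identifiers.

package main

import (
	"fmt"
	"sync"
	"time"
)

// worker mimics the instance loop: it periodically sends stats and exits
// once doneCh is closed.
func worker(doneCh chan struct{}, statsCh chan int, wg *sync.WaitGroup) {
	defer wg.Done()
	for {
		select {
		case <-doneCh:
			return
		case statsCh <- 42: // may be blocked here when shutdown begins
		}
	}
}

func main() {
	doneCh := make(chan struct{})
	statsCh := make(chan int) // unbuffered, like the overseer channel
	var wg sync.WaitGroup

	wg.Add(1)
	go worker(doneCh, statsCh, &wg)

	close(doneCh) // ask the worker to stop

	// Adapt wg.Wait() into a channel so it can appear in a select.
	exited := make(chan struct{})
	go func() {
		wg.Wait()
		close(exited)
	}()

	timeout := time.After(5 * time.Second)
DONE:
	for {
		select {
		case <-statsCh: // keep draining so a mid-send worker can finish
		case <-exited: // worker has exited; safe to stop draining
			break DONE
		case <-timeout:
			fmt.Println("timed out waiting for worker to exit")
			break DONE
		}
	}
	fmt.Println("shutdown complete")
}

Without the draining case, a bare wg.Wait() in the test can block forever while the worker blocks on statsCh <- 42, which is exactly the deadlock described in the commit message.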
ciao-launcher/instance_test.go
@@ -267,9 +267,10 @@ func (v *instanceTestState) ClusterConfiguration() (payloads.Configure, error) {
 	return payloads.Configure{}, nil
 }
 
-func cleanupShutdownFail(t *testing.T, instance string, doneCh chan struct{}, ovsCh chan interface{}) {
+func cleanupShutdownFail(t *testing.T, instance string, doneCh chan struct{}, ovsCh chan interface{}, wg *sync.WaitGroup) {
 	_ = os.RemoveAll(path.Join(instancesDir, instance))
-	shutdownInstanceLoop(doneCh, ovsCh)
+
+	shutdownInstanceLoop(doneCh, ovsCh, wg, t)
 	t.FailNow()
 }
 
@@ -383,16 +384,26 @@ func (v *instanceTestState) restartInstance(t *testing.T, ovsCh chan interface{}
 	}
 }
 
-func shutdownInstanceLoop(doneCh chan struct{}, ovsCh chan interface{}) {
+func shutdownInstanceLoop(doneCh chan struct{}, ovsCh chan interface{}, wg *sync.WaitGroup,
+	t *testing.T) {
 	close(doneCh)
 
+	timeout := time.After(time.Second * 5)
 DONE:
 	for {
 		select {
-		case _, ok := <-ovsCh:
-			if !ok {
-				break DONE
-			}
-		default:
+		case <-ovsCh:
+		case <-timeout:
+			t.Error("Timedout waiting for instance loop to exit")
+			break DONE
+		case <-func() chan struct{} {
+			ch := make(chan struct{})
+			go func() {
+				wg.Wait()
+				close(ch)
+			}()
+			return ch
+		}():
+			break DONE
 		}
 	}
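
(A note on the ported code above: sync.WaitGroup has no channel interface, so wg.Wait() cannot be a select case directly. The inline func() chan struct{} literal spawns a goroutine that closes a channel once wg.Wait() returns, letting a single select drain ovsCh, observe the loop's exit, and enforce the five-second timeout.)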
@@ -418,11 +429,10 @@ func TestStartInstanceLoop(t *testing.T) {
 	ac := &agentClient{conn: state, cmdCh: cmdWrapCh}
 	_ = startInstanceWithVM(state.instance, cfg, &wg, doneCh, ac, ovsCh, state, &storage.NoopDriver{})
 	ok := state.expectStatsUpdate(t, ovsCh)
-	shutdownInstanceLoop(doneCh, ovsCh)
+	shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 	if !ok {
 		t.FailNow()
 	}
-	wg.Wait()
 }
 
 // Checks an instance loop can be deleted before an instance is launched.
@@ -449,12 +459,12 @@ func TestDeleteInstanceLoop(t *testing.T) {
 
 	ok := state.expectStatsUpdate(t, ovsCh)
 	if !ok {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.FailNow()
 	}
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.FailNow()
 	}
 	wg.Wait()
@@ -483,22 +493,22 @@ func TestStopNotRunning(t *testing.T) {
 
 	ok := state.expectStatsUpdate(t, ovsCh)
 	if !ok {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.FailNow()
 	}
 
 	select {
 	case cmdCh <- &insStopCmd{}:
 	case <-time.After(time.Second):
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.Fatal("Timed out sending Stop command")
 	}
 
 	select {
 	case <-state.errorCh:
 		state.errorCh = nil
 	case <-time.After(time.Second):
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.Fatal("Timed out waiting for error channel")
 	}
 
@@ -508,7 +518,7 @@ func TestStopNotRunning(t *testing.T) {
 	}
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.FailNow()
 	}
 	wg.Wait()
@@ -527,12 +537,12 @@ func startVMWithCFG(t *testing.T, wg *sync.WaitGroup, cfg *vmConfig, connect boo
 	state.ac = &agentClient{conn: state, cmdCh: make(chan *cmdWrapper)}
 	cmdCh := startInstanceWithVM(state.instance, cfg, wg, doneCh, state.ac, ovsCh, state, &storage.NoopDriver{})
 	if !state.expectStatsUpdate(t, ovsCh) {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, wg, t)
 		t.FailNow()
 	}
 
 	if !state.startInstance(t, ovsCh, cmdCh, cfg, errorOk) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, wg)
 	}
 	return state, ovsCh, cmdCh, doneCh
 }
@@ -552,7 +562,7 @@ func TestStartNotRunning(t *testing.T) {
 	state, ovsCh, cmdCh, doneCh := startVMWithCFG(t, &wg, &cfg, true, false)
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	wg.Wait()
@@ -593,14 +603,12 @@ func TestLoopShutdownWithRunningInstance(t *testing.T) {
 	cfg := standardCfg
 	_, ovsCh, _, doneCh := startVMWithCFG(t, &wg, &cfg, true, false)
 
-	shutdownInstanceLoop(doneCh, ovsCh)
+	shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 
 	// We need to remove the instance manually to have a clean setup for the
 	// subsequent tests.
 
 	_ = os.RemoveAll(path.Join(instancesDir, cfg.Instance))
-
-	wg.Wait()
 }
 
 // Check we can restart an instance
@@ -628,17 +636,16 @@ func TestRestart(t *testing.T) {
 	cmdCh := startInstanceWithVM(state.instance, &cfg, &wg, doneCh, ac, ovsCh, state, &storage.NoopDriver{})
 	ok := state.expectStatsUpdate(t, ovsCh)
 	if !ok {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.FailNow()
 	}
 
 	if !state.restartInstance(t, ovsCh, cmdCh, false) {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.FailNow()
 	}
 
-	shutdownInstanceLoop(doneCh, ovsCh)
-	wg.Wait()
+	shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 }
 
 // Check we can handle a restart error
@@ -666,7 +673,7 @@ func TestRestartFail(t *testing.T) {
 	cmdCh := startInstanceWithVM(state.instance, &cfg, &wg, doneCh, ac, ovsCh, state, &storage.NoopDriver{})
 	ok := state.expectStatsUpdate(t, ovsCh)
 	if !ok {
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.FailNow()
 	}
 
@@ -679,8 +686,7 @@ func TestRestartFail(t *testing.T) {
 			state.rf.Reason, payloads.RestartLaunchFailure)
 	}
 
-	shutdownInstanceLoop(doneCh, ovsCh)
-	wg.Wait()
+	shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 }
 
 // Check we get an error when starting an instance with an invalid image
@@ -710,11 +716,11 @@ func TestStartBadImage(t *testing.T) {
 		select {
 		case cmdCh <- acCmd.cmd:
 		case <-time.After(time.Second):
-			shutdownInstanceLoop(doneCh, ovsCh)
+			shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 			t.Fatal("Timed out sending suicide command")
 		}
 	case <-time.After(time.Second):
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.Fatal("Timedout waiting from suicide command")
 	}
 	wg.Wait()
@@ -742,30 +748,30 @@ func sendCommandDuringSuicide(t *testing.T, testCmd interface{}) *instanceTestSt
 	select {
 	case acCmd = <-state.ac.cmdCh:
 	case <-time.After(time.Second):
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.Fatal("Timedout waiting from suicide command")
 	}
 
 	state.errorCh = make(chan struct{})
 	select {
 	case cmdCh <- testCmd:
 	case <-time.After(time.Second):
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.Fatal("Timed out sending command during suicide")
 	}
 
 	select {
 	case <-state.errorCh:
 		state.errorCh = nil
 	case <-time.After(time.Second):
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.Fatal("Timed out waiting on error channel")
 	}
 
 	select {
 	case cmdCh <- acCmd.cmd:
 	case <-time.After(time.Second):
-		shutdownInstanceLoop(doneCh, ovsCh)
+		shutdownInstanceLoop(doneCh, ovsCh, &wg, t)
 		t.Fatal("Timed out sending suicide command")
 	}
 
@@ -855,15 +861,15 @@ func TestLostInstance(t *testing.T) {
 	close(state.monitorClosedCh)
 
 	if !waitForStateChange(t, ovsStopped, ovsCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	// This gets closed by the instanceLoop and so will become available
 	// in the deleteInstance select loop if we don't set it to nil.
 	state.monitorCh = nil
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	wg.Wait()
@@ -884,7 +890,7 @@ func TestStartRunningInstance(t *testing.T) {
 	state, ovsCh, cmdCh, doneCh := startVMWithCFG(t, &wg, &cfg, true, false)
 
 	if !state.startInstance(t, ovsCh, cmdCh, &cfg, true) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	if state.stf.Reason != payloads.AlreadyRunning {
@@ -893,7 +899,7 @@ func TestStartRunningInstance(t *testing.T) {
 	}
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	wg.Wait()
@@ -927,7 +933,7 @@ func TestAttachVolumeToInstance(t *testing.T) {
 	}
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	wg.Wait()
@@ -979,7 +985,7 @@ func TestAttachExistingVolumeToInstance(t *testing.T) {
 	}
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	wg.Wait()
@@ -1027,7 +1033,7 @@ func TestDetachVolumeFromInstance(t *testing.T) {
 	}
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	wg.Wait()
@@ -1063,7 +1069,7 @@ func TestDetachNonexistingVolumeFromInstance(t *testing.T) {
 	}
 
 	if !state.deleteInstance(t, ovsCh, cmdCh) {
-		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh)
+		cleanupShutdownFail(t, cfg.Instance, doneCh, ovsCh, &wg)
 	}
 
 	wg.Wait()
