Skip to content

Commit

Permalink
Add support for configurable retry count
Browse files Browse the repository at this point in the history
Signed-off-by: Harsh Desai <[email protected]>
  • Loading branch information
Harsh Desai committed Feb 25, 2019
1 parent 4b53857 commit c35fe71
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 8 deletions.
3 changes: 3 additions & 0 deletions pkg/apis/stork/v1alpha1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,8 @@ type GroupVolumeSnapshotSpec struct {
PVCSelector PVCSelectorSpec `json:"pvcSelector"`
// RestoreNamespaces is a list of namespaces to which the snapshots can be restored
RestoreNamespaces []string `json:"restoreNamespaces"`
// MaxRetries is the number of times to retry the groupvolumesnapshot on failure. default: 0
MaxRetries int `json:"maxRetries"`
// Options are pass-through parameters that are passed to the driver handling the group snapshot
Options map[string]string `json:"options"`
}
Expand All @@ -265,6 +267,7 @@ type GroupVolumeSnapshotList struct {
// GroupVolumeSnapshotStatus is the status of a group volume snapshot: the
// stage it is in, the status of that stage, the number of retries made so far
// and the statuses of the individual volume snapshots.
type GroupVolumeSnapshotStatus struct {
// Stage is the stage of the group snapshot.
Stage GroupVolumeSnapshotStageType `json:"stage"`
// Status is the status of the group snapshot.
Status GroupVolumeSnapshotStatusType `json:"status"`
// NumRetries is the number of times the group snapshot has been reset for
// retry after a snapshot failure. The controller increments it on each retry
// and stops retrying once it is no longer less than Spec.MaxRetries.
NumRetries int `json:"numRetries"`
// VolumeSnapshots holds the statuses of the volume snapshots in the group.
// NOTE(review): presumably one entry per snapshotted volume — confirm.
VolumeSnapshots []*VolumeSnapshotStatus `json:"volumeSnapshots"`
}

Expand Down
36 changes: 29 additions & 7 deletions pkg/groupsnapshot/controllers/groupsnapshot.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,16 +333,34 @@ func (m *GroupSnapshotController) handleSnap(groupSnap *stork_api.GroupVolumeSna
}

if isFailed, failedTasks := isAnySnapshotFailed(response.Snapshots); isFailed {
err = fmt.Errorf("Some snapshots in group have failed: %s."+
" Resetting group snapshot to retry.", failedTasks)
errMsgPrefix := fmt.Sprintf("Some snapshots in group have failed: %s", failedTasks)

if groupSnap.Status.NumRetries < groupSnap.Spec.MaxRetries {
groupSnap.Status.NumRetries++

err = fmt.Errorf("%s. Resetting group snapshot for retry: %d",
errMsgPrefix, groupSnap.Status.NumRetries)
response.Snapshots = nil // so that snapshots are retried
stage = stork_api.GroupSnapshotStageSnapshot
status = stork_api.GroupSnapshotPending
} else {
if groupSnap.Spec.MaxRetries == 0 {
err = fmt.Errorf("%s. Failing the groupsnapshot as retries are not enabled", errMsgPrefix)
} else {
err = fmt.Errorf("%s. Failing the groupsnapshot as all %d retries are exhausted",
errMsgPrefix, groupSnap.Spec.MaxRetries)
}

// even though failed, we still need to run post rules
stage = stork_api.GroupSnapshotStagePostSnapshot
status = stork_api.GroupSnapshotFailed
}

log.GroupSnapshotLog(groupSnap).Errorf(err.Error())
m.Recorder.Event(groupSnap,
v1.EventTypeWarning,
string(stork_api.GroupSnapshotFailed),
err.Error())
response.Snapshots = nil // so that snapshots are retried
stage = stork_api.GroupSnapshotStageSnapshot
status = stork_api.GroupSnapshotPending
} else if areAllSnapshotsDone(response.Snapshots) {
log.GroupSnapshotLog(groupSnap).Infof("All snapshots in group are done")
// Create volumesnapshot and volumesnapshotdata objects in API
Expand Down Expand Up @@ -545,7 +563,9 @@ func (m *GroupSnapshotController) handlePostSnap(groupSnap *stork_api.GroupVolum
*stork_api.GroupVolumeSnapshot, bool, error) {
ruleName := groupSnap.Spec.PostExecRule
if len(ruleName) == 0 { // No rule, move to final stage
groupSnap.Status.Status = stork_api.GroupSnapshotSuccessful
if groupSnap.Status.Status != stork_api.GroupSnapshotFailed {
groupSnap.Status.Status = stork_api.GroupSnapshotSuccessful
}
groupSnap.Status.Stage = stork_api.GroupSnapshotStageFinal
return groupSnap, updateCRD, nil
}
Expand Down Expand Up @@ -573,7 +593,9 @@ func (m *GroupSnapshotController) handlePostSnap(groupSnap *stork_api.GroupVolum
}

// done with post-snapshot, move to final stage
groupSnap.Status.Status = stork_api.GroupSnapshotSuccessful
if groupSnap.Status.Status != stork_api.GroupSnapshotFailed {
groupSnap.Status.Status = stork_api.GroupSnapshotSuccessful
}
groupSnap.Status.Stage = stork_api.GroupSnapshotStageFinal
return groupSnap, updateCRD, nil
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@ kind: GroupVolumeSnapshot
metadata:
annotations:
name: load-snapshot-cloud
namespace: load-harsh
spec:
options:
portworx/snapshot-type: cloud
maxRetries: 2
postExecRule: ""
preExecRule: ""
pvcSelector:
Expand Down

0 comments on commit c35fe71

Please sign in to comment.