Skip to content

Commit

Permalink
feat: 增加toolchain发送失败后重试修复 #289
Browse files Browse the repository at this point in the history
  • Loading branch information
flyy1012 committed Sep 5, 2024
1 parent c66ad3b commit 678ffb8
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 4 deletions.
13 changes: 9 additions & 4 deletions src/backend/booster/bk_dist/controller/pkg/manager/remote/mgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -967,9 +967,9 @@ func (m *Mgr) ensureSingleFile(
desc.FilePath, m.work.ID(), host.Server)
return nil
case types.FileSendRetrying:
blog.Infof("remote: single file(%s) for work(%s) to server(%s) is retrying now",
blog.Warnf("remote: single file(%s) for work(%s) to server(%s) is retrying now",
desc.FilePath, m.work.ID(), host.Server)
return nil
return types.ErrSendFileRetrying
default:
return fmt.Errorf("unknown file send status: %s", status.String())
}
Expand Down Expand Up @@ -1068,8 +1068,8 @@ func (m *Mgr) ensureSingleCorkFile(c *corkFile, r matchResult) (err error) {
desc.FilePath, m.work.ID(), host.Server)
return nil
case types.FileSendRetrying:
blog.Infof("remote: single cork file(%s) for work(%s) to server(%s) is retrying now", desc.FilePath, m.work.ID(), host.Server)
return nil
blog.Warnf("remote: single cork file(%s) for work(%s) to server(%s) is retrying now", desc.FilePath, m.work.ID(), host.Server)
return types.ErrSendFileRetrying
default:
blog.Errorf("remote: end ensure single cork file(%s) for work(%s) to server(%s), "+
" with unknown status", desc.FilePath, m.work.ID(), host.Server)
Expand Down Expand Up @@ -1278,12 +1278,17 @@ func (m *Mgr) getFailedFileCollectionByHost(server string) ([]*types.FileCollect
}
fcs := make([]*types.FileCollectionInfo, 0)
for _, re := range *target {
// If any file collection has not reached a terminal state, return an error immediately.
if !re.SendStatus.IsTerminated() {
return nil, fmt.Errorf("remote: found file collection(%s) in file send cache, but not finished", re.UniqID)
}
if re.SendStatus == types.FileSendFailed {
fcs = append(fcs, re)
}
}
return fcs, nil
}

func (m *Mgr) retrySendToolChain(handler dcSDK.RemoteWorkerHandler, req *types.RemoteTaskExecuteRequest) {
if m.resource.CanWorkerRetry(req.Server) {
go func(handler dcSDK.RemoteWorkerHandler, req types.RemoteTaskExecuteRequest) {
Expand Down
1 change: 1 addition & 0 deletions src/backend/booster/bk_dist/controller/pkg/types/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ var (
ErrFileNotFound = fmt.Errorf("not found file info")
ErrWorkCannotBeUpdatedHeartbeat = fmt.Errorf("work can not be updated heartbeat")
ErrSendFileFailed = fmt.Errorf("send file failed")
ErrSendFileRetrying = fmt.Errorf("send file retrying")
ErrTaskCannotBeReleased = fmt.Errorf("task can not be released")
ErrTaskAlreadyReleased = fmt.Errorf("task already released")
ErrSlotsLockFailed = fmt.Errorf("slots lock failed`")
Expand Down
4 changes: 4 additions & 0 deletions src/backend/booster/bk_dist/controller/pkg/types/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,10 @@ func (f FileSendStatus) String() string {
return "unknown"
}

// IsTerminated reports whether the send status is final, i.e. the status is
// either FileSendSucceed or FileSendFailed. Every other status yields false.
func (f FileSendStatus) IsTerminated() bool {
	switch f {
	case FileSendSucceed, FileSendFailed:
		return true
	default:
		return false
	}
}

// FileCollectionInfo save file collection send status
type FileCollectionInfo struct {
UniqID string `json:"uniq_id"`
Expand Down

0 comments on commit 678ffb8

Please sign in to comment.