-
Notifications
You must be signed in to change notification settings - Fork 971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Fix the conflict between preemption and antiAffinity #3070
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -407,49 +407,44 @@ func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) { | |||||||||||||||||
klog.V(4).Infof("NodePodNumber predicates Task <%s/%s> on Node <%s> failed", | ||||||||||||||||||
task.Namespace, task.Name, node.Name) | ||||||||||||||||||
podsNumStatus := &api.Status{ | ||||||||||||||||||
Code: api.Unschedulable, | ||||||||||||||||||
// TODO(wangyang0616): When the number of pods of a node reaches the upper limit, preemption is not supported for now. | ||||||||||||||||||
// Record details in #3079 (volcano.sh/volcano) | ||||||||||||||||||
// In the preempt stage, the pipeline of the pod number is not considered, | ||||||||||||||||||
// the preemption of the pod number is released directly, which will cause the pods in the node to be cyclically evicted. | ||||||||||||||||||
Code: api.UnschedulableAndUnresolvable, | ||||||||||||||||||
Reason: api.NodePodNumberExceeded, | ||||||||||||||||||
} | ||||||||||||||||||
predicateStatus = append(predicateStatus, podsNumStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. If we don't return here, please filter out the statuses whose reason is an empty string at volcano/pkg/scheduler/util/predicate_helper.go Lines 156 to 158 in c91eb07
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||||||||||||||
return predicateStatus, fmt.Errorf("%s", api.NodePodNumberExceeded) | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
predicateByStablefilter := func(pod *v1.Pod, nodeInfo *k8sframework.NodeInfo) ([]*api.Status, bool, error) { | ||||||||||||||||||
// CheckNodeUnschedulable | ||||||||||||||||||
predicateStatus := make([]*api.Status, 0) | ||||||||||||||||||
status := nodeUnscheduleFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) | ||||||||||||||||||
nodeUnscheduleStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, false, fmt.Errorf("plugin %s predicates failed %s", nodeunschedulable.Name, status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
nodeUnscheduleStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if nodeUnscheduleStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, nodeUnscheduleStatus) | ||||||||||||||||||
return predicateStatus, false, nil | ||||||||||||||||||
return predicateStatus, false, fmt.Errorf("plugin %s predicates failed %s", nodeUnscheduleFilter.Name(), status.Message()) | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note that if an error is returned, the following code will not be reached: volcano/pkg/scheduler/actions/allocate/allocate.go Lines 110 to 113 in c91eb07
volcano/pkg/scheduler/actions/preempt/preempt.go Lines 220 to 223 in c91eb07
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
For strategies such as nodeaffinity, podaffinity, and nodeport, an error is returned directly, and the preemption action is not currently supported |
||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
// Check NodeAffinity | ||||||||||||||||||
if predicate.nodeAffinityEnable { | ||||||||||||||||||
status := nodeAffinityFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) | ||||||||||||||||||
nodeAffinityStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, false, fmt.Errorf("plugin %s predicates failed %s", nodeaffinity.Name, status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
nodeAffinityStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if nodeAffinityStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, nodeAffinityStatus) | ||||||||||||||||||
return predicateStatus, false, nil | ||||||||||||||||||
return predicateStatus, false, fmt.Errorf("plugin %s predicates failed %s", nodeAffinityFilter.Name(), status.Message()) | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as before There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
// PodToleratesNodeTaints: TaintToleration | ||||||||||||||||||
if predicate.taintTolerationEnable { | ||||||||||||||||||
status := tolerationFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) | ||||||||||||||||||
tolerationStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, false, fmt.Errorf("plugin %s predicates failed %s", tainttoleration.Name, status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
tolerationStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if tolerationStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, tolerationStatus) | ||||||||||||||||||
return predicateStatus, false, nil | ||||||||||||||||||
return predicateStatus, false, fmt.Errorf("plugin %s predicates failed %s", tolerationFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -482,65 +477,50 @@ func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) { | |||||||||||||||||
// Check NodePort | ||||||||||||||||||
if predicate.nodePortEnable { | ||||||||||||||||||
status := nodePortFilter.Filter(context.TODO(), state, nil, nodeInfo) | ||||||||||||||||||
nodePortStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", nodeports.Name, status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
nodePortStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if nodePortStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, nodePortStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", nodePortFilter.Name(), status.Message()) | ||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same as before. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done |
||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
// Check PodAffinity | ||||||||||||||||||
if predicate.podAffinityEnable { | ||||||||||||||||||
status := podAffinityFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) | ||||||||||||||||||
podAffinityStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", interpodaffinity.Name, status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
podAffinityStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if podAffinityStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, podAffinityStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", podAffinityFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
// Check NodeVolumeLimits | ||||||||||||||||||
if predicate.nodeVolumeLimitsEnable { | ||||||||||||||||||
status := nodeVolumeLimitsCSIFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) | ||||||||||||||||||
nodeVolumeStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", nodeVolumeLimitsCSIFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
nodeVolumeStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if nodeVolumeStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, nodeVolumeStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", nodeVolumeLimitsCSIFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
// Check VolumeZone | ||||||||||||||||||
if predicate.volumeZoneEnable { | ||||||||||||||||||
status := volumeZoneFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) | ||||||||||||||||||
volumeZoneStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", volumeZoneFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
volumeZoneStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if volumeZoneStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, volumeZoneStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", volumeZoneFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
// Check PodTopologySpread | ||||||||||||||||||
if predicate.podTopologySpreadEnable { | ||||||||||||||||||
status := podTopologySpreadFilter.Filter(context.TODO(), state, task.Pod, nodeInfo) | ||||||||||||||||||
podTopologyStatus, err := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", podTopologySpreadFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
podTopologyStatus := framework.ConvertPredicateStatus(status) | ||||||||||||||||||
if podTopologyStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, podTopologyStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin %s predicates failed %s", podTopologySpreadFilter.Name(), status.Message()) | ||||||||||||||||||
} | ||||||||||||||||||
} | ||||||||||||||||||
|
||||||||||||||||||
|
@@ -556,7 +536,7 @@ func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) { | |||||||||||||||||
} | ||||||||||||||||||
if filterNodeStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, filterNodeStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
return predicateStatus, fmt.Errorf("plugin device filternode predicates failed %s", msg) | ||||||||||||||||||
} | ||||||||||||||||||
} else { | ||||||||||||||||||
klog.Warningf("Devices %s assertion conversion failed, skip", val) | ||||||||||||||||||
|
@@ -569,12 +549,9 @@ func (pp *predicatesPlugin) OnSessionOpen(ssn *framework.Session) { | |||||||||||||||||
if predicate.proportionalEnable { | ||||||||||||||||||
// Check ProportionalPredicate | ||||||||||||||||||
proportionalStatus, err := checkNodeResourceIsProportional(task, node, predicate.proportional) | ||||||||||||||||||
if err != nil { | ||||||||||||||||||
return predicateStatus, err | ||||||||||||||||||
} | ||||||||||||||||||
if proportionalStatus.Code != api.Success { | ||||||||||||||||||
predicateStatus = append(predicateStatus, proportionalStatus) | ||||||||||||||||||
return predicateStatus, nil | ||||||||||||||||||
return predicateStatus, err | ||||||||||||||||||
} | ||||||||||||||||||
klog.V(4).Infof("checkNodeResourceIsProportional predicates Task <%s/%s> on Node <%s>: fit %v", | ||||||||||||||||||
task.Namespace, task.Name, node.Name, fit) | ||||||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's the reason for changing the log level from 3 to 5?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a large number of pending pods in the cluster perform the reclaim operation and resource reclamation fails, a large number of repeated logs are generated in each round of scheduling. I think it is more appropriate to change the log level to debug.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ok