Skip to content

Commit

Permalink
Merge pull request #559 from KunWuLuan/master
Browse files Browse the repository at this point in the history
add podGroup backoff time for coscheduling
  • Loading branch information
k8s-ci-robot authored Jul 25, 2023
2 parents d827483 + a0bf090 commit fb9bf41
Show file tree
Hide file tree
Showing 22 changed files with 388 additions and 5 deletions.
14 changes: 13 additions & 1 deletion apis/config/scheme/scheme_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ import (
"k8s.io/kubernetes/pkg/scheduler/apis/config/testing/defaults"

"sigs.k8s.io/scheduler-plugins/apis/config"
"sigs.k8s.io/scheduler-plugins/apis/config/v1"
v1 "sigs.k8s.io/scheduler-plugins/apis/config/v1"
"sigs.k8s.io/scheduler-plugins/apis/config/v1beta2"
"sigs.k8s.io/scheduler-plugins/apis/config/v1beta3"
"sigs.k8s.io/scheduler-plugins/pkg/coscheduling"
Expand Down Expand Up @@ -67,6 +67,7 @@ profiles:
- name: Coscheduling
args:
permitWaitingTimeSeconds: 10
podGroupBackoffSeconds: 0
deniedPGExpirationTimeSeconds: 3
- name: NodeResourcesAllocatable
args:
Expand Down Expand Up @@ -473,6 +474,7 @@ kind: KubeSchedulerConfiguration
profiles:
- schedulerName: scheduler-plugins
pluginConfig:
- name: Coscheduling # Test argument defaulting logic
- name: TopologicalSort
args:
namespaces:
Expand All @@ -489,6 +491,12 @@ profiles:
SchedulerName: "scheduler-plugins",
Plugins: defaults.PluginsV1,
PluginConfig: []schedconfig.PluginConfig{
{
Name: coscheduling.Name,
Args: &config.CoschedulingArgs{
PermitWaitingTimeSeconds: 60,
},
},
{
Name: topologicalsort.Name,
Args: &config.TopologicalSortArgs{
Expand Down Expand Up @@ -729,6 +737,7 @@ profiles:
apiVersion: kubescheduler.config.k8s.io/v1beta2
kind: CoschedulingArgs
permitWaitingTimeSeconds: 10
podGroupBackoffSeconds: 0
name: Coscheduling
- args:
apiVersion: kubescheduler.config.k8s.io/v1beta2
Expand Down Expand Up @@ -782,6 +791,7 @@ profiles:
Name: coscheduling.Name,
Args: &config.CoschedulingArgs{
PermitWaitingTimeSeconds: 10,
PodGroupBackoffSeconds: 20,
},
},
{
Expand Down Expand Up @@ -886,6 +896,7 @@ profiles:
apiVersion: kubescheduler.config.k8s.io/v1beta3
kind: CoschedulingArgs
permitWaitingTimeSeconds: 10
podGroupBackoffSeconds: 20
name: Coscheduling
- args:
apiVersion: kubescheduler.config.k8s.io/v1beta3
Expand Down Expand Up @@ -1071,6 +1082,7 @@ profiles:
apiVersion: kubescheduler.config.k8s.io/v1
kind: CoschedulingArgs
permitWaitingTimeSeconds: 10
podGroupBackoffSeconds: 0
name: Coscheduling
- args:
apiVersion: kubescheduler.config.k8s.io/v1
Expand Down
2 changes: 2 additions & 0 deletions apis/config/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type CoschedulingArgs struct {

// PermitWaitingTimeSeconds is the waiting timeout in seconds.
PermitWaitingTimeSeconds int64
// PodGroupBackoffSeconds is the backoff time in seconds before a pod group can be scheduled again.
PodGroupBackoffSeconds int64
}

// ModeType is a "string" type.
Expand Down
4 changes: 4 additions & 0 deletions apis/config/v1/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (

var (
defaultPermitWaitingTimeSeconds int64 = 60
defaultPodGroupBackoffSeconds int64 = 0

defaultNodeResourcesAllocatableMode = Least

Expand Down Expand Up @@ -100,6 +101,9 @@ func SetDefaults_CoschedulingArgs(obj *CoschedulingArgs) {
if obj.PermitWaitingTimeSeconds == nil {
obj.PermitWaitingTimeSeconds = &defaultPermitWaitingTimeSeconds
}
if obj.PodGroupBackoffSeconds == nil {
obj.PodGroupBackoffSeconds = &defaultPodGroupBackoffSeconds
}
}

// SetDefaults_NodeResourcesAllocatableArgs sets the default parameters for NodeResourcesAllocatable.
Expand Down
3 changes: 3 additions & 0 deletions apis/config/v1/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,18 @@ func TestSchedulingDefaults(t *testing.T) {
config: &CoschedulingArgs{},
expect: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
PodGroupBackoffSeconds: pointer.Int64Ptr(0),
},
},
{
name: "set non default CoschedulingArgs",
config: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
},
expect: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
},
},
{
Expand Down
2 changes: 2 additions & 0 deletions apis/config/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type CoschedulingArgs struct {

// PermitWaitingTimeSeconds is the waiting timeout in seconds.
PermitWaitingTimeSeconds *int64 `json:"permitWaitingTimeSeconds,omitempty"`
// PodGroupBackoffSeconds is the backoff time in seconds before a pod group can be scheduled again.
PodGroupBackoffSeconds *int64 `json:"podGroupBackoffSeconds,omitempty"`
}

// ModeType is a type "string".
Expand Down
6 changes: 6 additions & 0 deletions apis/config/v1/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions apis/config/v1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions apis/config/v1beta2/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (

var (
defaultPermitWaitingTimeSeconds int64 = 60
defaultPodGroupBackoffSeconds int64 = 0
defaultDeniedPGExpirationTimeSeconds int64 = 20

defaultNodeResourcesAllocatableMode = Least
Expand Down Expand Up @@ -85,6 +86,9 @@ func SetDefaults_CoschedulingArgs(obj *CoschedulingArgs) {
if obj.DeniedPGExpirationTimeSeconds == nil {
obj.DeniedPGExpirationTimeSeconds = &defaultDeniedPGExpirationTimeSeconds
}
if obj.PodGroupBackoffSeconds == nil {
obj.PodGroupBackoffSeconds = &defaultPodGroupBackoffSeconds
}
}

// SetDefaults_NodeResourcesAllocatableArgs sets the default parameters for NodeResourcesAllocatable.
Expand Down
3 changes: 3 additions & 0 deletions apis/config/v1beta2/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,20 @@ func TestSchedulingDefaults(t *testing.T) {
expect: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
DeniedPGExpirationTimeSeconds: pointer.Int64Ptr(20),
PodGroupBackoffSeconds: pointer.Int64Ptr(0),
},
},
{
name: "set non default CoschedulingArgs",
config: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
DeniedPGExpirationTimeSeconds: pointer.Int64Ptr(10),
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
},
expect: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
DeniedPGExpirationTimeSeconds: pointer.Int64Ptr(10),
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
},
},
{
Expand Down
2 changes: 2 additions & 0 deletions apis/config/v1beta2/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type CoschedulingArgs struct {

// PermitWaitingTimeSeconds is the waiting timeout in seconds.
PermitWaitingTimeSeconds *int64 `json:"permitWaitingTimeSeconds,omitempty"`
// PodGroupBackoffSeconds is the backoff time in seconds before a pod group can be scheduled again.
PodGroupBackoffSeconds *int64 `json:"podGroupBackoffSeconds,omitempty"`
// DeniedPGExpirationTimeSeconds is the expiration time of the denied podgroup store.
DeniedPGExpirationTimeSeconds *int64 `json:"deniedPGExpirationTimeSeconds,omitempty"`
}
Expand Down
6 changes: 6 additions & 0 deletions apis/config/v1beta2/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions apis/config/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 6 additions & 3 deletions apis/config/v1beta3/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@ import (
)

var (
defaultPermitWaitingTimeSeconds int64 = 60

defaultNodeResourcesAllocatableMode = Least
defaultPermitWaitingTimeSeconds int64 = 60
defaultPodGroupBackoffSeconds int64 = 0
defaultNodeResourcesAllocatableMode = Least

// defaultResourcesToWeightMap is used to set the default resourceToWeight map for CPU and memory
// used by the NodeResourcesAllocatable scoring plugin.
Expand Down Expand Up @@ -101,6 +101,9 @@ func SetDefaults_CoschedulingArgs(obj *CoschedulingArgs) {
if obj.PermitWaitingTimeSeconds == nil {
obj.PermitWaitingTimeSeconds = &defaultPermitWaitingTimeSeconds
}
if obj.PodGroupBackoffSeconds == nil {
obj.PodGroupBackoffSeconds = &defaultPodGroupBackoffSeconds
}
}

// SetDefaults_NodeResourcesAllocatableArgs sets the default parameters for NodeResourcesAllocatable.
Expand Down
3 changes: 3 additions & 0 deletions apis/config/v1beta3/defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,15 +40,18 @@ func TestSchedulingDefaults(t *testing.T) {
config: &CoschedulingArgs{},
expect: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
PodGroupBackoffSeconds: pointer.Int64Ptr(0),
},
},
{
name: "set non default CoschedulingArgs",
config: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
},
expect: &CoschedulingArgs{
PermitWaitingTimeSeconds: pointer.Int64Ptr(60),
PodGroupBackoffSeconds: pointer.Int64Ptr(20),
},
},
{
Expand Down
2 changes: 2 additions & 0 deletions apis/config/v1beta3/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ type CoschedulingArgs struct {

// PermitWaitingTimeSeconds is the waiting timeout in seconds.
PermitWaitingTimeSeconds *int64 `json:"permitWaitingTimeSeconds,omitempty"`
// PodGroupBackoffSeconds is the backoff time in seconds before a pod group can be scheduled again.
PodGroupBackoffSeconds *int64 `json:"podGroupBackoffSeconds,omitempty"`
}

// ModeType is a type "string".
Expand Down
6 changes: 6 additions & 0 deletions apis/config/v1beta3/zz_generated.conversion.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions apis/config/v1beta3/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions pkg/coscheduling/core/core.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ type Manager interface {
DeletePermittedPodGroup(string)
CalculateAssignedPods(string, string) int
ActivateSiblings(pod *corev1.Pod, state *framework.CycleState)
BackoffPodGroup(string, time.Duration)
}

// PodGroupManager defines the scheduling operation called
Expand All @@ -74,6 +75,8 @@ type PodGroupManager struct {
scheduleTimeout *time.Duration
// permittedPG stores the podgroup name which has passed the pre resource check.
permittedPG *gochache.Cache
// backedOffPG stores the podgroup name which failed scheduling recently.
backedOffPG *gochache.Cache
// pgLister is podgroup lister
pgLister pglister.PodGroupLister
// podLister is pod lister
Expand All @@ -91,10 +94,18 @@ func NewPodGroupManager(pgClient pgclientset.Interface, snapshotSharedLister fra
pgLister: pgInformer.Lister(),
podLister: podInformer.Lister(),
permittedPG: gochache.New(3*time.Second, 3*time.Second),
backedOffPG: gochache.New(10*time.Second, 10*time.Second),
}
return pgMgr
}

// BackoffPodGroup records pgName in the backedOffPG cache for the given
// backoff duration. While the entry is present, PreFilter rejects pods that
// belong to that pod group.
//
// A non-positive backoff is a no-op. Zero means the feature is disabled, and a
// negative value must never reach the cache: go-cache interprets a negative
// TTL as "no expiration", which would block the pod group permanently.
func (pgMgr *PodGroupManager) BackoffPodGroup(pgName string, backoff time.Duration) {
	if backoff <= 0 {
		return
	}
	// Add (unlike Set) returns an error when an unexpired entry already
	// exists. That is intentionally ignored: the pod group is already backed
	// off and the existing expiry is kept rather than extended.
	_ = pgMgr.backedOffPG.Add(pgName, nil, backoff)
}

// ActivateSiblings stashes the pods belonging to the same PodGroup of the given pod
// in the given state, with a reserved key "kubernetes.io/pods-to-activate".
func (pgMgr *PodGroupManager) ActivateSiblings(pod *corev1.Pod, state *framework.CycleState) {
Expand Down Expand Up @@ -143,6 +154,10 @@ func (pgMgr *PodGroupManager) PreFilter(ctx context.Context, pod *corev1.Pod) er
return nil
}

if _, exist := pgMgr.backedOffPG.Get(pgFullName); exist {
return fmt.Errorf("podGroup %v failed recently", pgFullName)
}

pods, err := pgMgr.podLister.Pods(pod.Namespace).List(
labels.SelectorFromSet(labels.Set{v1alpha1.PodGroupLabel: util.GetPodGroupLabel(pod)}),
)
Expand Down
2 changes: 1 addition & 1 deletion pkg/coscheduling/core/core_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ func TestPreFilter(t *testing.T) {
existingPods, allNodes := testutil.MakeNodesAndPods(map[string]string{"test": "a"}, 60, 30)
snapshot := testutil.NewFakeSharedLister(existingPods, allNodes)
pgMgr := &PodGroupManager{pgLister: pgLister, permittedPG: newCache(),
snapshotSharedLister: snapshot, podLister: podInformer.Lister(), scheduleTimeout: &scheduleTimeout}
snapshotSharedLister: snapshot, podLister: podInformer.Lister(), scheduleTimeout: &scheduleTimeout, backedOffPG: gochache.New(10*time.Second, 10*time.Second)}
informerFactory.Start(ctx.Done())
if !clicache.WaitForCacheSync(ctx.Done(), podInformer.Informer().HasSynced) {
t.Fatal("WaitForCacheSync failed")
Expand Down
Loading

0 comments on commit fb9bf41

Please sign in to comment.