Skip to content

Commit

Permalink
Merge pull request #567 from luomingmeng/dev/reporter-support-context…
Browse files Browse the repository at this point in the history
…-timeout

remote reporter plugin and topology adapter support context timeout
  • Loading branch information
luomingmeng authored May 24, 2024
2 parents c0bd50d + 9f983f1 commit 6685716
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import (

const (
podResourcesClientTimeout = 10 * time.Second
getTopologyZonesTimeout = 10 * time.Second
podResourcesClientMaxMsgSize = 1024 * 1024 * 16
)

Expand Down Expand Up @@ -139,6 +140,8 @@ func (p *topologyAdapterImpl) GetTopologyZones(parentCtx context.Context) ([]*no
// always force getting pod list instead of cache
ctx := context.WithValue(parentCtx, metaserverpod.BypassCacheKey, metaserverpod.BypassCacheTrue)

ctx, cancel := context.WithTimeout(ctx, getTopologyZonesTimeout)
defer cancel()
podList, err := p.metaServer.GetPodList(ctx, nil)
if err != nil {
return nil, errors.Wrap(err, "get pod list from metaServer failed")
Expand Down
5 changes: 4 additions & 1 deletion pkg/agent/resourcemanager/fetcher/plugin/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ import (

const (
dialRemoteEndpointTimeout = 10 * time.Second
getReportContentTimeout = 10 * time.Second
)

// ListAndWatchCallback should be called when plugins report info update.
Expand Down Expand Up @@ -173,7 +174,9 @@ func (e *remoteEndpointImpl) GetReportContent(c context.Context) (*v1alpha1.GetR
return nil, fmt.Errorf("endpoint %v has been stopped", e.pluginName)
}

resp, err := e.client.GetReportContent(c, &v1alpha1.Empty{})
ctx, cancel := context.WithTimeout(c, getReportContentTimeout)
defer cancel()
resp, err := e.client.GetReportContent(ctx, &v1alpha1.Empty{})
if err == nil {
e.setCache(resp)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ func (o *OvercommitRatioReporterPlugin) Stop() error {
// GetReportContent get overcommitment ratio from manager directly.
// Since the metrics collected by Manager are already an average within a time period,
// we expect a faster response to node load fluctuations to avoid excessive overcommit of online resources.
func (o *OvercommitRatioReporterPlugin) GetReportContent(_ context.Context, _ *v1alpha1.Empty) (*v1alpha1.GetReportContentResponse, error) {
func (o *OvercommitRatioReporterPlugin) GetReportContent(ctx context.Context, _ *v1alpha1.Empty) (*v1alpha1.GetReportContentResponse, error) {
response := &v1alpha1.GetReportContentResponse{
Content: []*v1alpha1.ReportContent{},
}
Expand All @@ -173,7 +173,7 @@ func (o *OvercommitRatioReporterPlugin) GetReportContent(_ context.Context, _ *v
response.Content = append(response.Content, overcommitRatioContent)

// get topologyProvider and guaranteed cpus
topologyProviderContent, err := o.getTopologyProviderReportContent(o.ctx)
topologyProviderContent, err := o.getTopologyProviderReportContent(ctx)
if err != nil {
return nil, err
}
Expand Down

0 comments on commit 6685716

Please sign in to comment.