diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index b68fbe5..f185443 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -23,7 +23,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: '1.22' + go-version: '1.23.2' - name: Install Protoc run: | diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index a9989e3..45451c2 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -24,12 +24,12 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: '1.22' + go-version: '1.23.2' - name: Lint - uses: golangci/golangci-lint-action@v5 + uses: golangci/golangci-lint-action@v6 with: - version: 'v1.58.0' + version: v1.61 args: -v --timeout 5m skip-cache: true @@ -49,7 +49,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: '1.22' + go-version: '1.23.2' - name: Test run: go test -v ./... @@ -62,7 +62,7 @@ jobs: - name: Set up Go uses: actions/setup-go@v4 with: - go-version: '1.22' + go-version: '1.23.2' - name: Install Protoc run: | diff --git a/.gitignore b/.gitignore index 38a1653..af76f79 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ +/coverage.out /bin /ssl /deb/topograph/DEBIAN/control diff --git a/cmd/node-observer/main.go b/cmd/node-observer/main.go index 53e02b4..8233dee 100644 --- a/cmd/node-observer/main.go +++ b/cmd/node-observer/main.go @@ -49,7 +49,7 @@ func main() { } if err := mainInternal(c); err != nil { - klog.Errorf(err.Error()) + klog.Error(err.Error()) os.Exit(1) } } diff --git a/cmd/topograph/main.go b/cmd/topograph/main.go index d615c97..08ca75e 100644 --- a/cmd/topograph/main.go +++ b/cmd/topograph/main.go @@ -48,7 +48,7 @@ func main() { } if err := mainInternal(c); err != nil { - klog.Errorf(err.Error()) + klog.Error(err.Error()) os.Exit(1) } } diff --git a/cmd/toposim/main.go b/cmd/toposim/main.go index 84d2b79..8befce1 100644 --- a/cmd/toposim/main.go +++ b/cmd/toposim/main.go @@ -32,7 +32,7 @@ 
import ( func main() { if err := mainInternal(); err != nil { - klog.Errorf(err.Error()) + klog.Error(err.Error()) os.Exit(1) } } diff --git a/go.mod b/go.mod index 2841183..624651b 100644 --- a/go.mod +++ b/go.mod @@ -1,56 +1,64 @@ module github.com/NVIDIA/topograph -go 1.22.3 +go 1.23 + +toolchain go1.23.2 require ( - cloud.google.com/go/compute v1.28.1 + cloud.google.com/go/compute v1.28.2 cloud.google.com/go/compute/metadata v0.5.2 - github.com/aws/aws-sdk-go-v2/config v1.27.37 - github.com/aws/aws-sdk-go-v2/credentials v1.17.35 - github.com/aws/aws-sdk-go-v2/service/ec2 v1.179.0 + github.com/aws/aws-sdk-go-v2 v1.32.3 + github.com/aws/aws-sdk-go-v2/config v1.28.1 + github.com/aws/aws-sdk-go-v2/credentials v1.17.42 + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.18 + github.com/aws/aws-sdk-go-v2/service/ec2 v1.187.0 + github.com/go-playground/validator/v10 v10.22.1 github.com/google/uuid v1.6.0 + github.com/googleapis/gax-go/v2 v2.13.0 github.com/hashicorp/golang-lru v1.0.2 + github.com/mitchellh/mapstructure v1.5.0 github.com/oklog/run v1.1.0 - github.com/oracle/oci-go-sdk/v65 v65.75.0 - github.com/prometheus/client_golang v1.20.4 + github.com/oracle/oci-go-sdk/v65 v65.78.0 + github.com/prometheus/client_golang v1.20.5 github.com/stretchr/testify v1.9.0 - golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 - google.golang.org/api v0.198.0 - google.golang.org/grpc v1.67.0 - google.golang.org/protobuf v1.34.2 + golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c + google.golang.org/api v0.204.0 + google.golang.org/grpc v1.67.1 + google.golang.org/protobuf v1.35.1 gopkg.in/yaml.v3 v3.0.1 - k8s.io/api v0.31.1 - k8s.io/apimachinery v0.31.1 - k8s.io/client-go v0.31.1 + k8s.io/api v0.31.2 + k8s.io/apimachinery v0.31.2 + k8s.io/client-go v0.31.2 k8s.io/klog/v2 v2.130.1 ) require ( - cloud.google.com/go/auth v0.9.4 // indirect - cloud.google.com/go/auth/oauth2adapt v0.2.4 // indirect - github.com/aws/aws-sdk-go-v2 v1.31.0 // indirect - 
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.14 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.18 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.18 // indirect + cloud.google.com/go/auth v0.10.0 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.5 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.22 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.22 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.5 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.20 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.23.1 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.27.1 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.31.1 // indirect - github.com/aws/smithy-go v1.21.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.24.3 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.32.3 // indirect + github.com/aws/smithy-go v1.22.0 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/emicklei/go-restful/v3 v3.11.0 // indirect + github.com/emicklei/go-restful/v3 v3.12.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect + github.com/gabriel-vasile/mimetype v1.4.6 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/go-openapi/jsonpointer v0.19.6 // indirect - github.com/go-openapi/jsonreference v0.20.2 // indirect - github.com/go-openapi/swag 
v0.22.4 // indirect - github.com/gofrs/flock v0.8.1 // indirect + github.com/go-openapi/jsonpointer v0.21.0 // indirect + github.com/go-openapi/jsonreference v0.21.0 // indirect + github.com/go-openapi/swag v0.23.0 // indirect + github.com/go-playground/locales v0.14.1 // indirect + github.com/go-playground/universal-translator v0.18.1 // indirect + github.com/gofrs/flock v0.12.1 // indirect github.com/gogo/protobuf v1.3.2 // indirect github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/golang/protobuf v1.5.4 // indirect @@ -59,40 +67,40 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/s2a-go v0.1.8 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect - github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/josharian/intern v1.0.0 // indirect github.com/json-iterator/go v1.1.12 // indirect - github.com/klauspost/compress v1.17.9 // indirect + github.com/klauspost/compress v1.17.11 // indirect + github.com/leodido/go-urn v1.4.0 // indirect github.com/mailru/easyjson v0.7.7 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/prometheus/client_model v0.6.1 // indirect - github.com/prometheus/common v0.55.0 // indirect + github.com/prometheus/common v0.60.1 // indirect github.com/prometheus/procfs v0.15.1 // indirect - github.com/sony/gobreaker v0.5.0 // indirect + github.com/sony/gobreaker v1.0.0 // indirect github.com/x448/float16 v0.8.4 // indirect go.opencensus.io v0.24.0 // indirect - go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect - go.opentelemetry.io/otel v1.29.0 // indirect - go.opentelemetry.io/otel/metric v1.29.0 // indirect - go.opentelemetry.io/otel/trace v1.29.0 // indirect - 
golang.org/x/crypto v0.27.0 // indirect - golang.org/x/net v0.29.0 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 // indirect + go.opentelemetry.io/otel v1.31.0 // indirect + go.opentelemetry.io/otel/metric v1.31.0 // indirect + go.opentelemetry.io/otel/trace v1.31.0 // indirect + golang.org/x/crypto v0.28.0 // indirect + golang.org/x/net v0.30.0 // indirect golang.org/x/oauth2 v0.23.0 // indirect - golang.org/x/sys v0.25.0 // indirect - golang.org/x/term v0.24.0 // indirect - golang.org/x/text v0.18.0 // indirect - golang.org/x/time v0.6.0 // indirect - google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 // indirect + golang.org/x/sys v0.26.0 // indirect + golang.org/x/term v0.25.0 // indirect + golang.org/x/text v0.19.0 // indirect + golang.org/x/time v0.7.0 // indirect + google.golang.org/genproto v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 // indirect - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect - sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect + k8s.io/kube-openapi v0.0.0-20241009091222-67ed5848f094 // indirect + k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078 // indirect + sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect sigs.k8s.io/yaml v1.4.0 // indirect ) diff --git a/go.sum b/go.sum index 53ef7e0..2a7badc 100644 --- a/go.sum +++ b/go.sum @@ -1,43 +1,43 @@ cloud.google.com/go v0.26.0/go.mod 
h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= -cloud.google.com/go v0.115.1 h1:Jo0SM9cQnSkYfp44+v+NQXHpcHqlnRJk2qxh6yvxxxQ= -cloud.google.com/go v0.115.1/go.mod h1:DuujITeaufu3gL68/lOFIirVNJwQeyf5UXyi+Wbgknc= -cloud.google.com/go/auth v0.9.4 h1:DxF7imbEbiFu9+zdKC6cKBko1e8XeJnipNqIbWZ+kDI= -cloud.google.com/go/auth v0.9.4/go.mod h1:SHia8n6//Ya940F1rLimhJCjjx7KE17t0ctFEci3HkA= -cloud.google.com/go/auth/oauth2adapt v0.2.4 h1:0GWE/FUsXhf6C+jAkWgYm7X9tK8cuEIfy19DBn6B6bY= -cloud.google.com/go/auth/oauth2adapt v0.2.4/go.mod h1:jC/jOpwFP6JBxhB3P5Rr0a9HLMC/Pe3eaL4NmdvqPtc= -cloud.google.com/go/compute v1.28.1 h1:XwPcZjgMCnU2tkwY10VleUjSAfpTj9RDn+kGrbYsi8o= -cloud.google.com/go/compute v1.28.1/go.mod h1:b72iXMY4FucVry3NR3Li4kVyyTvbMDE7x5WsqvxjsYk= +cloud.google.com/go v0.116.0 h1:B3fRrSDkLRt5qSHWe40ERJvhvnQwdZiHu0bJOpldweE= +cloud.google.com/go v0.116.0/go.mod h1:cEPSRWPzZEswwdr9BxE6ChEn01dWlTaF05LiC2Xs70U= +cloud.google.com/go/auth v0.10.0 h1:tWlkvFAh+wwTOzXIjrwM64karR1iTBZ/GRr0S/DULYo= +cloud.google.com/go/auth v0.10.0/go.mod h1:xxA5AqpDrvS+Gkmo9RqrGGRh6WSNKKOXhY3zNOr38tI= +cloud.google.com/go/auth/oauth2adapt v0.2.5 h1:2p29+dePqsCHPP1bqDJcKj4qxRyYCcbzKpFyKGt3MTk= +cloud.google.com/go/auth/oauth2adapt v0.2.5/go.mod h1:AlmsELtlEBnaNTL7jCj8VQFLy6mbZv0s4Q7NGBeQ5E8= +cloud.google.com/go/compute v1.28.2 h1:M9Rf+Ii9xQZk+rTBEgaz3J5ZyKs6kSNAgdZQpyBpxVk= +cloud.google.com/go/compute v1.28.2/go.mod h1:HFlsDurE5DpQZClAGf/cYh+gxssMhBxBovZDYkEn/Og= cloud.google.com/go/compute/metadata v0.5.2 h1:UxK4uu/Tn+I3p2dYWTfiX4wva7aYlKixAHn3fyqngqo= cloud.google.com/go/compute/metadata v0.5.2/go.mod h1:C66sj2AluDcIqakBq/M8lw8/ybHgOZqin2obFxa/E5k= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= -github.com/aws/aws-sdk-go-v2 v1.31.0 h1:3V05LbxTSItI5kUqNwhJrrrY1BAXxXt0sN0l72QmG5U= -github.com/aws/aws-sdk-go-v2 v1.31.0/go.mod h1:ztolYtaEUtdpf9Wftr31CJfLVjOnD/CVRkKOOYgF8hA= -github.com/aws/aws-sdk-go-v2/config v1.27.37 
h1:xaoIwzHVuRWRHFI0jhgEdEGc8xE1l91KaeRDsWEIncU= -github.com/aws/aws-sdk-go-v2/config v1.27.37/go.mod h1:S2e3ax9/8KnMSyRVNd3sWTKs+1clJ2f1U6nE0lpvQRg= -github.com/aws/aws-sdk-go-v2/credentials v1.17.35 h1:7QknrZhYySEB1lEXJxGAmuD5sWwys5ZXNr4m5oEz0IE= -github.com/aws/aws-sdk-go-v2/credentials v1.17.35/go.mod h1:8Vy4kk7at4aPSmibr7K+nLTzG6qUQAUO4tW49fzUV4E= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.14 h1:C/d03NAmh8C4BZXhuRNboF/DqhBkBCeDiJDcaqIT5pA= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.14/go.mod h1:7I0Ju7p9mCIdlrfS+JCgqcYD0VXz/N4yozsox+0o078= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.18 h1:kYQ3H1u0ANr9KEKlGs/jTLrBFPo8P8NaH/w7A01NeeM= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.18/go.mod h1:r506HmK5JDUh9+Mw4CfGJGSSoqIiLCndAuqXuhbv67Y= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.18 h1:Z7IdFUONvTcvS7YuhtVxN99v2cCoHRXOS4mTr0B/pUc= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.18/go.mod h1:DkKMmksZVVyat+Y+r1dEOgJEfUeA7UngIHWeKsi0yNc= +github.com/aws/aws-sdk-go-v2 v1.32.3 h1:T0dRlFBKcdaUPGNtkBSwHZxrtis8CQU17UpNBZYd0wk= +github.com/aws/aws-sdk-go-v2 v1.32.3/go.mod h1:2SK5n0a2karNTv5tbP1SjsX0uhttou00v/HpXKM1ZUo= +github.com/aws/aws-sdk-go-v2/config v1.28.1 h1:oxIvOUXy8x0U3fR//0eq+RdCKimWI900+SV+10xsCBw= +github.com/aws/aws-sdk-go-v2/config v1.28.1/go.mod h1:bRQcttQJiARbd5JZxw6wG0yIK3eLeSCPdg6uqmmlIiI= +github.com/aws/aws-sdk-go-v2/credentials v1.17.42 h1:sBP0RPjBU4neGpIYyx8mkU2QqLPl5u9cmdTWVzIpHkM= +github.com/aws/aws-sdk-go-v2/credentials v1.17.42/go.mod h1:FwZBfU530dJ26rv9saAbxa9Ej3eF/AK0OAY86k13n4M= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.18 h1:68jFVtt3NulEzojFesM/WVarlFpCaXLKaBxDpzkQ9OQ= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.18/go.mod h1:Fjnn5jQVIo6VyedMc0/EhPpfNlPl7dHV916O6B+49aE= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.22 h1:Jw50LwEkVjuVzE1NzkhNKkBf9cRN7MtE1F/b2cOKTUM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.22/go.mod 
h1:Y/SmAyPcOTmpeVaWSzSKiILfXTVJwrGmYZhcRbhWuEY= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.22 h1:981MHwBaRZM7+9QSR6XamDzF/o7ouUGxFzr+nVSIhrs= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.22/go.mod h1:1RA1+aBEfn+CAB/Mh0MB6LsdCYCnjZm7tKXtnk499ZQ= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1 h1:VaRN3TlFdd6KxX1x3ILT5ynH6HvKgqdiXoTxAF4HQcQ= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.1/go.mod h1:FbtygfRFze9usAadmnGJNc8KsP346kEe+y2/oyhGAGc= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.179.0 h1:yCb6SUDqSodc2t8Jqdc35zq9V81a9pyV8SUTBluvA/Q= -github.com/aws/aws-sdk-go-v2/service/ec2 v1.179.0/go.mod h1:W6sNzs5T4VpZn1Vy+FMKw8s24vt5k6zPJXcNOK0asBo= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.5 h1:QFASJGfT8wMXtuP3D5CRmMjARHv9ZmzFUMJznHDOY3w= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.11.5/go.mod h1:QdZ3OmoIjSX+8D1OPAzPxDfjXASbBMDsz9qvtyIhtik= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.20 h1:Xbwbmk44URTiHNx6PNo0ujDE6ERlsCKJD3u1zfnzAPg= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.11.20/go.mod h1:oAfOFzUB14ltPZj1rWwRc3d/6OgD76R8KlvU3EqM9Fg= -github.com/aws/aws-sdk-go-v2/service/sso v1.23.1 h1:2jrVsMHqdLD1+PA4BA6Nh1eZp0Gsy3mFSB5MxDvcJtU= -github.com/aws/aws-sdk-go-v2/service/sso v1.23.1/go.mod h1:XRlMvmad0ZNL+75C5FYdMvbbLkd6qiqz6foR1nA1PXY= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.27.1 h1:0L7yGCg3Hb3YQqnSgBTZM5wepougtL1aEccdcdYhHME= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.27.1/go.mod h1:FnvDM4sfa+isJ3kDXIzAB9GAwVSzFzSy97uZ3IsHo4E= -github.com/aws/aws-sdk-go-v2/service/sts v1.31.1 h1:8K0UNOkZiK9Uh3HIF6Bx0rcNCftqGCeKmOaR7Gp5BSo= -github.com/aws/aws-sdk-go-v2/service/sts v1.31.1/go.mod h1:yMWe0F+XG0DkRZK5ODZhG7BEFYhLXi2dqGsv6tX0cgI= -github.com/aws/smithy-go v1.21.0 h1:H7L8dtDRk0P1Qm6y0ji7MCYMQObJ5R9CRpyPhRUkLYA= -github.com/aws/smithy-go v1.21.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= +github.com/aws/aws-sdk-go-v2/service/ec2 
v1.187.0 h1:cA4hWo269CN5RY7Arqt8BfzXF0KIN8DSNo/KcqHKkWk= +github.com/aws/aws-sdk-go-v2/service/ec2 v1.187.0/go.mod h1:ossaD9Z1ugYb6sq9QIqQLEOorCGcqUoxlhud9M9yE70= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0 h1:TToQNkvGguu209puTojY/ozlqy2d/SFNcoLIqTFi42g= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.0/go.mod h1:0jp+ltwkf+SwG2fm/PKo8t4y8pJSgOCO4D8Lz3k0aHQ= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.3 h1:qcxX0JYlgWH3hpPUnd6U0ikcl6LLA9sLkXE2w1fpMvY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.3/go.mod h1:cLSNEmI45soc+Ef8K/L+8sEA3A3pYFEYf5B5UI+6bH4= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.3 h1:UTpsIf0loCIWEbrqdLb+0RxnTXfWh2vhw4nQmFi4nPc= +github.com/aws/aws-sdk-go-v2/service/sso v1.24.3/go.mod h1:FZ9j3PFHHAR+w0BSEjK955w5YD2UwB/l/H0yAK3MJvI= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.3 h1:2YCmIXv3tmiItw0LlYf6v7gEHebLY45kBEnPezbUKyU= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.28.3/go.mod h1:u19stRyNPxGhj6dRm+Cdgu6N75qnbW7+QN0q0dsAk58= +github.com/aws/aws-sdk-go-v2/service/sts v1.32.3 h1:wVnQ6tigGsRqSWDEEyH6lSAJ9OyFUsSnbaUWChuSGzs= +github.com/aws/aws-sdk-go-v2/service/sts v1.32.3/go.mod h1:VZa9yTFyj4o10YGsmDO4gbQJUvvhY72fhumT8W4LqsE= +github.com/aws/smithy-go v1.22.0 h1:uunKnWlcoL3zO7q+gG2Pk53joueEOsnNB28QdMsmiMM= +github.com/aws/smithy-go v1.22.0/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= @@ -45,13 +45,12 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/emicklei/go-restful/v3 v3.11.0 h1:rAQeMHw1c7zTmncogyy8VvRZwtkmkZ4FxERmMY4rD+g= -github.com/emicklei/go-restful/v3 v3.11.0/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= +github.com/emicklei/go-restful/v3 v3.12.1 h1:PJMDIM/ak7btuL8Ex0iYET9hxM3CI2sjZtzpL63nKAU= +github.com/emicklei/go-restful/v3 v3.12.1/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -60,22 +59,32 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/fxamacker/cbor/v2 v2.7.0 h1:iM5WgngdRBanHcxugY4JySA0nk1wZorNOpTgCMedv5E= github.com/fxamacker/cbor/v2 v2.7.0/go.mod h1:pxXPTn3joSm21Gbwsv0w9OSA2y1HFR9qXEeXQVeNoDQ= +github.com/gabriel-vasile/mimetype v1.4.6 h1:3+PzJTKLkvgjeTbts6msPJt4DixhT4YtFNf1gtGe3zc= +github.com/gabriel-vasile/mimetype v1.4.6/go.mod h1:JX1qVKqZd40hUPpAfiNTe0Sne7hdfKSbOqqmkq8GCXc= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 
h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-openapi/jsonpointer v0.19.6 h1:eCs3fxoIi3Wh6vtgmLTOjdhSpiqphQ+DaPn38N2ZdrE= -github.com/go-openapi/jsonpointer v0.19.6/go.mod h1:osyAmYz/mB/C3I+WsTTSgw1ONzaLJoLCyoi6/zppojs= -github.com/go-openapi/jsonreference v0.20.2 h1:3sVjiK66+uXK/6oQ8xgcRKcFgQ5KXa2KvnJRumpMGbE= -github.com/go-openapi/jsonreference v0.20.2/go.mod h1:Bl1zwGIM8/wsvqjsOQLJ/SH+En5Ap4rVB5KVcIDZG2k= -github.com/go-openapi/swag v0.22.3/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= -github.com/go-openapi/swag v0.22.4 h1:QLMzNJnMGPRNDCbySlcj1x01tzU8/9LTTL9hZZZogBU= -github.com/go-openapi/swag v0.22.4/go.mod h1:UzaqsxGiab7freDnrUUra0MwWfN/q7tE4j+VcZ0yl14= +github.com/go-openapi/jsonpointer v0.21.0 h1:YgdVicSA9vH5RiHs9TZW5oyafXZFc6+2Vc1rr/O9oNQ= +github.com/go-openapi/jsonpointer v0.21.0/go.mod h1:IUyH9l/+uyhIYQ/PXVA41Rexl+kOkAPDdXEYns6fzUY= +github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF2+tg1TRrwQ= +github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= +github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+GrE= +github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= +github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= +github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= +github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= +github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= +github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= 
+github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= +github.com/go-playground/validator/v10 v10.22.1 h1:40JcKH+bBNGFczGuoBYgX4I6m/i27HYW8P9FDk5PbgA= +github.com/go-playground/validator/v10 v10.22.1/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI= github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= -github.com/gofrs/flock v0.8.1 h1:+gYjHKf32LDeiEEFhQaotPbLuUXjY5ZqxKgXy7n59aw= github.com/gofrs/flock v0.8.1/go.mod h1:F1TvTiK9OcQqauNUHlbJvyl9Qa1QvF/gOUDKA14jxHU= +github.com/gofrs/flock v0.12.1 h1:MTLVXXHf8ekldpJk3AKicLij9MdwOWkZ+a/jHHZby9E= +github.com/gofrs/flock v0.12.1/go.mod h1:9zxTsyu5xtJ9DK+1tFZyibEV7y3uwDxPPfbxeeHCoD0= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -108,8 +117,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/gofuzz v1.2.0 h1:xRy4A+RhZaiKjJ1bPfwQ8sedCA+YS2YcCHW6ec7JMi0= github.com/google/gofuzz v1.2.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= -github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af h1:kmjWCqn2qkEml422C2Rrd27c3VGxi6a/6HNq8QmHRKM= -github.com/google/pprof v0.0.0-20240525223248-4bfdf5a9a2af/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= +github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8 h1:FKHo8hFI3A+7w0aUQuYXQ+6EN5stWmeY/AZqtM8xk9k= +github.com/google/pprof v0.0.0-20240727154555-813a5fbdbec8/go.mod h1:K1liHPHnj73Fdn/EKuT8nrFqBihUSKXoLYU0BuatOYo= github.com/google/s2a-go v0.1.8 h1:zZDs9gcbt9ZPLV0ndSyQk6Kacx2g/X+SKYovpnz3SMM= 
github.com/google/s2a-go v0.1.8/go.mod h1:6iNWHTpQ+nfNRN5E00MSdfDwVesa8hhS32PhPO8deJA= github.com/google/uuid v1.1.2/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= @@ -127,19 +136,20 @@ github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnr github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= -github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA= -github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= +github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= +github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= +github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= 
+github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= +github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -151,26 +161,27 @@ github.com/oklog/run v1.1.0 h1:GEenZ1cK0+q0+wsJew9qUg/DyD8k3JzYsZAi5gYi2mA= github.com/oklog/run v1.1.0/go.mod h1:sVPdnTZT1zYwAJeCMu2Th4T21pA3FPOQRfWjQlk7DVU= github.com/onsi/ginkgo/v2 v2.19.0 h1:9Cnnf7UHo57Hy3k6/m5k3dRfGTMXGvxhHFvkDTCTpvA= github.com/onsi/ginkgo/v2 v2.19.0/go.mod h1:rlwLi9PilAFJ8jCg9UE1QP6VBpd6/xj3SRC0d6TU0To= -github.com/onsi/gomega v1.19.0 h1:4ieX6qQjPP/BfC3mpsAtIGGlxTWPeA3Inl/7DtXw1tw= -github.com/onsi/gomega v1.19.0/go.mod h1:LY+I3pBVzYsTBU1AnDwOSxaYi9WoWiqgwooUqq9yPro= -github.com/oracle/oci-go-sdk/v65 v65.75.0 h1:tifYRSqCjxANJb0xnMSZ6N2bF2xGyqcCIMg7xihgk+s= -github.com/oracle/oci-go-sdk/v65 v65.75.0/go.mod h1:IBEV9l1qBzUpo7zgGaRUhbB05BVfcDGYRFBCPlTcPp0= +github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= +github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= +github.com/oracle/oci-go-sdk/v65 v65.78.0 h1:iM7lFFA7cJkUD4tmrlsAHWgL3HuTuF9mdvTAliMkcFA= +github.com/oracle/oci-go-sdk/v65 v65.78.0/go.mod h1:IBEV9l1qBzUpo7zgGaRUhbB05BVfcDGYRFBCPlTcPp0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/prometheus/client_golang v1.20.4 
h1:Tgh3Yr67PaOv/uTqloMsCEdeuFTatm5zIq5+qNN23vI= -github.com/prometheus/client_golang v1.20.4/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= +github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y= +github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E= github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY= -github.com/prometheus/common v0.55.0 h1:KEi6DK7lXW/m7Ig5i47x0vRzuBsHuvJdi5ee6Y3G1dc= -github.com/prometheus/common v0.55.0/go.mod h1:2SECS4xJG1kd8XF9IcM1gMX6510RAEL65zxzNImwdc8= +github.com/prometheus/common v0.60.1 h1:FUas6GcOw66yB/73KC+BOZoFJmbo/1pojoILArPAaSc= +github.com/prometheus/common v0.60.1/go.mod h1:h0LYf1R1deLSKtD4Vdg8gy4RuOvENW2J/h19V5NADQw= github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc= github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk= github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= -github.com/sony/gobreaker v0.5.0 h1:dRCvqm0P490vZPmy7ppEk2qCnCieBooFJ+YoXGYB+yg= github.com/sony/gobreaker v0.5.0/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= +github.com/sony/gobreaker v1.0.0 h1:feX5fGGXSl3dYd4aHZItw+FpHLvvoaqkawKjVNiFMNQ= +github.com/sony/gobreaker v1.0.0/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -191,24 +202,24 @@ 
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 h1:TT4fX+nBOA/+LUkobKGW1ydGcn+G3vRw9+g5HwCphpk= -go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0/go.mod h1:L7UH0GbB0p47T4Rri3uHjbpCFYrVrwc1I25QhNPiGK8= -go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw= -go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8= -go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc= -go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0 h1:UP6IpuHFkUgOQL9FFQFrZ+5LiwhhYRbi7VZSIx6Nj5s= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.56.0/go.mod h1:qxuZLtbq5QDtdeSHsS7bcf6EH6uO6jUAgk764zd3rhM= +go.opentelemetry.io/otel v1.31.0 h1:NsJcKPIW0D0H3NgzPDHmo0WW6SptzPdqg/L1zsIm2hY= +go.opentelemetry.io/otel v1.31.0/go.mod h1:O0C14Yl9FgkjqcCZAsE053C13OaddMYr/hz6clDkEJE= +go.opentelemetry.io/otel/metric v1.31.0 h1:FSErL0ATQAmYHUIzSezZibnyVlft1ybhy4ozRPcF2fE= +go.opentelemetry.io/otel/metric v1.31.0/go.mod h1:C3dEloVbLuYoX41KpmAhOqNriGbA+qqH6PQ5E5mUfnY= go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo= go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok= -go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4= -go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ= +go.opentelemetry.io/otel/trace v1.31.0 h1:ffjsj1aRouKewfr85U2aGagJ46+MvodynlQ1HYdmJys= 
+go.opentelemetry.io/otel/trace v1.31.0/go.mod h1:TXZkRk7SM2ZQLtR6eoAWQFIHPvzQ06FJAsO1tJg480A= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.27.0 h1:GXm2NjJrPaiv/h1tb2UH8QfgC/hOf/+z0p6PT8o1w7A= -golang.org/x/crypto v0.27.0/go.mod h1:1Xngt8kV6Dvbssa53Ziq6Eqn0HqbZi5Z6R0ZpwQzt70= +golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= +golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20240909161429-701f63a606c0 h1:e66Fs6Z+fZTbFBAxKfP3PALWBtpfqks2bwGcexMxgtk= -golang.org/x/exp v0.0.0-20240909161429-701f63a606c0/go.mod h1:2TbTHSBQa924w8M6Xs1QcRcFwyucIwBGpK1p2f1YFFY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= +golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -223,8 +234,8 @@ golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod 
h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.29.0 h1:5ORfpBpCs4HzDYoodCDBbwHzdR5UrLBZ3sOnUJmFoHo= -golang.org/x/net v0.29.0/go.mod h1:gLkgy8jTGERgjzMic6DS9+SP0ajcu6Xu3Orq/SpETg0= +golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= +golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs= golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= @@ -240,16 +251,16 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.25.0 h1:r+8e+loiHxRqhXVl6ML1nO3l1+oFoWbnlu2Ehimmi34= -golang.org/x/sys v0.25.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM= -golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8= +golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= +golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.25.0 h1:WtHI/ltw4NvSUig5KARz9h521QvRC8RmF/cuYqifU24= +golang.org/x/term v0.25.0/go.mod h1:RPyXicDX+6vLxogjjRxjgD2TKtmAO6NZBsBRfrOLu7M= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.18.0 h1:XvMDiNzPAl0jr17s6W9lcaIhGUfUORdGCNsuLmPG224= -golang.org/x/text v0.18.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= -golang.org/x/time v0.6.0 
h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U= -golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= +golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= +golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= +golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= @@ -258,32 +269,32 @@ golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBn golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/tools v0.25.0 h1:oFU9pkj/iJgs+0DT+VMHrx+oBKs/LJMV+Uvg78sl+fE= -golang.org/x/tools v0.25.0/go.mod h1:/vtpO8WL1N9cQC3FN5zPqb//fRXskFHbLKk4OW1Q7rg= +golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= +golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/api v0.198.0 
h1:OOH5fZatk57iN0A7tjJQzt6aPfYQ1JiWkt1yGseazks= -google.golang.org/api v0.198.0/go.mod h1:/Lblzl3/Xqqk9hw/yS97TImKTUwnf1bv89v7+OagJzc= +google.golang.org/api v0.204.0 h1:3PjmQQEDkR/ENVZZwIYB4W/KzYtN8OrqnNcHWpeR8E4= +google.golang.org/api v0.204.0/go.mod h1:69y8QSoKIbL9F94bWgWAq6wGqGwyjBgi2y8rAK8zLag= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1 h1:BulPr26Jqjnd4eYDVe+YvyR7Yc2vJGkO5/0UxD0/jZU= -google.golang.org/genproto v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:hL97c3SYopEHblzpxRL4lSs523++l8DYxGM1FQiYmb4= -google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1 h1:hjSy6tcFQZ171igDaN5QHOw2n6vx40juYbC/x67CEhc= -google.golang.org/genproto/googleapis/api v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:qpvKtACPCQhAdu3PyQgV4l3LMXZEtft7y8QcarRsp9I= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1 h1:pPJltXNxVzT4pK9yD8vR9X75DaWYYmLGMsEvBfFQZzQ= -google.golang.org/genproto/googleapis/rpc v0.0.0-20240903143218-8af14fe29dc1/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU= +google.golang.org/genproto v0.0.0-20241104194629-dd2ea8efbc28 h1:KJjNNclfpIkVqrZlTWcgOOaVQ00LdBnoEaRfkUx760s= +google.golang.org/genproto v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:mt9/MofW7AWQ+Gy179ChOnvmJatV8YHUmrcedo9CIFI= +google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g= +google.golang.org/genproto/googleapis/api 
v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.67.0 h1:IdH9y6PF5MPSdAntIcpjQ+tXO41pcQsfZV2RxtQgVcw= -google.golang.org/grpc v1.67.0/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= +google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E= +google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= @@ -293,8 +304,8 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.34.2 h1:6xV6lTsCfpGD21XK49h7MhtcApnLqkfYgPcdHftf6hg= -google.golang.org/protobuf v1.34.2/go.mod 
h1:qYOHts0dSfpeUzUFpOMr/WGzszTmLH+DiWniOlNbLDw= +google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= +google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= @@ -308,20 +319,20 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= -k8s.io/api v0.31.1 h1:Xe1hX/fPW3PXYYv8BlozYqw63ytA92snr96zMW9gWTU= -k8s.io/api v0.31.1/go.mod h1:sbN1g6eY6XVLeqNsZGLnI5FwVseTrZX7Fv3O26rhAaI= -k8s.io/apimachinery v0.31.1 h1:mhcUBbj7KUjaVhyXILglcVjuS4nYXiwC+KKFBgIVy7U= -k8s.io/apimachinery v0.31.1/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= -k8s.io/client-go v0.31.1 h1:f0ugtWSbWpxHR7sjVpQwuvw9a3ZKLXX0u0itkFXufb0= -k8s.io/client-go v0.31.1/go.mod h1:sKI8871MJN2OyeqRlmA4W4KM9KBdBUpDLu/43eGemCg= +k8s.io/api v0.31.2 h1:3wLBbL5Uom/8Zy98GRPXpJ254nEFpl+hwndmk9RwmL0= +k8s.io/api v0.31.2/go.mod h1:bWmGvrGPssSK1ljmLzd3pwCQ9MgoTsRCuK35u6SygUk= +k8s.io/apimachinery v0.31.2 h1:i4vUt2hPK56W6mlT7Ry+AO8eEsyxMD1U44NR22CLTYw= +k8s.io/apimachinery v0.31.2/go.mod h1:rsPdaZJfTfLsNJSQzNHQvYoTmxhoOEofxtOsF3rtsMo= +k8s.io/client-go v0.31.2 h1:Y2F4dxU5d3AQj+ybwSMqQnpZH9F30//1ObxOKlTI9yc= +k8s.io/client-go v0.31.2/go.mod h1:NPa74jSVR/+eez2dFsEIHNa+3o09vtNaWwWwb1qSxSs= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= 
-k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340 h1:BZqlfIlq5YbRMFko6/PM7FjZpUb45WallggurYhKGag= -k8s.io/kube-openapi v0.0.0-20240228011516-70dd3763d340/go.mod h1:yD4MZYeKMBwQKVht279WycxKyM84kkAx2DPrTXaeb98= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 h1:pUdcCO1Lk/tbT5ztQWOBi5HBgbBP1J8+AsQnQCKsi8A= -k8s.io/utils v0.0.0-20240711033017-18e509b52bc8/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd h1:EDPBXCAspyGV4jQlpZSudPeMmr1bNJefnuqLsRAsHZo= -sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd/go.mod h1:B8JuhiUyNFVKdsE8h686QcCxMaH6HrOAZj4vswFpcB0= +k8s.io/kube-openapi v0.0.0-20241009091222-67ed5848f094 h1:MErs8YA0abvOqJ8gIupA1Tz6PKXYUw34XsGlA7uSL1k= +k8s.io/kube-openapi v0.0.0-20241009091222-67ed5848f094/go.mod h1:7ioBJr1A6igWjsR2fxq2EZ0mlMwYLejazSIc2bzMp2U= +k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078 h1:jGnCPejIetjiy2gqaJ5V0NLwTpF4wbQ6cZIItJCSHno= +k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= +sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+sGiqlzvrtq4= sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= diff --git a/internal/component/component.go b/internal/component/component.go new file mode 100644 index 0000000..3e8e9be --- /dev/null +++ b/internal/component/component.go @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package component + +import ( + "context" +) + +type ( + // NamedLoader returns a name/loader pair for a component + // that is used to add to an instance of `Registry`. + NamedLoader[T, C any] func() (string, Loader[T, C]) + // Loader returns a component of type `T` for + // the configuration `config` of type `C`. + Loader[T, C any] func(ctx context.Context, config C) (T, error) + // Registry is a simple map of name to `Loader` so that + // component loaders can be looked up by name. + Registry[T, C any] map[string]Loader[T, C] +) + +// Named is a shorthand wrapper around creating a dynamically named +// component. +func Named[T, C any](name string, loader Loader[T, C]) NamedLoader[T, C] { + return func() (string, Loader[T, C]) { + return name, loader + } +} + +// NewRegistry returns a pre-populated `Registry` based on the provided +// `namedLoaders`. +func NewRegistry[T, C any](namedLoaders ...NamedLoader[T, C]) Registry[T, C] { + r := make(Registry[T, C], len(namedLoaders)) + r.Register(namedLoaders...) + return r +} + +// Register adds name/loader pairs to an existing `Registry` +// by calling each of the `namedLoaders`. +func (r Registry[T, C]) Register(namedLoaders ...NamedLoader[T, C]) { + for _, l := range namedLoaders { + name, loader := l() + r[name] = loader + } +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..fbcff5f --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package config + +import ( + "fmt" + "reflect" + "strconv" + "time" + + "github.com/go-playground/validator/v10" + "github.com/mitchellh/mapstructure" +) + +// Use a single instance of Validate, it caches struct info. +var validate = validator.New() + +var ( + typeDuration = reflect.TypeOf(time.Duration(5)) // nolint: gochecknoglobals + typeTime = reflect.TypeOf(time.Time{}) // nolint: gochecknoglobals + typeStringDecoder = reflect.TypeOf((*StringDecoder)(nil)).Elem() // nolint: gochecknoglobals + typeFromStringer = reflect.TypeOf((*FromStringer)(nil)).Elem() // nolint: gochecknoglobals +) + +// StringDecoder is used as a way for custom types (or alias types) to +// override the basic decoding function in the `decodeString` +// DecodeHook. `encoding.TextMarshaler` was not used because it +// matches many Go types and would have potentially unexpected results. +// Specifying a custom decoding func should be very intentional. +type StringDecoder interface { + DecodeString(value string) error +} + +type FromStringer interface { + FromString(str string) error +} + +// Decode decodes generic map values from `input` to `output`, while providing helpful error information. +// `output` must be a pointer to a Go struct that contains `mapstructure` struct tags on fields that should +// be decoded.
This function is useful when decoding values from configuration files parsed as +// `map[string]any` or component metadata as `map[string]string`. +// +// Most of the heavy lifting is handled by the mapstructure library. A custom decoder is used to handle +// decoding string values to the supported primitives. +func Decode(input any, output any) error { + decoder, err := mapstructure.NewDecoder(&mapstructure.DecoderConfig{ + Result: output, + ErrorUnused: true, + DecodeHook: decodeHook, + }) + if err != nil { + return fmt.Errorf("could not create decoder: %w", err) + } + + if err = decoder.Decode(input); err != nil { + return fmt.Errorf("could not decode configuration: %w", err) + } + + if err = validate.Struct(output); err != nil { + return fmt.Errorf("invalidation configuration: %w", err) + } + + return nil +} + +func decodeHook( + f reflect.Type, + t reflect.Type, + data any) (any, error) { + if t.Kind() == reflect.String && f.Kind() != reflect.String { + return fmt.Sprintf("%v", data), nil + } + if f.Kind() == reflect.Ptr { + f = f.Elem() + data = reflect.ValueOf(data).Elem().Interface() + } + if f.Kind() != reflect.String { + return data, nil + } + + dataString := data.(string) + + var result any + var decoder StringDecoder + var from FromStringer + + if t.Implements(typeStringDecoder) { + result = reflect.New(t.Elem()).Interface() + decoder = result.(StringDecoder) + } else if reflect.PointerTo(t).Implements(typeStringDecoder) { + result = reflect.New(t).Interface() + decoder = result.(StringDecoder) + } + + if t.Implements(typeFromStringer) { + result = reflect.New(t.Elem()).Interface() + from = result.(FromStringer) + } else if reflect.PointerTo(t).Implements(typeFromStringer) { + result = reflect.New(t).Interface() + from = result.(FromStringer) + } + + if decoder != nil || from != nil { + if dataString == "" { + return nil, nil + } + var err error + if decoder != nil { + err = decoder.DecodeString(dataString) + } else if from != nil { + err = 
from.FromString(dataString) + } + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + if err != nil { + return nil, fmt.Errorf("invalid %s %q: %w", t.Name(), dataString, err) + } + + return result, nil + } + + switch t { + case typeDuration: + return DecodeDuration(dataString) + case typeTime: + return DecodeTime(dataString) + } + + return decodeOther(t, data, dataString) +} + +func DecodeDuration(dataString string) (time.Duration, error) { + if val, err := strconv.Atoi(dataString); err == nil { + return time.Duration(val) * time.Millisecond, nil + } + + // Convert it by parsing + d, err := time.ParseDuration(dataString) + + return d, invalidError(err, "duration", dataString) +} + +func DecodeTime(dataString string) (time.Time, error) { + // Convert it by parsing + t, err := time.Parse(time.RFC3339Nano, dataString) + if err == nil { + return t, nil + } + t, err = time.Parse(time.RFC3339, dataString) + + return t, invalidError(err, "time", dataString) +} + +func decodeOther(t reflect.Type, + data any, dataString string) (any, error) { + switch t.Kind() { // nolint: exhaustive + case reflect.Uint: + val, err := strconv.ParseUint(dataString, 10, 64) + + return uint(val), invalidError(err, "uint", dataString) + case reflect.Uint64: + val, err := strconv.ParseUint(dataString, 10, 64) + + return val, invalidError(err, "uint64", dataString) + case reflect.Uint32: + val, err := strconv.ParseUint(dataString, 10, 32) + + return uint32(val), invalidError(err, "uint32", dataString) + case reflect.Uint16: + val, err := strconv.ParseUint(dataString, 10, 16) + + return uint16(val), invalidError(err, "uint16", dataString) + case reflect.Uint8: + val, err := strconv.ParseUint(dataString, 10, 8) + + return uint8(val), invalidError(err, "uint8", dataString) + + case reflect.Int: + val, err := strconv.ParseInt(dataString, 10, 64) + + return int(val), invalidError(err, "int", dataString) + case reflect.Int64: + val, err := strconv.ParseInt(dataString, 10, 64) + + return val, 
invalidError(err, "int64", dataString) + case reflect.Int32: + val, err := strconv.ParseInt(dataString, 10, 32) + + return int32(val), invalidError(err, "int32", dataString) + case reflect.Int16: + val, err := strconv.ParseInt(dataString, 10, 16) + + return int16(val), invalidError(err, "int16", dataString) + case reflect.Int8: + val, err := strconv.ParseInt(dataString, 10, 8) + + return int8(val), invalidError(err, "int8", dataString) + + case reflect.Float32: + val, err := strconv.ParseFloat(dataString, 32) + + return float32(val), invalidError(err, "float32", dataString) + case reflect.Float64: + val, err := strconv.ParseFloat(dataString, 64) + + return val, invalidError(err, "float64", dataString) + + case reflect.Bool: + val, err := strconv.ParseBool(dataString) + + return val, invalidError(err, "bool", dataString) + + default: + return data, nil + } +} + +func invalidError(err error, msg, value string) error { + if err == nil { + return nil + } + + return fmt.Errorf("invalid %s %q", msg, value) +} diff --git a/pkg/utils/exec.go b/internal/exec/exec.go similarity index 99% rename from pkg/utils/exec.go rename to internal/exec/exec.go index 352b2d7..802e574 100644 --- a/pkg/utils/exec.go +++ b/internal/exec/exec.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package utils +package exec import ( "bytes" diff --git a/pkg/utils/utils.go b/internal/files/utils.go similarity index 90% rename from pkg/utils/utils.go rename to internal/files/utils.go index 49cd957..eb0da77 100644 --- a/pkg/utils/utils.go +++ b/internal/files/utils.go @@ -14,14 +14,14 @@ * limitations under the License. 
*/ -package utils +package files import ( "fmt" "os" ) -func ValidateFile(name, description string) error { +func Validate(name, description string) error { if len(name) == 0 { return fmt.Errorf("missing filename for %s", description) } @@ -31,7 +31,7 @@ func ValidateFile(name, description string) error { return nil } -func CreateFile(path string, data []byte) error { +func Create(path string, data []byte) error { file, err := os.Create(path) if err != nil { return fmt.Errorf("failed to create %q: %v", path, err) diff --git a/pkg/utils/utils_test.go b/internal/files/utils_test.go similarity index 93% rename from pkg/utils/utils_test.go rename to internal/files/utils_test.go index 95df540..67cc925 100644 --- a/pkg/utils/utils_test.go +++ b/internal/files/utils_test.go @@ -14,13 +14,15 @@ * limitations under the License. */ -package utils +package files_test import ( "os" "testing" "github.com/stretchr/testify/require" + + "github.com/NVIDIA/topograph/internal/files" ) func TestValidateFile(t *testing.T) { @@ -59,7 +61,7 @@ func TestValidateFile(t *testing.T) { defer func() { _ = f.Close() }() tc.fname = f.Name() } - err := ValidateFile(tc.fname, tc.descr) + err := files.Validate(tc.fname, tc.descr) if len(tc.err) != 0 { require.EqualError(t, err, tc.err) } else { diff --git a/pkg/utils/http.go b/internal/httpreq/httpreq.go similarity index 82% rename from pkg/utils/http.go rename to internal/httpreq/httpreq.go index 185fa16..10d9387 100644 --- a/pkg/utils/http.go +++ b/internal/httpreq/httpreq.go @@ -14,7 +14,7 @@ * limitations under the License. 
*/ -package utils +package httpreq import ( "fmt" @@ -40,10 +40,10 @@ var ( } ) -type HttpRequestFunc func() (*http.Request, error) +type RequestFunc func() (*http.Request, error) -// HttpRequest sends HTTP requests and returns HTTP response -func HttpRequest(f HttpRequestFunc) (*http.Response, []byte, error) { +// DoRequest sends HTTP requests and returns HTTP response +func DoRequest(f RequestFunc) (*http.Response, []byte, error) { req, err := f() if err != nil { return nil, nil, err @@ -68,11 +68,11 @@ func HttpRequest(f HttpRequestFunc) (*http.Response, []byte, error) { return resp, body, fmt.Errorf("HTTP %d %s: %s", resp.StatusCode, resp.Status, string(body)) } -// HttpRequestWithRetries sends HTTP requests and returns HTTP response; retries if needed -func HttpRequestWithRetries(f HttpRequestFunc) (resp *http.Response, body []byte, err error) { +// DoRequestWithRetries sends HTTP requests and returns HTTP response; retries if needed +func DoRequestWithRetries(f RequestFunc) (resp *http.Response, body []byte, err error) { klog.V(4).Infof("Sending HTTP request with retries") for r := 1; r <= retries; r++ { - resp, body, err = HttpRequest(f) + resp, body, err = DoRequest(f) if err == nil || !retryHttpCodes[resp.StatusCode] { break } diff --git a/pkg/config/config.go b/pkg/config/config.go index 930073c..97f1dae 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -25,8 +25,8 @@ import ( "gopkg.in/yaml.v3" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/utils" + "github.com/NVIDIA/topograph/internal/files" + "github.com/NVIDIA/topograph/pkg/registry" ) type Config struct { @@ -79,18 +79,18 @@ func (cfg *Config) validate() error { return fmt.Errorf("port is not set") } - switch cfg.Provider { - case common.ProviderAWS, common.ProviderOCI, common.ProviderGCP, common.ProviderCW, common.ProviderBM, common.ProviderTest, "": - //nop - default: - return fmt.Errorf("unsupported provider %s", cfg.Provider) + if 
cfg.Provider != "" { + _, ok := registry.Providers[cfg.Provider] + if !ok { + return fmt.Errorf("unsupported provider %s", cfg.Provider) + } } - switch cfg.Engine { - case common.EngineK8S, common.EngineSLURM, common.EngineTest, "": - //nop - default: - return fmt.Errorf("unsupported engine %s", cfg.Engine) + if cfg.Engine != "" { + _, ok := registry.Engines[cfg.Engine] + if !ok { + return fmt.Errorf("unsupported engine %s", cfg.Engine) + } } if cfg.RequestAggregationDelay == 0 { @@ -101,13 +101,13 @@ func (cfg *Config) validate() error { if cfg.SSL == nil { return fmt.Errorf("missing ssl section") } - if err := utils.ValidateFile(cfg.SSL.Cert, "server certificate"); err != nil { + if err := files.Validate(cfg.SSL.Cert, "server certificate"); err != nil { return err } - if err := utils.ValidateFile(cfg.SSL.Key, "server key"); err != nil { + if err := files.Validate(cfg.SSL.Key, "server key"); err != nil { return err } - if err := utils.ValidateFile(cfg.SSL.CaCert, "CA certificate"); err != nil { + if err := files.Validate(cfg.SSL.CaCert, "CA certificate"); err != nil { return err } } @@ -135,7 +135,7 @@ func (cfg *Config) readCredentials() error { if cfg.CredsPath == nil { return nil } - if err := utils.ValidateFile(*cfg.CredsPath, "API credentials"); err != nil { + if err := files.Validate(*cfg.CredsPath, "API credentials"); err != nil { return err } diff --git a/pkg/engines/engines.go b/pkg/engines/engines.go new file mode 100644 index 0000000..fcc0166 --- /dev/null +++ b/pkg/engines/engines.go @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package engines + +import ( + "context" + "errors" + "fmt" + + "github.com/NVIDIA/topograph/internal/component" + "github.com/NVIDIA/topograph/pkg/topology" +) + +const ( + // TODO: Try to remove this + EngineSLURM = "slurm" +) + +type Engine interface { + GetComputeInstances(ctx context.Context, environment Environment) ([]topology.ComputeInstances, error) + GenerateOutput(ctx context.Context, vertex *topology.Vertex, params map[string]any) ([]byte, error) +} + +type Environment interface{} + +type Config = struct{} +type NamedLoader = component.NamedLoader[Engine, Config] +type Loader = component.Loader[Engine, Config] +type Registry component.Registry[Engine, Config] + +var ErrUnsupportedEngine = errors.New("unsupported engine") + +func NewRegistry(namedLoaders ...NamedLoader) Registry { + return Registry(component.NewRegistry(namedLoaders...)) +} + +func (r Registry) Get(name string) (Loader, error) { + loader, ok := r[name] + if !ok { + return nil, fmt.Errorf("unsupported engine %q, %w", name, ErrUnsupportedEngine) + } + + return loader, nil +} diff --git a/pkg/engines/k8s/engine.go b/pkg/engines/k8s/engine.go index 897b3eb..79a017a 100644 --- a/pkg/engines/k8s/engine.go +++ b/pkg/engines/k8s/engine.go @@ -20,18 +20,42 @@ import ( "bytes" "context" + k8s_core_v1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/internal/config" + "github.com/NVIDIA/topograph/pkg/engines" + "github.com/NVIDIA/topograph/pkg/topology" 
"github.com/NVIDIA/topograph/pkg/translate" ) +const NAME = "k8s" + type K8sEngine struct { kubeClient *kubernetes.Clientset } -func GetK8sEngine() (*K8sEngine, error) { +type Params struct { + TopoConfigPath string `mapstructure:"topology_config_path"` + TopoConfigmapName string `mapstructure:"topology_configmap_name"` + TopoConfigmapNamespace string `mapstructure:"topology_configmap_namespace"` +} + +type k8sNodeInfo interface { + GetNodeRegion(node *k8s_core_v1.Node) (string, error) + GetNodeInstance(node *k8s_core_v1.Node) (string, error) +} + +func NamedLoader() (string, engines.Loader) { + return NAME, Loader +} + +func Loader(ctx context.Context, config engines.Config) (engines.Engine, error) { + return New() +} + +func New() (*K8sEngine, error) { config, err := rest.InClusterConfig() if err != nil { return nil, err @@ -45,7 +69,7 @@ func GetK8sEngine() (*K8sEngine, error) { return &K8sEngine{kubeClient: kubeClient}, nil } -func (eng *K8sEngine) GenerateOutput(ctx context.Context, tree *common.Vertex, params map[string]string) ([]byte, error) { +func (eng *K8sEngine) GenerateOutput(ctx context.Context, tree *topology.Vertex, params map[string]any) ([]byte, error) { if err := NewTopologyLabeler().ApplyNodeLabels(ctx, tree, eng); err != nil { return nil, err } @@ -55,11 +79,16 @@ func (eng *K8sEngine) GenerateOutput(ctx context.Context, tree *common.Vertex, p return nil, err } + var p Params + if err := config.Decode(params, &p); err != nil { + return nil, err + } + cfg := buf.Bytes() - filename := params[common.KeyTopoConfigPath] - cmName := params[common.KeyTopoConfigmapName] - cmNamespace := params[common.KeyTopoConfigmapNamespace] + filename := p.TopoConfigPath + cmName := p.TopoConfigmapName + cmNamespace := p.TopoConfigmapNamespace err = eng.UpdateTopologyConfigmap(ctx, cmName, cmNamespace, map[string]string{filename: string(cfg)}) if err != nil { return nil, err diff --git a/pkg/engines/k8s/kubernetes.go b/pkg/engines/k8s/kubernetes.go index 
1168b1e..29ef767 100644 --- a/pkg/engines/k8s/kubernetes.go +++ b/pkg/engines/k8s/kubernetes.go @@ -18,6 +18,7 @@ package k8s import ( "context" + std_errors "errors" "fmt" v1 "k8s.io/api/core/v1" @@ -25,20 +26,33 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/engines" + "github.com/NVIDIA/topograph/pkg/topology" ) -type attr func(*v1.Node) string +var ErrEnvironmentUnsupported = std_errors.New("environment must implement k8sNodeInfo") + +func (eng *K8sEngine) GetComputeInstances(ctx context.Context, environment engines.Environment) ([]topology.ComputeInstances, error) { + k8sNodeInfo, ok := environment.(k8sNodeInfo) + if !ok { + return nil, ErrEnvironmentUnsupported + } -func (eng *K8sEngine) GetComputeInstances(ctx context.Context, getRegion, getInstance attr) ([]common.ComputeInstances, error) { nodeList, err := eng.kubeClient.CoreV1().Nodes().List(ctx, metav1.ListOptions{}) if err != nil { return nil, fmt.Errorf("unable to list node in the cluster: %v", err) } + regions := make(map[string]map[string]string) for _, n := range nodeList.Items { - region := getRegion(&n) - instance := getInstance(&n) + region, err := k8sNodeInfo.GetNodeRegion(&n) + if err != nil { + return nil, err + } + instance, err := k8sNodeInfo.GetNodeInstance(&n) + if err != nil { + return nil, err + } _, ok := regions[region] if !ok { @@ -47,10 +61,11 @@ func (eng *K8sEngine) GetComputeInstances(ctx context.Context, getRegion, getIns regions[region][instance] = n.Name } - cis := make([]common.ComputeInstances, 0, len(regions)) + cis := make([]topology.ComputeInstances, 0, len(regions)) for region, nodes := range regions { - cis = append(cis, common.ComputeInstances{Region: region, Instances: nodes}) + cis = append(cis, topology.ComputeInstances{Region: region, Instances: nodes}) } + return cis, nil } diff --git a/pkg/engines/k8s/labeler.go b/pkg/engines/k8s/labeler.go index 
ebd81be..056a41f 100644 --- a/pkg/engines/k8s/labeler.go +++ b/pkg/engines/k8s/labeler.go @@ -21,7 +21,7 @@ import ( "fmt" "hash/fnv" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/topology" ) type Labeler interface { @@ -38,7 +38,7 @@ func NewTopologyLabeler() *topologyLabeler { } } -func (l *topologyLabeler) ApplyNodeLabels(ctx context.Context, v *common.Vertex, labeler Labeler) error { +func (l *topologyLabeler) ApplyNodeLabels(ctx context.Context, v *topology.Vertex, labeler Labeler) error { if v == nil { return nil } @@ -50,7 +50,7 @@ func (l *topologyLabeler) ApplyNodeLabels(ctx context.Context, v *common.Vertex, return l.applyNodeLabels(ctx, v, labeler, levels) } -func (l *topologyLabeler) applyNodeLabels(ctx context.Context, v *common.Vertex, labeler Labeler, levels []string) error { +func (l *topologyLabeler) applyNodeLabels(ctx context.Context, v *topology.Vertex, labeler Labeler, levels []string) error { if len(v.Vertices) == 0 { // compute node if len(levels) != 0 { if v.ID != levels[0] { diff --git a/pkg/engines/slurm/slurm.go b/pkg/engines/slurm/slurm.go index 3d58f4c..3fed155 100644 --- a/pkg/engines/slurm/slurm.go +++ b/pkg/engines/slurm/slurm.go @@ -20,14 +20,18 @@ import ( "bufio" "bytes" "context" + "errors" "fmt" "strings" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/internal/config" + "github.com/NVIDIA/topograph/internal/exec" + "github.com/NVIDIA/topograph/internal/files" + "github.com/NVIDIA/topograph/pkg/engines" + "github.com/NVIDIA/topograph/pkg/topology" "github.com/NVIDIA/topograph/pkg/translate" - "github.com/NVIDIA/topograph/pkg/utils" ) const TopologyHeader = ` @@ -37,13 +41,69 @@ const TopologyHeader = ` ############################################################### ` +const NAME = "slurm" + type SlurmEngine struct{} +type Params struct { + Plugin string `mapstructure:"plugin"` + TopoConfigPath string `mapstructure:"topology_config_path"` + BlockSizes 
string `mapstructure:"block_sizes"` + SkipReload string `mapstructure:"skip_reload"` // TODO: Should this be a bool +} + +type instanceMapper interface { + Instances2NodeMap(ctx context.Context, nodes []string) (map[string]string, error) + GetComputeInstancesRegion() (string, error) +} + +var ErrEnvironmentUnsupported = errors.New("environment must implement instanceMapper") + +func NamedLoader() (string, engines.Loader) { + return NAME, Loader +} + +func Loader(ctx context.Context, config engines.Config) (engines.Engine, error) { + return New() +} + +func New() (*SlurmEngine, error) { + return &SlurmEngine{}, nil +} + +func (eng *SlurmEngine) GetComputeInstances(ctx context.Context, environment engines.Environment) ([]topology.ComputeInstances, error) { + instanceMapper, ok := environment.(instanceMapper) + if !ok { + return nil, ErrEnvironmentUnsupported + } + + nodes, err := GetNodeList(ctx) + if err != nil { + return nil, err + } + + i2n, err := instanceMapper.Instances2NodeMap(ctx, nodes) + if err != nil { + return nil, err + } + + region, err := instanceMapper.GetComputeInstancesRegion() + if err != nil { + return nil, err + } + + return []topology.ComputeInstances{{ + Region: region, + Instances: i2n, + }}, nil +} + func GetNodeList(ctx context.Context) ([]string, error) { - stdout, err := utils.Exec(ctx, "scontrol", []string{"show", "nodes", "-o"}, nil) + stdout, err := exec.Exec(ctx, "scontrol", []string{"show", "nodes", "-o"}, nil) if err != nil { return nil, err } + klog.V(4).Infof("stdout: %s", stdout.String()) nodes := []string{} @@ -65,31 +125,40 @@ func GetNodeList(ctx context.Context) ([]string, error) { return nodes, nil } -func (eng *SlurmEngine) GenerateOutput(ctx context.Context, tree *common.Vertex, params map[string]string) ([]byte, error) { +func (eng *SlurmEngine) GenerateOutput(ctx context.Context, tree *topology.Vertex, params map[string]any) ([]byte, error) { return GenerateOutput(ctx, tree, params) } -func GenerateOutput(ctx 
context.Context, tree *common.Vertex, params map[string]string) ([]byte, error) { +func GenerateOutput(ctx context.Context, tree *topology.Vertex, params map[string]any) ([]byte, error) { + var p Params + if err := config.Decode(params, &p); err != nil { + return nil, err + } + + return GenerateOutputParams(ctx, tree, &p) +} + +func GenerateOutputParams(ctx context.Context, tree *topology.Vertex, params *Params) ([]byte, error) { buf := &bytes.Buffer{} - path := params[common.KeyTopoConfigPath] + path := params.TopoConfigPath if len(path) != 0 { var plugin string if len(tree.Metadata) != 0 { - plugin = tree.Metadata[common.KeyPlugin] + plugin = tree.Metadata[topology.KeyPlugin] } if len(plugin) == 0 { - plugin = common.ValTopologyTree + plugin = topology.ValTopologyTree } if _, err := buf.WriteString(fmt.Sprintf(TopologyHeader, plugin)); err != nil { return nil, err } } - blockSize := params[common.KeyBlockSizes] + blockSize := params.BlockSizes if len(blockSize) != 0 { - tree.Metadata[common.KeyBlockSizes] = blockSize + tree.Metadata[topology.KeyBlockSizes] = blockSize } err := translate.ToGraph(buf, tree) @@ -104,24 +173,27 @@ func GenerateOutput(ctx context.Context, tree *common.Vertex, params map[string] } klog.Infof("Writing topology config in %q", path) - if err = utils.CreateFile(path, cfg); err != nil { + if err = files.Create(path, cfg); err != nil { return nil, err } - if _, ok := params[common.KeySkipReload]; ok { + if len(params.SkipReload) > 0 { klog.Infof("Skip SLURM reconfiguration") } else { if err = reconfigure(ctx); err != nil { return nil, err } } + return []byte("OK\n"), nil } func reconfigure(ctx context.Context) error { - stdout, err := utils.Exec(ctx, "scontrol", []string{"reconfigure"}, nil) + stdout, err := exec.Exec(ctx, "scontrol", []string{"reconfigure"}, nil) if err != nil { return err } + klog.V(4).Infof("stdout: %s", stdout.String()) + return nil } diff --git a/pkg/engines/test/test.go b/pkg/engines/test/test.go new file mode 100644 
index 0000000..5bdcc0e --- /dev/null +++ b/pkg/engines/test/test.go @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package test + +import ( + "context" + "errors" + + "github.com/NVIDIA/topograph/internal/config" + "github.com/NVIDIA/topograph/pkg/engines" + "github.com/NVIDIA/topograph/pkg/engines/slurm" + "github.com/NVIDIA/topograph/pkg/topology" +) + +const NAME = "test" + +type TestEngine struct{} + +var ErrEnvironmentUnsupported = errors.New("test engine does not support GetComputeInstances") + +func NamedLoader() (string, engines.Loader) { + return NAME, Loader +} + +func Loader(ctx context.Context, config engines.Config) (engines.Engine, error) { + return New() +} + +func New() (*TestEngine, error) { + return &TestEngine{}, nil +} + +func (eng *TestEngine) GetComputeInstances(ctx context.Context, environment engines.Environment) ([]topology.ComputeInstances, error) { + return nil, ErrEnvironmentUnsupported +} + +func (eng *TestEngine) GenerateOutput(ctx context.Context, tree *topology.Vertex, params map[string]any) ([]byte, error) { + if params == nil { + params = make(map[string]any) + } + + var p slurm.Params + if err := config.Decode(params, &p); err != nil { + return nil, err + } + + if len(tree.Metadata) == 0 { + tree.Metadata = make(map[string]string) + } + + tree.Metadata[topology.KeyPlugin] = p.Plugin + tree.Metadata[topology.KeyBlockSizes] = 
p.BlockSizes + return slurm.GenerateOutputParams(ctx, tree, &p) +} diff --git a/pkg/factory/engine.go b/pkg/factory/engine.go deleted file mode 100644 index 9f5d034..0000000 --- a/pkg/factory/engine.go +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package factory - -import ( - "context" - "fmt" - "net/http" - - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/engines/k8s" - "github.com/NVIDIA/topograph/pkg/engines/slurm" -) - -func GetEngine(engine string) (common.Engine, *common.HTTPError) { - var ( - eng common.Engine - err error - ) - - switch engine { - case common.EngineSLURM: - eng = &slurm.SlurmEngine{} - case common.EngineK8S: - eng, err = k8s.GetK8sEngine() - case common.EngineTest: - eng = &testEngine{} - default: - return nil, common.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("unsupported engine %q", engine)) - } - - if err != nil { - return nil, common.NewHTTPError(http.StatusInternalServerError, err.Error()) - } - - return eng, nil -} - -type testEngine struct{} - -func (eng *testEngine) GenerateOutput(ctx context.Context, tree *common.Vertex, params map[string]string) ([]byte, error) { - if params == nil { - params = make(map[string]string) - } - - params[common.KeySkipReload] = "" - if len(tree.Metadata) == 0 { - tree.Metadata = make(map[string]string) - } - - tree.Metadata[common.KeyPlugin] = 
params[common.KeyPlugin] - tree.Metadata[common.KeyBlockSizes] = params[common.KeyBlockSizes] - return slurm.GenerateOutput(ctx, tree, params) -} diff --git a/pkg/factory/provider.go b/pkg/factory/provider.go deleted file mode 100644 index e17cd29..0000000 --- a/pkg/factory/provider.go +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package factory - -import ( - "context" - "fmt" - "net/http" - - "k8s.io/klog/v2" - - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/models" - "github.com/NVIDIA/topograph/pkg/providers/aws" - "github.com/NVIDIA/topograph/pkg/providers/baremetal" - "github.com/NVIDIA/topograph/pkg/providers/cw" - "github.com/NVIDIA/topograph/pkg/providers/gcp" - "github.com/NVIDIA/topograph/pkg/providers/oci" - "github.com/NVIDIA/topograph/pkg/translate" -) - -func GetProvider(provider string, params map[string]string) (common.Provider, *common.HTTPError) { - var ( - prv common.Provider - err error - ) - - switch provider { - case common.ProviderAWS: - prv, err = aws.GetProvider() - case common.ProviderGCP: - prv, err = gcp.GetProvider() - case common.ProviderOCI: - prv, err = oci.GetProvider() - case common.ProviderCW: - prv, err = cw.GetProvider() - case common.ProviderBM: - prv, err = baremetal.GetProvider() - case common.ProviderTest: - prv, err = GetTestProvider(params) - default: - return nil, 
common.NewHTTPError(http.StatusBadRequest, fmt.Sprintf("unsupported provider %q", provider)) - } - - if err != nil { - return nil, common.NewHTTPError(http.StatusInternalServerError, err.Error()) - } - - return prv, nil -} - -type testProvider struct { - tree *common.Vertex - instance2node map[string]string -} - -func GetTestProvider(params map[string]string) (*testProvider, error) { - p := &testProvider{} - - if path, ok := params[common.KeyModelPath]; !ok || len(path) == 0 { - p.tree, p.instance2node = translate.GetTreeTestSet(false) - } else { - klog.InfoS("Using simulated topology", "model path", params[common.KeyModelPath]) - model, err := models.NewModelFromFile(params[common.KeyModelPath]) - if err != nil { - return nil, err // Wrapped by models.NewModelFromFile - } - p.tree, p.instance2node = model.ToTree() - } - return p, nil -} - -func (p *testProvider) GetCredentials(_ map[string]string) (interface{}, error) { - return nil, nil -} - -func (p *testProvider) GetComputeInstances(_ context.Context, _ common.Engine) ([]common.ComputeInstances, error) { - return []common.ComputeInstances{{Instances: p.instance2node}}, nil -} - -func (p *testProvider) GenerateTopologyConfig(_ context.Context, _ interface{}, _ int, _ []common.ComputeInstances) (*common.Vertex, error) { - return p.tree, nil -} diff --git a/pkg/ib/ib.go b/pkg/ib/ib.go index 1152be5..d0a48b4 100644 --- a/pkg/ib/ib.go +++ b/pkg/ib/ib.go @@ -24,9 +24,8 @@ import ( "sort" "strings" + "github.com/NVIDIA/topograph/pkg/topology" "golang.org/x/exp/maps" - - "github.com/NVIDIA/topograph/pkg/common" ) var ( @@ -53,7 +52,7 @@ type Switch struct { Nodes map[string]string // ID:node name } -func GenerateTopologyConfig(data []byte) (*common.Vertex, error) { +func GenerateTopologyConfig(data []byte) (*topology.Vertex, error) { switches, hca, err := ParseIbnetdiscoverFile(data) if err != nil { return nil, fmt.Errorf("unable to parse ibnetdiscover file: %v", err) @@ -68,14 +67,14 @@ func 
GenerateTopologyConfig(data []byte) (*common.Vertex, error) { return root.toGraph() } -func (sw *Switch) toGraph() (*common.Vertex, error) { - vertex := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), +func (sw *Switch) toGraph() (*topology.Vertex, error) { + vertex := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } vertex.ID = sw.Name if len(sw.Children) == 0 { for id, name := range sw.Nodes { - vertex.Vertices[id] = &common.Vertex{ + vertex.Vertices[id] = &topology.Vertex{ Name: name, ID: id, } diff --git a/pkg/models/model.go b/pkg/models/model.go index 1b562c0..1d61743 100644 --- a/pkg/models/model.go +++ b/pkg/models/model.go @@ -22,7 +22,7 @@ import ( "gopkg.in/yaml.v3" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/topology" ) type Model struct { @@ -140,28 +140,28 @@ func getNetworkLayers(name string, swmap map[string]string) ([]string, error) { } } -func (model *Model) ToTree() (*common.Vertex, map[string]string) { +func (model *Model) ToTree() (*topology.Vertex, map[string]string) { instance2node := make(map[string]string) - nodeVertexMap := make(map[string]*common.Vertex) - swVertexMap := make(map[string]*common.Vertex) + nodeVertexMap := make(map[string]*topology.Vertex) + swVertexMap := make(map[string]*topology.Vertex) swRootMap := make(map[string]bool) - blockVertexMap := make(map[string]*common.Vertex) + blockVertexMap := make(map[string]*topology.Vertex) // Create all the vertices for each node for k, v := range model.Nodes { instance2node[k] = k - nodeVertexMap[k] = &common.Vertex{ID: v.Name, Name: v.Name} + nodeVertexMap[k] = &topology.Vertex{ID: v.Name, Name: v.Name} } // Initialize all the vertices for each switch (setting each on to be a possible root) for _, sw := range model.Switches { - swVertexMap[sw.Name] = &common.Vertex{ID: sw.Name, Vertices: make(map[string]*common.Vertex)} + swVertexMap[sw.Name] = &topology.Vertex{ID: sw.Name, Vertices: make(map[string]*topology.Vertex)} 
swRootMap[sw.Name] = true } // Initializes all the block vertices for _, cb := range model.CapacityBlocks { - blockVertexMap[cb.Name] = &common.Vertex{ID: cb.Name, Vertices: make(map[string]*common.Vertex)} + blockVertexMap[cb.Name] = &topology.Vertex{ID: cb.Name, Vertices: make(map[string]*topology.Vertex)} for _, node := range cb.Nodes { blockVertexMap[cb.Name].Vertices[node] = nodeVertexMap[node] } @@ -186,18 +186,21 @@ func (model *Model) ToTree() (*common.Vertex, map[string]string) { } // Connects all root vertices to the hidden root - treeRoot := &common.Vertex{Vertices: make(map[string]*common.Vertex)} + treeRoot := &topology.Vertex{Vertices: make(map[string]*topology.Vertex)} for k, v := range swRootMap { if v { treeRoot.Vertices[k] = swVertexMap[k] } } - blockRoot := &common.Vertex{Vertices: make(map[string]*common.Vertex)} + blockRoot := &topology.Vertex{Vertices: make(map[string]*topology.Vertex)} for k, v := range blockVertexMap { blockRoot.Vertices[k] = v } - root := &common.Vertex{ - Vertices: map[string]*common.Vertex{common.ValTopologyBlock: blockRoot, common.ValTopologyTree: treeRoot}, + root := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{ + topology.ValTopologyBlock: blockRoot, + topology.ValTopologyTree: treeRoot, + }, } return root, instance2node } diff --git a/pkg/node_observer/controller.go b/pkg/node_observer/controller.go index 2b25a87..0f4d652 100644 --- a/pkg/node_observer/controller.go +++ b/pkg/node_observer/controller.go @@ -26,8 +26,8 @@ import ( "k8s.io/client-go/kubernetes" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/utils" + "github.com/NVIDIA/topograph/internal/httpreq" + "github.com/NVIDIA/topograph/pkg/topology" ) type Controller struct { @@ -38,13 +38,13 @@ type Controller struct { } func NewController(ctx context.Context, client kubernetes.Interface, cfg *Config) (*Controller, error) { - var f utils.HttpRequestFunc = func() (*http.Request, error) { - params := 
map[string]string{ - common.KeyTopoConfigPath: cfg.TopologyConfigmap.Filename, - common.KeyTopoConfigmapName: cfg.TopologyConfigmap.Name, - common.KeyTopoConfigmapNamespace: cfg.TopologyConfigmap.Namespace, + var f httpreq.RequestFunc = func() (*http.Request, error) { + params := map[string]any{ + topology.KeyTopoConfigPath: cfg.TopologyConfigmap.Filename, + topology.KeyTopoConfigmapName: cfg.TopologyConfigmap.Name, + topology.KeyTopoConfigmapNamespace: cfg.TopologyConfigmap.Namespace, } - payload := common.NewTopologyRequest(cfg.Provider, nil, cfg.Engine, params) + payload := topology.NewRequest(cfg.Provider, nil, cfg.Engine, params) data, err := json.Marshal(payload) if err != nil { return nil, fmt.Errorf("failed to parse payload: %v", err) diff --git a/pkg/node_observer/node_informer.go b/pkg/node_observer/node_informer.go index 0e84f2c..c8f4134 100644 --- a/pkg/node_observer/node_informer.go +++ b/pkg/node_observer/node_informer.go @@ -27,17 +27,17 @@ import ( "k8s.io/client-go/tools/cache" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/utils" + "github.com/NVIDIA/topograph/internal/httpreq" ) type NodeInformer struct { ctx context.Context client kubernetes.Interface - reqFunc utils.HttpRequestFunc + reqFunc httpreq.RequestFunc factory informers.SharedInformerFactory } -func NewNodeInformer(ctx context.Context, client kubernetes.Interface, nodeLabels map[string]string, reqFunc utils.HttpRequestFunc) *NodeInformer { +func NewNodeInformer(ctx context.Context, client kubernetes.Interface, nodeLabels map[string]string, reqFunc httpreq.RequestFunc) *NodeInformer { klog.Infof("Configuring node informer with labels %v", nodeLabels) listOptionsFunc := func(options *metav1.ListOptions) { options.LabelSelector = labels.Set(nodeLabels).AsSelector().String() @@ -86,7 +86,7 @@ func (n *NodeInformer) Stop(_ error) { } func (n *NodeInformer) SendRequest() { - _, _, err := utils.HttpRequestWithRetries(n.reqFunc) + _, _, err := httpreq.DoRequestWithRetries(n.reqFunc) if 
err != nil { klog.Errorf("failed to send HTTP request: %v", err) } diff --git a/pkg/protos/topology.pb.go b/pkg/protos/topology.pb.go index 8c8029e..3b950c7 100644 --- a/pkg/protos/topology.pb.go +++ b/pkg/protos/topology.pb.go @@ -15,8 +15,8 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: -// protoc-gen-go v1.34.2 -// protoc v5.27.0 +// protoc-gen-go v1.35.1 +// protoc v5.28.2 // source: topology.proto package protos @@ -47,11 +47,9 @@ type TopologyRequest struct { func (x *TopologyRequest) Reset() { *x = TopologyRequest{} - if protoimpl.UnsafeEnabled { - mi := &file_topology_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_topology_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *TopologyRequest) String() string { @@ -62,7 +60,7 @@ func (*TopologyRequest) ProtoMessage() {} func (x *TopologyRequest) ProtoReflect() protoreflect.Message { mi := &file_topology_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -108,11 +106,9 @@ type TopologyResponse struct { func (x *TopologyResponse) Reset() { *x = TopologyResponse{} - if protoimpl.UnsafeEnabled { - mi := &file_topology_proto_msgTypes[1] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_topology_proto_msgTypes[1] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *TopologyResponse) String() string { @@ -123,7 +119,7 @@ func (*TopologyResponse) ProtoMessage() {} func (x *TopologyResponse) ProtoReflect() protoreflect.Message { mi := &file_topology_proto_msgTypes[1] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ 
-161,11 +157,9 @@ type Instance struct { func (x *Instance) Reset() { *x = Instance{} - if protoimpl.UnsafeEnabled { - mi := &file_topology_proto_msgTypes[2] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } + mi := &file_topology_proto_msgTypes[2] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) } func (x *Instance) String() string { @@ -176,7 +170,7 @@ func (*Instance) ProtoMessage() {} func (x *Instance) ProtoReflect() protoreflect.Message { mi := &file_topology_proto_msgTypes[2] - if protoimpl.UnsafeEnabled && x != nil { + if x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { ms.StoreMessageInfo(mi) @@ -314,44 +308,6 @@ func file_topology_proto_init() { if File_topology_proto != nil { return } - if !protoimpl.UnsafeEnabled { - file_topology_proto_msgTypes[0].Exporter = func(v any, i int) any { - switch v := v.(*TopologyRequest); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_topology_proto_msgTypes[1].Exporter = func(v any, i int) any { - switch v := v.(*TopologyResponse); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - file_topology_proto_msgTypes[2].Exporter = func(v any, i int) any { - switch v := v.(*Instance); i { - case 0: - return &v.state - case 1: - return &v.sizeCache - case 2: - return &v.unknownFields - default: - return nil - } - } - } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ diff --git a/pkg/protos/topology_grpc.pb.go b/pkg/protos/topology_grpc.pb.go index 637747c..9210432 100644 --- a/pkg/protos/topology_grpc.pb.go +++ b/pkg/protos/topology_grpc.pb.go @@ -16,7 +16,7 @@ // Code generated by protoc-gen-go-grpc. DO NOT EDIT. 
// versions: // - protoc-gen-go-grpc v1.5.1 -// - protoc v5.27.0 +// - protoc v5.28.2 // source: topology.proto package protos diff --git a/pkg/providers/aws/imds.go b/pkg/providers/aws/imds.go deleted file mode 100644 index d076a34..0000000 --- a/pkg/providers/aws/imds.go +++ /dev/null @@ -1,185 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package aws - -import ( - "bufio" - "context" - "encoding/json" - "fmt" - "net/http" - "strings" - "time" - - "k8s.io/klog/v2" - - "github.com/NVIDIA/topograph/pkg/utils" -) - -const ( - IMDS = "http://169.254.169.254" - IMDS_TOKEN_URL = IMDS + "/latest/api/token" - IMDS_URL = IMDS + "/latest/meta-data" - - tokenTimeDelay = 15 * time.Second -) - -type Creds struct { - Code string `json:"Code"` - AccessKeyId string `json:"AccessKeyId"` - SecretAccessKey string `json:"SecretAccessKey"` - Token string `json:"Token"` - Expiration string `json:"Expiration"` -} - -func getToken() (string, error) { - var f utils.HttpRequestFunc = (func() (*http.Request, error) { - req, err := http.NewRequest("PUT", IMDS_TOKEN_URL, nil) - if err != nil { - return nil, fmt.Errorf("failed to create HTTP request: %v", err) - } - req.Header.Add("X-aws-ec2-metadata-token-ttl-seconds", "21600") - return req, nil - }) - - _, data, err := utils.HttpRequest(f) - if err != nil { - return "", fmt.Errorf("failed to send HTTP request: %v", err) - } - - return 
string(data), nil -} - -func addToken(req *http.Request) error { - token, err := getToken() - if err != nil { - return err - } - - if len(token) != 0 { - req.Header.Add("X-aws-ec2-metadata-token", token) - } - - return nil -} - -func getMetadata(path string) ([]byte, error) { - url := fmt.Sprintf("%s/%s", IMDS_URL, path) - klog.V(4).Infof("Requesting URL %s", url) - - var f utils.HttpRequestFunc = func() (*http.Request, error) { - req, err := http.NewRequest("GET", url, nil) - if err != nil { - return nil, fmt.Errorf("failed to create HTTP request: %v", err) - } - err = addToken(req) - if err != nil { - return nil, err - } - return req, nil - } - - resp, data, err := utils.HttpRequest(f) - if err != nil { - return nil, fmt.Errorf("failed to send HTTP request: %v", err) - } - - if resp.StatusCode != http.StatusOK { - return nil, fmt.Errorf("HTTP status: %s", resp.Status) - } - - return data, nil -} - -func GetRegion() (string, error) { - data, err := getMetadata("placement/region") - if err != nil { - return "", err - } - - return string(data), nil -} - -func GetCredentials() (*Creds, error) { - path := "iam/security-credentials" - data, err := getMetadata(path) - if err != nil { - return nil, err - } - - lines := strings.Split(string(data), "\n") - for _, line := range lines { - path = fmt.Sprintf("%s/%s", path, line) - break - } - - // ensure the credentials remain valid for at least the next tokenTimeDelay - for { - klog.V(4).Infof("Getting credentials from %s", path) - data, err = getMetadata(path) - if err != nil { - return nil, err - } - - creds := &Creds{} - if err = json.Unmarshal(data, creds); err != nil { - return nil, err - } - - klog.V(4).Infof("Credentials expire at %s", creds.Expiration) - expiration, err := time.Parse(time.RFC3339, creds.Expiration) - if err != nil { - klog.Errorf("Error parsing expiration time %q: %v", creds.Expiration, err) - } else if time.Now().Add(tokenTimeDelay).After(expiration) { - klog.V(4).Infof("Waiting %s for new token", 
tokenTimeDelay.String()) - time.Sleep(tokenTimeDelay) - continue - } - - if creds.Code != "Success" { - return nil, fmt.Errorf("failed to get creds: status %s", creds.Code) - } - return creds, nil - } -} - -func Instance2NodeMap(ctx context.Context, nodes []string) (map[string]string, error) { - args := []string{"-w", strings.Join(nodes, ","), - fmt.Sprintf("TOKEN=$(curl -s -X PUT -H \"X-aws-ec2-metadata-token-ttl-seconds: 21600\" %s); echo $(curl -s -H \"X-aws-ec2-metadata-token: $TOKEN\" %s/instance-id)", IMDS_TOKEN_URL, IMDS_URL)} - - stdout, err := utils.Exec(ctx, "pdsh", args, nil) - if err != nil { - return nil, err - } - klog.V(4).Infof("data: %s", stdout.String()) - - i2n := map[string]string{} - scanner := bufio.NewScanner(stdout) - for scanner.Scan() { - arr := strings.Split(scanner.Text(), ": ") - if len(arr) == 2 { - node, instance := arr[0], arr[1] - i2n[instance] = node - } - } - - if err := scanner.Err(); err != nil { - return nil, err - } - - return i2n, nil -} diff --git a/pkg/providers/aws/instance_topology.go b/pkg/providers/aws/instance_topology.go index a3c333e..d7a478c 100644 --- a/pkg/providers/aws/instance_topology.go +++ b/pkg/providers/aws/instance_topology.go @@ -21,23 +21,21 @@ import ( "fmt" "time" - "github.com/aws/aws-sdk-go-v2/config" - "github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/service/ec2" "github.com/aws/aws-sdk-go-v2/service/ec2/types" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" "github.com/NVIDIA/topograph/pkg/metrics" + "github.com/NVIDIA/topograph/pkg/topology" ) var defaultPageSize int32 = 100 -func GenerateInstanceTopology(ctx context.Context, creds *Credentials, pageSize int32, cis []common.ComputeInstances) ([]types.InstanceTopology, error) { +func (p *Provider) generateInstanceTopology(ctx context.Context, pageSize int32, cis []topology.ComputeInstances) ([]types.InstanceTopology, error) { var err error topology := []types.InstanceTopology{} for _, ci := range cis { - if 
topology, err = generateInstanceTopology(ctx, creds, pageSize, &ci, topology); err != nil { + if topology, err = p.generateInstanceTopologyForRegionInstances(ctx, pageSize, &ci, topology); err != nil { return nil, err } } @@ -45,24 +43,16 @@ func GenerateInstanceTopology(ctx context.Context, creds *Credentials, pageSize return topology, nil } -func generateInstanceTopology(ctx context.Context, creds *Credentials, pageSize int32, ci *common.ComputeInstances, topology []types.InstanceTopology) ([]types.InstanceTopology, error) { +func (p *Provider) generateInstanceTopologyForRegionInstances(ctx context.Context, pageSize int32, ci *topology.ComputeInstances, topology []types.InstanceTopology) ([]types.InstanceTopology, error) { if len(ci.Region) == 0 { return nil, fmt.Errorf("must specify region to query instance topology") } klog.Infof("Getting instance topology for %s region", ci.Region) - opts := []func(*config.LoadOptions) error{ - config.WithRegion(ci.Region), - config.WithCredentialsProvider( - credentials.NewStaticCredentialsProvider(creds.AccessKeyId, creds.SecretAccessKey, creds.Token), - )} - - cfg, err := config.LoadDefaultConfig(ctx, opts...) 
+ client, err := p.clientFactory(ci.Region) if err != nil { - return nil, fmt.Errorf("unable to load SDK config, %v", err) + return nil, err } - - svc := ec2.NewFromConfig(cfg) input := &ec2.DescribeInstanceTopologyInput{} // AWS allows up to 100 explicitly specified instance IDs @@ -85,7 +75,7 @@ func generateInstanceTopology(ctx context.Context, creds *Credentials, pageSize cycle++ klog.V(4).Infof("Starting cycle %d", cycle) start := time.Now() - output, err := svc.DescribeInstanceTopology(ctx, input) + output, err := client.EC2.DescribeInstanceTopology(ctx, input) if err != nil { apiLatency.WithLabelValues(ci.Region, "Error").Observe(time.Since(start).Seconds()) return nil, fmt.Errorf("failed to describe instance topology: %v", err) @@ -110,7 +100,7 @@ func generateInstanceTopology(ctx context.Context, creds *Credentials, pageSize return topology, nil } -func toGraph(topology []types.InstanceTopology, cis []common.ComputeInstances) (*common.Vertex, error) { +func toGraph(top []types.InstanceTopology, cis []topology.ComputeInstances) (*topology.Vertex, error) { i2n := make(map[string]string) for _, ci := range cis { for instance, node := range ci.Instances { @@ -119,10 +109,10 @@ func toGraph(topology []types.InstanceTopology, cis []common.ComputeInstances) ( } klog.V(4).Infof("Instance/Node map %v", i2n) - forest := make(map[string]*common.Vertex) - nodes := make(map[string]*common.Vertex) + forest := make(map[string]*topology.Vertex) + nodes := make(map[string]*topology.Vertex) - for _, inst := range topology { + for _, inst := range top { //klog.V(4).Infof("Checking instance %q", c.InstanceId) nodeName, ok := i2n[*inst.InstanceId] if !ok { @@ -131,7 +121,7 @@ func toGraph(topology []types.InstanceTopology, cis []common.ComputeInstances) ( klog.V(4).Infof("Found node %q instance %q", nodeName, *inst.InstanceId) delete(i2n, *inst.InstanceId) - instance := &common.Vertex{ + instance := &topology.Vertex{ Name: nodeName, ID: *inst.InstanceId, } @@ -139,9 +129,9 @@ 
func toGraph(topology []types.InstanceTopology, cis []common.ComputeInstances) ( id3 := inst.NetworkNodes[2] sw3, ok := nodes[id3] if !ok { // - sw3 = &common.Vertex{ + sw3 = &topology.Vertex{ ID: id3, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), } nodes[id3] = sw3 } @@ -151,9 +141,9 @@ func toGraph(topology []types.InstanceTopology, cis []common.ComputeInstances) ( id2 := inst.NetworkNodes[1] sw2, ok := nodes[id2] if !ok { // - sw2 = &common.Vertex{ + sw2 = &topology.Vertex{ ID: id2, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), } nodes[id2] = sw2 } @@ -163,9 +153,9 @@ func toGraph(topology []types.InstanceTopology, cis []common.ComputeInstances) ( id1 := inst.NetworkNodes[0] sw1, ok := nodes[id1] if !ok { // - sw1 = &common.Vertex{ + sw1 = &topology.Vertex{ ID: id1, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), } nodes[id1] = sw1 forest[id1] = sw1 @@ -175,22 +165,22 @@ func toGraph(topology []types.InstanceTopology, cis []common.ComputeInstances) ( if len(i2n) != 0 { klog.V(4).Infof("Adding nodes w/o topology: %v", i2n) - metrics.SetMissingTopology(common.ProviderAWS, len(i2n)) - sw := &common.Vertex{ - ID: common.NoTopology, - Vertices: make(map[string]*common.Vertex), + metrics.SetMissingTopology(NAME, len(i2n)) + sw := &topology.Vertex{ + ID: topology.NoTopology, + Vertices: make(map[string]*topology.Vertex), } for instanceID, nodeName := range i2n { - sw.Vertices[instanceID] = &common.Vertex{ + sw.Vertices[instanceID] = &topology.Vertex{ Name: nodeName, ID: instanceID, } } - forest[common.NoTopology] = sw + forest[topology.NoTopology] = sw } - root := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), + root := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } for name, node := range forest { root.Vertices[name] = node diff --git a/pkg/providers/aws/instance_topology_test.go 
b/pkg/providers/aws/instance_topology_test.go index 9ddbe53..036e43c 100644 --- a/pkg/providers/aws/instance_topology_test.go +++ b/pkg/providers/aws/instance_topology_test.go @@ -19,62 +19,58 @@ package aws import ( "testing" + "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/ec2/types" "github.com/stretchr/testify/require" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/topology" ) -func ptrString(s string) *string { - return &s -} - func TestNewInstanceTopology(t *testing.T) { - - topology := []types.InstanceTopology{ + top := []types.InstanceTopology{ { - InstanceId: ptrString("i-0febfe7a633a552cc"), - InstanceType: ptrString("p5.48xlarge"), + InstanceId: aws.String("i-0febfe7a633a552cc"), + InstanceType: aws.String("p5.48xlarge"), NetworkNodes: []string{ "nn-098f9e7674016cb1c", "nn-224a2a4d9df61a975", "nn-20da390f7d602f42f", }, - AvailabilityZone: ptrString("us-east-1e"), - ZoneId: ptrString("use1-az3"), + AvailabilityZone: aws.String("us-east-1e"), + ZoneId: aws.String("use1-az3"), }, { - InstanceId: ptrString("i-0727864293842c5f1"), - InstanceType: ptrString("p5.48xlarge"), + InstanceId: aws.String("i-0727864293842c5f1"), + InstanceType: aws.String("p5.48xlarge"), NetworkNodes: []string{ "nn-098f9e7674016cb1c", "nn-224a2a4d9df61a975", "nn-568b52163b3ce19c8", }, - AvailabilityZone: ptrString("us-east-1e"), - ZoneId: ptrString("use1-az3"), + AvailabilityZone: aws.String("us-east-1e"), + ZoneId: aws.String("use1-az3"), }, { - InstanceId: ptrString("i-04e4ca4199532bbba"), - InstanceType: ptrString("p5.48xlarge"), + InstanceId: aws.String("i-04e4ca4199532bbba"), + InstanceType: aws.String("p5.48xlarge"), NetworkNodes: []string{ "nn-098f9e7674016cb1c", "nn-224a2a4d9df61a975", "nn-d7d7a965aec389018", }, - AvailabilityZone: ptrString("us-east-1e"), - ZoneId: ptrString("use1-az3"), + AvailabilityZone: aws.String("us-east-1e"), + ZoneId: aws.String("use1-az3"), }, { - InstanceId: ptrString("i-0359d6503bf895535"), - 
InstanceType: ptrString("p5.48xlarge"), + InstanceId: aws.String("i-0359d6503bf895535"), + InstanceType: aws.String("p5.48xlarge"), NetworkNodes: []string{ "nn-098f9e7674016cb1c", "nn-224a2a4d9df61a975", "nn-ef5c999131844763a", }, - AvailabilityZone: ptrString("us-east-1e"), - ZoneId: ptrString("use1-az3"), + AvailabilityZone: aws.String("us-east-1e"), + ZoneId: aws.String("use1-az3"), }, } @@ -85,19 +81,19 @@ func TestNewInstanceTopology(t *testing.T) { "i-0359d6503bf895535": "node4", } - n1 := &common.Vertex{ID: "i-0febfe7a633a552cc", Name: "node1"} - n2 := &common.Vertex{ID: "i-0727864293842c5f1", Name: "node2"} - n3 := &common.Vertex{ID: "i-04e4ca4199532bbba", Name: "node3"} - n4 := &common.Vertex{ID: "i-0359d6503bf895535", Name: "node4"} + n1 := &topology.Vertex{ID: "i-0febfe7a633a552cc", Name: "node1"} + n2 := &topology.Vertex{ID: "i-0727864293842c5f1", Name: "node2"} + n3 := &topology.Vertex{ID: "i-04e4ca4199532bbba", Name: "node3"} + n4 := &topology.Vertex{ID: "i-0359d6503bf895535", Name: "node4"} - v31 := &common.Vertex{ID: "nn-20da390f7d602f42f", Vertices: map[string]*common.Vertex{"i-0febfe7a633a552cc": n1}} - v32 := &common.Vertex{ID: "nn-568b52163b3ce19c8", Vertices: map[string]*common.Vertex{"i-0727864293842c5f1": n2}} - v33 := &common.Vertex{ID: "nn-d7d7a965aec389018", Vertices: map[string]*common.Vertex{"i-04e4ca4199532bbba": n3}} - v34 := &common.Vertex{ID: "nn-ef5c999131844763a", Vertices: map[string]*common.Vertex{"i-0359d6503bf895535": n4}} + v31 := &topology.Vertex{ID: "nn-20da390f7d602f42f", Vertices: map[string]*topology.Vertex{"i-0febfe7a633a552cc": n1}} + v32 := &topology.Vertex{ID: "nn-568b52163b3ce19c8", Vertices: map[string]*topology.Vertex{"i-0727864293842c5f1": n2}} + v33 := &topology.Vertex{ID: "nn-d7d7a965aec389018", Vertices: map[string]*topology.Vertex{"i-04e4ca4199532bbba": n3}} + v34 := &topology.Vertex{ID: "nn-ef5c999131844763a", Vertices: map[string]*topology.Vertex{"i-0359d6503bf895535": n4}} - v2 := &common.Vertex{ + v2 := 
&topology.Vertex{ ID: "nn-224a2a4d9df61a975", - Vertices: map[string]*common.Vertex{ + Vertices: map[string]*topology.Vertex{ "nn-20da390f7d602f42f": v31, "nn-568b52163b3ce19c8": v32, "nn-d7d7a965aec389018": v33, @@ -105,11 +101,11 @@ func TestNewInstanceTopology(t *testing.T) { }, } - v1 := &common.Vertex{ID: "nn-098f9e7674016cb1c", Vertices: map[string]*common.Vertex{"nn-224a2a4d9df61a975": v2}} + v1 := &topology.Vertex{ID: "nn-098f9e7674016cb1c", Vertices: map[string]*topology.Vertex{"nn-224a2a4d9df61a975": v2}} - expected := &common.Vertex{Vertices: map[string]*common.Vertex{"nn-098f9e7674016cb1c": v1}} + expected := &topology.Vertex{Vertices: map[string]*topology.Vertex{"nn-098f9e7674016cb1c": v1}} - tree, err := toGraph(topology, []common.ComputeInstances{{Instances: i2n}}) + tree, err := toGraph(top, []topology.ComputeInstances{{Instances: i2n}}) require.NoError(t, err) require.Equal(t, expected, tree) } diff --git a/pkg/providers/aws/provider.go b/pkg/providers/aws/provider.go index 488eb3b..f6fd459 100644 --- a/pkg/providers/aws/provider.go +++ b/pkg/providers/aws/provider.go @@ -17,32 +17,106 @@ package aws import ( + "bufio" "context" "fmt" "os" "strings" + "time" + "github.com/aws/aws-sdk-go-v2/aws" + "github.com/aws/aws-sdk-go-v2/config" + "github.com/aws/aws-sdk-go-v2/credentials" + "github.com/aws/aws-sdk-go-v2/credentials/ec2rolecreds" + "github.com/aws/aws-sdk-go-v2/feature/ec2/imds" + "github.com/aws/aws-sdk-go-v2/service/ec2" v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/engines/k8s" - "github.com/NVIDIA/topograph/pkg/engines/slurm" + "github.com/NVIDIA/topograph/internal/exec" + "github.com/NVIDIA/topograph/pkg/providers" + "github.com/NVIDIA/topograph/pkg/topology" ) -type Provider struct{} +const NAME = "aws" + +const ( + IMDS = "http://169.254.169.254" + IMDS_TOKEN_URL = IMDS + "/latest/api/token" + IMDS_URL = IMDS + "/latest/meta-data" + + tokenTimeDelay = 15 * 
time.Second +) + +type Provider struct { + clientFactory ClientFactory + imdsClient IDMSClient +} + +type EC2Client interface { + DescribeInstanceTopology(ctx context.Context, params *ec2.DescribeInstanceTopologyInput, optFns ...func(*ec2.Options)) (*ec2.DescribeInstanceTopologyOutput, error) +} + +type IDMSClient interface { + GetRegion(ctx context.Context, params *imds.GetRegionInput, optFns ...func(*imds.Options)) (*imds.GetRegionOutput, error) +} + +type CredsClient interface { + Retrieve(ctx context.Context) (aws.Credentials, error) +} + +type ClientFactory func(region string) (*Client, error) + +type Client struct { + EC2 EC2Client +} type Credentials struct { AccessKeyId string SecretAccessKey string - Token string // token is optional + Token string // Token is optional } -func GetProvider() (*Provider, error) { - return &Provider{}, nil +func NamedLoader() (string, providers.Loader) { + return NAME, Loader } -func (p *Provider) GetCredentials(creds map[string]string) (interface{}, error) { +func Loader(ctx context.Context, cfg providers.Config) (providers.Provider, error) { + defaultCfg, err := config.LoadDefaultConfig(ctx) + if err != nil { + return nil, err + } + + imdsClient := imds.NewFromConfig(defaultCfg) + + creds, err := getCredentials(ctx, cfg.Creds) + if err != nil { + return nil, err + } + + clientFactory := func(region string) (*Client, error) { + opts := []func(*config.LoadOptions) error{ + config.WithRegion(region), + config.WithCredentialsProvider( + credentials.NewStaticCredentialsProvider(creds.AccessKeyId, creds.SecretAccessKey, creds.Token), + )} + + awsCfg, err := config.LoadDefaultConfig(ctx, opts...) 
+ if err != nil { + return nil, fmt.Errorf("unable to load SDK config, %v", err) + } + + ec2Client := ec2.NewFromConfig(awsCfg) + + return &Client{ + EC2: ec2Client, + }, nil + } + + return New(clientFactory, imdsClient), nil +} + +func getCredentials(ctx context.Context, creds map[string]string) (*Credentials, error) { var accessKeyID, secretAccessKey, sessionToken string if len(creds) != 0 { @@ -61,13 +135,13 @@ func (p *Provider) GetCredentials(creds map[string]string) (interface{}, error) sessionToken = os.Getenv("AWS_SESSION_TOKEN") } else { klog.Infof("Using node AWS access credentials") - nodeCreds, err := GetCredentials() + creds, err := getCredentialsFromProvider(ctx) if err != nil { return nil, err } - accessKeyID = nodeCreds.AccessKeyId - secretAccessKey = nodeCreds.SecretAccessKey - sessionToken = nodeCreds.Token + accessKeyID = creds.AccessKeyID + secretAccessKey = creds.SecretAccessKey + sessionToken = creds.SessionToken } return &Credentials{ @@ -77,40 +151,34 @@ func (p *Provider) GetCredentials(creds map[string]string) (interface{}, error) }, nil } -func (p *Provider) GetComputeInstances(ctx context.Context, engine common.Engine) ([]common.ComputeInstances, error) { - klog.InfoS("Getting compute instances", "provider", common.ProviderAWS, "engine", engine) +func getCredentialsFromProvider(ctx context.Context) (creds aws.Credentials, err error) { + credsClient := ec2rolecreds.New() - switch eng := engine.(type) { - case *slurm.SlurmEngine: - nodes, err := slurm.GetNodeList(ctx) + for { + creds, err = credsClient.Retrieve(ctx) if err != nil { - return nil, err + return creds, err } - i2n, err := Instance2NodeMap(ctx, nodes) - if err != nil { - return nil, err - } - region, err := GetRegion() - if err != nil { - return nil, err + + if time.Now().Add(tokenTimeDelay).After(creds.Expires) { + klog.V(4).Infof("Waiting %s for new token", tokenTimeDelay.String()) + time.Sleep(tokenTimeDelay) + continue } - return []common.ComputeInstances{{Region: region, 
Instances: i2n}}, nil - case *k8s.K8sEngine: - return eng.GetComputeInstances(ctx, - func(n *v1.Node) string { return n.Labels["topology.kubernetes.io/region"] }, - func(n *v1.Node) string { - // ProviderID format: "aws:///us-east-1f/i-0acd9257c6569d371" - parts := strings.Split(n.Spec.ProviderID, "/") - return parts[len(parts)-1] - }) - default: - return nil, fmt.Errorf("unsupported engine %q", engine) + + return creds, nil + } +} + +func New(clientFactory ClientFactory, imdsClient IDMSClient) *Provider { + return &Provider{ + clientFactory: clientFactory, + imdsClient: imdsClient, } } -func (p *Provider) GenerateTopologyConfig(ctx context.Context, cr interface{}, pageSize int, instances []common.ComputeInstances) (*common.Vertex, error) { - creds := cr.(*Credentials) - topology, err := GenerateInstanceTopology(ctx, creds, int32(pageSize), instances) +func (p *Provider) GenerateTopologyConfig(ctx context.Context, pageSize int, instances []topology.ComputeInstances) (*topology.Vertex, error) { + topology, err := p.generateInstanceTopology(ctx, int32(pageSize), instances) if err != nil { return nil, err } @@ -119,3 +187,54 @@ func (p *Provider) GenerateTopologyConfig(ctx context.Context, cr interface{}, p return toGraph(topology, instances) } + +// Engine support + +// Instances2NodeMap implements slurm.instanceMapper +func (p *Provider) Instances2NodeMap(ctx context.Context, nodes []string) (map[string]string, error) { + args := []string{"-w", strings.Join(nodes, ","), + fmt.Sprintf("TOKEN=$(curl -s -X PUT -H \"X-aws-ec2-metadata-token-ttl-seconds: 21600\" %s); echo $(curl -s -H \"X-aws-ec2-metadata-token: $TOKEN\" %s/instance-id)", IMDS_TOKEN_URL, IMDS_URL)} + + stdout, err := exec.Exec(ctx, "pdsh", args, nil) + if err != nil { + return nil, err + } + klog.V(4).Infof("data: %s", stdout.String()) + + i2n := map[string]string{} + scanner := bufio.NewScanner(stdout) + for scanner.Scan() { + arr := strings.Split(scanner.Text(), ": ") + if len(arr) == 2 { + node, 
instance := arr[0], arr[1] + i2n[instance] = node + } + } + + if err := scanner.Err(); err != nil { + return nil, err + } + + return i2n, nil +} + +// GetComputeInstancesRegion implements slurm.instanceMapper +func (p *Provider) GetComputeInstancesRegion() (string, error) { + output, err := p.imdsClient.GetRegion(context.Background(), &imds.GetRegionInput{}) + if err != nil { + return "", err + } + return output.Region, nil +} + +// GetNodeRegion implements k8s.k8sNodeInfo +func (p *Provider) GetNodeRegion(node *v1.Node) (string, error) { + return node.Labels["topology.kubernetes.io/region"], nil +} + +// GetNodeInstance implements k8s.k8sNodeInfo +func (p *Provider) GetNodeInstance(node *v1.Node) (string, error) { + // ProviderID format: "aws:///us-east-1f/i-0acd9257c6569d371" + parts := strings.Split(node.Spec.ProviderID, "/") + return parts[len(parts)-1], nil +} diff --git a/pkg/providers/baremetal/mnnvl.go b/pkg/providers/baremetal/mnnvl.go index 405b1d0..c5b9fc7 100644 --- a/pkg/providers/baremetal/mnnvl.go +++ b/pkg/providers/baremetal/mnnvl.go @@ -7,9 +7,10 @@ import ( "strconv" "strings" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/internal/exec" + "github.com/NVIDIA/topograph/pkg/engines" "github.com/NVIDIA/topograph/pkg/ib" - "github.com/NVIDIA/topograph/pkg/utils" + "github.com/NVIDIA/topograph/pkg/topology" ) // domain contains map of each domainID(clusterUUID) -> list of nodeNames in that domain @@ -19,13 +20,14 @@ type domain struct { } // getNodeList retrieves all the nodenames on the cluster -func getNodeList(cis []common.ComputeInstances) []string { +func getNodeList(cis []topology.ComputeInstances) []string { nodes := []string{} for _, ci := range cis { for _, node := range ci.Instances { nodes = append(nodes, node) } } + return nodes } @@ -34,13 +36,14 @@ func domainIDExists(id string, domainMap map[string]domain) bool { if _, exists := domainMap[id]; exists { return true } + return false } -func getIbTree(ctx 
context.Context, nodes []string) (*common.Vertex, error) { +func getIbTree(ctx context.Context, _ []string) (*topology.Vertex, error) { nodeVisited := make(map[string]bool) - treeRoot := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), + treeRoot := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } ibPrefix := "IB" ibCount := 0 @@ -48,7 +51,7 @@ func getIbTree(ctx context.Context, nodes []string) (*common.Vertex, error) { partitionVisitedMap := make(map[string]bool) args := []string{"-h"} - stdout, err := utils.Exec(ctx, "sinfo", args, nil) + stdout, err := exec.Exec(ctx, "sinfo", args, nil) if err != nil { return nil, fmt.Errorf("exec error in sinfo: %v", err) } @@ -69,12 +72,13 @@ func getIbTree(ctx context.Context, nodes []string) (*common.Vertex, error) { nodesArr := deCompressNodeNames(nodeList) partitionNodeMap[partitionName] = append(partitionNodeMap[partitionName], nodesArr...) } + for pName, nodes := range partitionNodeMap { if _, exists := partitionVisitedMap[pName]; !exists { for _, node := range nodes { if _, exists := nodeVisited[node]; !exists { args := []string{"-N", "-R", "ssh", "-w", node, "sudo ibnetdiscover"} - stdout, err := utils.Exec(ctx, "pdsh", args, nil) + stdout, err := exec.Exec(ctx, "pdsh", args, nil) if err != nil { return nil, fmt.Errorf("exec error while pdsh IB command: %v", err) } @@ -99,6 +103,7 @@ func getIbTree(ctx context.Context, nodes []string) (*common.Vertex, error) { } } } + return treeRoot, nil } @@ -108,6 +113,7 @@ func deCompressNodeNames(nodeList string) []string { arr := strings.Split(nodeList, ",") prefix := "" var nodeName string + for _, entry := range arr { if strings.Contains(entry, "[") { tuple := strings.Split(entry, "[") @@ -150,13 +156,14 @@ func deCompressNodeNames(nodeList string) []string { } nodeArr = append(nodeArr, nodeName) } + return nodeArr } // getClusterOutput reads output from nodeInfo and populates the structs func getClusterOutput(ctx context.Context, domainMap 
map[string]domain, nodes []string, cmd string) error { args := []string{"-R", "ssh", "-w", strings.Join(nodes, ","), cmd} - stdout, err := utils.Exec(ctx, "pdsh", args, nil) + stdout, err := exec.Exec(ctx, "pdsh", args, nil) if err != nil { return fmt.Errorf("exec error while pdsh: %v", err) } @@ -178,35 +185,38 @@ func getClusterOutput(ctx context.Context, domainMap map[string]domain, nodes [] if err := scanner.Err(); err != nil { return fmt.Errorf("scanner error while reading pdsh output: %v", err) } + return nil } -func toGraph(domainMap map[string]domain, treeRoot *common.Vertex) *common.Vertex { - root := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), + +func toGraph(domainMap map[string]domain, treeRoot *topology.Vertex) *topology.Vertex { + root := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), Metadata: make(map[string]string), } - blockRoot := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), + blockRoot := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } - root.Vertices[common.ValTopologyTree] = treeRoot + root.Vertices[topology.ValTopologyTree] = treeRoot for domainName, domain := range domainMap { - tree := &common.Vertex{ + tree := &topology.Vertex{ ID: domainName, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), } for node := range domain.nodeMap { - tree.Vertices[node] = &common.Vertex{Name: node, ID: node} + tree.Vertices[node] = &topology.Vertex{Name: node, ID: node} } blockRoot.Vertices[domainName] = tree } // add root metadata - root.Metadata[common.KeyEngine] = common.EngineSLURM - root.Metadata[common.KeyPlugin] = common.ValTopologyBlock - root.Vertices[common.ValTopologyBlock] = blockRoot + root.Metadata[topology.KeyEngine] = engines.EngineSLURM // TODO: Check if this should be dynamic + root.Metadata[topology.KeyPlugin] = topology.ValTopologyBlock + root.Vertices[topology.ValTopologyBlock] = blockRoot + return root } -func 
generateTopologyConfig(ctx context.Context, cis []common.ComputeInstances) (*common.Vertex, error) { +func generateTopologyConfig(ctx context.Context, cis []topology.ComputeInstances) (*topology.Vertex, error) { domainMap := make(map[string]domain) // domainID: domain nodes := getNodeList(cis) err := getClusterOutput(ctx, domainMap, nodes, "nvidia-smi -q | grep ClusterUUID") @@ -218,5 +228,6 @@ func generateTopologyConfig(ctx context.Context, cis []common.ComputeInstances) if err != nil { return nil, fmt.Errorf("getIbTree failed: %v", err) } + return toGraph(domainMap, treeRoot), nil } diff --git a/pkg/providers/baremetal/provider.go b/pkg/providers/baremetal/provider.go index 29f1a0b..db96e5f 100644 --- a/pkg/providers/baremetal/provider.go +++ b/pkg/providers/baremetal/provider.go @@ -2,48 +2,52 @@ package baremetal import ( "context" - "fmt" + "errors" - "k8s.io/klog/v2" - - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/engines/slurm" + "github.com/NVIDIA/topograph/pkg/providers" + "github.com/NVIDIA/topograph/pkg/topology" ) +const NAME = "baremetal" + type Provider struct{} -func GetProvider() (*Provider, error) { - return &Provider{}, nil +var ErrMultiRegionNotSupported = errors.New("on-prem does not support multi-region topology requests") + +func NamedLoader() (string, providers.Loader) { + return NAME, Loader } -func (p *Provider) GetCredentials(_ map[string]string) (interface{}, error) { - return nil, nil +func Loader(ctx context.Context, config providers.Config) (providers.Provider, error) { + return New() } -func (p *Provider) GetComputeInstances(ctx context.Context, engine common.Engine) ([]common.ComputeInstances, error) { - klog.InfoS("Getting compute instances", "provider", common.ProviderBM, "engine", engine) - - switch engine.(type) { - case *slurm.SlurmEngine: - nodes, err := slurm.GetNodeList(ctx) - if err != nil { - return nil, err - } - i2n := make(map[string]string) - for _, node := range nodes { - i2n[node] = 
node - } - return []common.ComputeInstances{{Instances: i2n}}, nil - default: - return nil, fmt.Errorf("unsupported engine %q", engine) - } +func New() (*Provider, error) { + return &Provider{}, nil } -func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ interface{}, _ int, instances []common.ComputeInstances) (*common.Vertex, error) { +func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ int, instances []topology.ComputeInstances) (*topology.Vertex, error) { if len(instances) > 1 { - return nil, fmt.Errorf("On-prem does not support multi-region topology requests") + return nil, ErrMultiRegionNotSupported } //call mnnvl code from here return generateTopologyConfig(ctx, instances) } + +// Engine support + +// Instances2NodeMap implements slurm.instanceMapper +func (p *Provider) Instances2NodeMap(ctx context.Context, nodes []string) (map[string]string, error) { + i2n := make(map[string]string) + for _, node := range nodes { + i2n[node] = node + } + + return i2n, nil +} + +// GetComputeInstancesRegion implements slurm.instanceMapper +func (p *Provider) GetComputeInstancesRegion() (string, error) { + return "", nil +} diff --git a/pkg/providers/cw/provider.go b/pkg/providers/cw/provider.go index dafa8ba..d414d18 100644 --- a/pkg/providers/cw/provider.go +++ b/pkg/providers/cw/provider.go @@ -22,48 +22,29 @@ import ( "os/exec" v1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/engines/k8s" - "github.com/NVIDIA/topograph/pkg/engines/slurm" "github.com/NVIDIA/topograph/pkg/ib" + "github.com/NVIDIA/topograph/pkg/providers" + "github.com/NVIDIA/topograph/pkg/topology" ) +const NAME = "cw" + type Provider struct{} -func GetProvider() (*Provider, error) { - return &Provider{}, nil +func NamedLoader() (string, providers.Loader) { + return NAME, Loader } -func (p *Provider) GetCredentials(_ map[string]string) (interface{}, error) { - return nil, nil +func Loader(ctx context.Context, 
config providers.Config) (providers.Provider, error) { + return New() } -func (p *Provider) GetComputeInstances(ctx context.Context, engine common.Engine) ([]common.ComputeInstances, error) { - klog.InfoS("Getting compute instances", "provider", common.ProviderCW, "engine", engine) - - switch eng := engine.(type) { - case *slurm.SlurmEngine: - nodes, err := slurm.GetNodeList(ctx) - if err != nil { - return nil, err - } - i2n := make(map[string]string) - for _, node := range nodes { - i2n[node] = node - } - return []common.ComputeInstances{{Instances: i2n}}, nil - case *k8s.K8sEngine: - return eng.GetComputeInstances(ctx, - func(n *v1.Node) string { return n.Labels["topology.kubernetes.io/region"] }, - func(n *v1.Node) string { return n.Labels["kubernetes.io/hostname"] }) - default: - return nil, fmt.Errorf("unsupported engine %q", engine) - } +func New() (*Provider, error) { + return &Provider{}, nil } -func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ interface{}, _ int, instances []common.ComputeInstances) (*common.Vertex, error) { +func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ int, instances []topology.ComputeInstances) (*topology.Vertex, error) { if len(instances) > 1 { return nil, fmt.Errorf("CW does not support mult-region topology requests") } @@ -77,3 +58,30 @@ func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ interface{}, _ return ib.GenerateTopologyConfig(output) } + +// Engine support + +// Instances2NodeMap implements slurm.instanceMapper +func (p *Provider) Instances2NodeMap(ctx context.Context, nodes []string) (map[string]string, error) { + i2n := make(map[string]string) + for _, node := range nodes { + i2n[node] = node + } + + return i2n, nil +} + +// GetComputeInstancesRegion implements slurm.instanceMapper +func (p *Provider) GetComputeInstancesRegion() (string, error) { + return "", nil +} + +// GetNodeRegion implements k8s.k8sNodeInfo +func (p *Provider) GetNodeRegion(node *v1.Node) (string, error) 
{ + return node.Labels["topology.kubernetes.io/region"], nil +} + +// GetNodeInstance implements k8s.k8sNodeInfo +func (p *Provider) GetNodeInstance(node *v1.Node) (string, error) { + return node.Labels["kubernetes.io/hostname"], nil +} diff --git a/pkg/providers/gcp/instance_topology.go b/pkg/providers/gcp/instance_topology.go index d4d6b14..42561e1 100644 --- a/pkg/providers/gcp/instance_topology.go +++ b/pkg/providers/gcp/instance_topology.go @@ -22,12 +22,11 @@ import ( "strings" "time" - compute_v1 "cloud.google.com/go/compute/apiv1" "cloud.google.com/go/compute/apiv1/computepb" "cloud.google.com/go/compute/metadata" "google.golang.org/api/iterator" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/topology" ) type InstanceTopology struct { @@ -40,11 +39,12 @@ type InstanceInfo struct { name string } -func GenerateInstanceTopology(ctx context.Context, _ interface{}, instanceToNodeMap map[string]string) (*InstanceTopology, error) { - zoneClient, err := compute_v1.NewZonesRESTClient(ctx) +func (p *Provider) generateInstanceTopology(ctx context.Context, instanceToNodeMap map[string]string) (*InstanceTopology, error) { + client, err := p.clientFactory() if err != nil { - return nil, fmt.Errorf("unable to get zones client: %s", err.Error()) + return nil, err } + projectID, err := metadata.ProjectIDWithContext(ctx) if err != nil { return nil, fmt.Errorf("unable to get project ID: %s", err.Error()) @@ -53,7 +53,7 @@ func GenerateInstanceTopology(ctx context.Context, _ interface{}, instanceToNode zones := make([]string, 0) timeNow := time.Now() - res := zoneClient.List(ctx, &listZoneRequest) + res := client.Zones.List(ctx, &listZoneRequest) requestLatency.WithLabelValues("ListZones").Observe(time.Since(timeNow).Seconds()) for { @@ -64,11 +64,6 @@ func GenerateInstanceTopology(ctx context.Context, _ interface{}, instanceToNode zones = append(zones, *zone.Name) } - instanceClient, err := compute_v1.NewInstancesRESTClient(ctx) - if err != nil 
{ - return nil, fmt.Errorf("unable to instance client for zone: %s", err.Error()) - } - instanceTopology := &InstanceTopology{instances: make([]*InstanceInfo, 0)} for _, zone := range zones { @@ -76,7 +71,7 @@ func GenerateInstanceTopology(ctx context.Context, _ interface{}, instanceToNode listInstanceRequest := computepb.ListInstancesRequest{Project: projectID, Zone: zone} requestLatency.WithLabelValues("ListInstances").Observe(time.Since(timeNow).Seconds()) - resInstance := instanceClient.List(ctx, &listInstanceRequest) + resInstance := client.Instances.List(ctx, &listInstanceRequest) for { instance, err := resInstance.Next() if err == iterator.Done { @@ -112,12 +107,12 @@ func GenerateInstanceTopology(ctx context.Context, _ interface{}, instanceToNode return instanceTopology, nil } -func (cfg *InstanceTopology) toGraph() (*common.Vertex, error) { - forest := make(map[string]*common.Vertex) - nodes := make(map[string]*common.Vertex) +func (cfg *InstanceTopology) toGraph() (*topology.Vertex, error) { + forest := make(map[string]*topology.Vertex) + nodes := make(map[string]*topology.Vertex) for _, c := range cfg.instances { - instance := &common.Vertex{ + instance := &topology.Vertex{ Name: c.name, ID: c.name, } @@ -125,9 +120,9 @@ func (cfg *InstanceTopology) toGraph() (*common.Vertex, error) { id2 := c.rackID sw2, ok := nodes[id2] if !ok { - sw2 = &common.Vertex{ + sw2 = &topology.Vertex{ ID: id2, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), } nodes[id2] = sw2 } @@ -136,9 +131,9 @@ func (cfg *InstanceTopology) toGraph() (*common.Vertex, error) { id1 := c.clusterID sw1, ok := nodes[id1] if !ok { - sw1 = &common.Vertex{ + sw1 = &topology.Vertex{ ID: id1, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), } nodes[id1] = sw1 forest[id1] = sw1 @@ -146,8 +141,8 @@ func (cfg *InstanceTopology) toGraph() (*common.Vertex, error) { sw1.Vertices[id2] = sw2 } - root := &common.Vertex{ - 
Vertices: make(map[string]*common.Vertex), + root := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } for name, node := range forest { root.Vertices[name] = node diff --git a/pkg/providers/gcp/provider.go b/pkg/providers/gcp/provider.go index 3947d15..d0bd2da 100644 --- a/pkg/providers/gcp/provider.go +++ b/pkg/providers/gcp/provider.go @@ -20,48 +20,68 @@ import ( "context" "fmt" + compute_v1 "cloud.google.com/go/compute/apiv1" + computepb "cloud.google.com/go/compute/apiv1/computepb" + gax "github.com/googleapis/gax-go/v2" v1 "k8s.io/api/core/v1" - "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/engines/k8s" - "github.com/NVIDIA/topograph/pkg/engines/slurm" + "github.com/NVIDIA/topograph/pkg/providers" + "github.com/NVIDIA/topograph/pkg/topology" ) -type Provider struct{} +const NAME = "gcp" -func GetProvider() (*Provider, error) { - return &Provider{}, nil +type Provider struct { + clientFactory ClientFactory } -func (p *Provider) GetCredentials(_ map[string]string) (interface{}, error) { - return nil, nil +type ClientFactory func() (*Client, error) + +type Client struct { + Zones ZonesClient + Instances InstancesClient +} + +type ZonesClient interface { + List(ctx context.Context, req *computepb.ListZonesRequest, opts ...gax.CallOption) *compute_v1.ZoneIterator } -func (p *Provider) GetComputeInstances(ctx context.Context, engine common.Engine) ([]common.ComputeInstances, error) { - klog.InfoS("Getting compute instances", "provider", common.ProviderGCP, "engine", engine) +type InstancesClient interface { + List(ctx context.Context, req *computepb.ListInstancesRequest, opts ...gax.CallOption) *compute_v1.InstanceIterator +} + +func NamedLoader() (string, providers.Loader) { + return NAME, Loader +} - switch eng := engine.(type) { - case *slurm.SlurmEngine: - nodes, err := slurm.GetNodeList(ctx) +func Loader(ctx context.Context, config providers.Config) (providers.Provider, error) { + clientFactory 
:= func() (*Client, error) { + zonesClient, err := compute_v1.NewZonesRESTClient(ctx) if err != nil { - return nil, err + return nil, fmt.Errorf("unable to get zones client: %s", err.Error()) } - i2n := make(map[string]string) - for _, node := range nodes { - i2n[node] = node + + instancesClient, err := compute_v1.NewInstancesRESTClient(ctx) + if err != nil { + return nil, fmt.Errorf("unable to get instances client: %s", err.Error()) } - return []common.ComputeInstances{{Instances: i2n}}, nil - case *k8s.K8sEngine: - return eng.GetComputeInstances(ctx, - func(n *v1.Node) string { return n.Labels["topology.kubernetes.io/region"] }, - func(n *v1.Node) string { return n.Labels["kubernetes.io/hostname"] }) - default: - return nil, fmt.Errorf("unsupported engine %q", engine) + + return &Client{ + Zones: zonesClient, + Instances: instancesClient, + }, nil } + + return New(clientFactory) +} + +func New(clientFactory ClientFactory) (*Provider, error) { + return &Provider{ + clientFactory: clientFactory, + }, nil } -func (p *Provider) GenerateTopologyConfig(ctx context.Context, creds interface{}, _ int, instances []common.ComputeInstances) (*common.Vertex, error) { +func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ int, instances []topology.ComputeInstances) (*topology.Vertex, error) { if len(instances) > 1 { return nil, fmt.Errorf("GCP does not support mult-region topology requests") } @@ -71,10 +91,37 @@ func (p *Provider) GenerateTopologyConfig(ctx context.Context, creds interface{} instanceToNode = instances[0].Instances } - cfg, err := GenerateInstanceTopology(ctx, creds, instanceToNode) + cfg, err := p.generateInstanceTopology(ctx, instanceToNode) if err != nil { return nil, err } return cfg.toGraph() } + +// Engine support + +// Instances2NodeMap implements slurm.instanceMapper +func (p *Provider) Instances2NodeMap(ctx context.Context, nodes []string) (map[string]string, error) { + i2n := make(map[string]string) + for _, node := range nodes { + 
i2n[node] = node + } + + return i2n, nil +} + +// GetComputeInstancesRegion implements slurm.instanceMapper +func (p *Provider) GetComputeInstancesRegion() (string, error) { + return "", nil +} + +// GetNodeRegion implements k8s.k8sNodeInfo +func (p *Provider) GetNodeRegion(node *v1.Node) (string, error) { + return node.Labels["topology.kubernetes.io/region"], nil +} + +// GetNodeInstance implements k8s.k8sNodeInfo +func (p *Provider) GetNodeInstance(node *v1.Node) (string, error) { + return node.Labels["kubernetes.io/hostname"], nil +} diff --git a/pkg/providers/oci/instance_topology.go b/pkg/providers/oci/instance_topology.go index d3a6c2f..62fcf97 100644 --- a/pkg/providers/oci/instance_topology.go +++ b/pkg/providers/oci/instance_topology.go @@ -23,13 +23,12 @@ import ( "sort" "time" - OCICommon "github.com/oracle/oci-go-sdk/v65/common" "github.com/oracle/oci-go-sdk/v65/core" "github.com/oracle/oci-go-sdk/v65/identity" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" "github.com/NVIDIA/topograph/pkg/metrics" + "github.com/NVIDIA/topograph/pkg/topology" ) type level int @@ -40,11 +39,11 @@ const ( hpcIslandLevel ) -func GenerateInstanceTopology(ctx context.Context, creds OCICommon.ConfigurationProvider, cis []common.ComputeInstances) ([]*core.ComputeBareMetalHostSummary, error) { +func GenerateInstanceTopology(ctx context.Context, factory ClientFactory, cis []topology.ComputeInstances) ([]*core.ComputeBareMetalHostSummary, error) { var err error bareMetalHostSummaries := []*core.ComputeBareMetalHostSummary{} for _, ci := range cis { - if bareMetalHostSummaries, err = generateInstanceTopology(ctx, creds, &ci, bareMetalHostSummaries); err != nil { + if bareMetalHostSummaries, err = generateInstanceTopology(ctx, factory, &ci, bareMetalHostSummaries); err != nil { return nil, err } } @@ -52,15 +51,15 @@ func GenerateInstanceTopology(ctx context.Context, creds OCICommon.Configuration return bareMetalHostSummaries, nil } -func 
getComputeCapacityTopologies(ctx context.Context, computeClient core.ComputeClient, identityClient identity.IdentityClient, - compartmentId string) (cct []core.ComputeCapacityTopologySummary, err error) { +func getComputeCapacityTopologies(ctx context.Context, client Client) (cct []core.ComputeCapacityTopologySummary, err error) { + compartmentId := client.TenancyOCID() adRequest := identity.ListAvailabilityDomainsRequest{ CompartmentId: &compartmentId, } timeStart := time.Now() - ads, err := identityClient.ListAvailabilityDomains(ctx, adRequest) + ads, err := client.ListAvailabilityDomains(ctx, adRequest) if err != nil { return cct, fmt.Errorf("unable to get AD: %v", err) } @@ -74,7 +73,7 @@ func getComputeCapacityTopologies(ctx context.Context, computeClient core.Comput for { timeStart := time.Now() - resp, err := computeClient.ListComputeCapacityTopologies(ctx, cctRequest) + resp, err := client.ListComputeCapacityTopologies(ctx, cctRequest) requestLatency.WithLabelValues("ListComputeCapacityTopologies", resp.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds()) if err != nil { if resp.HTTPResponse().StatusCode == http.StatusNotFound { @@ -96,14 +95,15 @@ func getComputeCapacityTopologies(ctx context.Context, computeClient core.Comput return cct, nil } -func getBMHSummaryPerComputeCapacityTopology(ctx context.Context, computeClient core.ComputeClient, topologyID, compartmentId string) (bmhSummary []core.ComputeBareMetalHostSummary, err error) { +func getBMHSummaryPerComputeCapacityTopology(ctx context.Context, client Client, topologyID string) (bmhSummary []core.ComputeBareMetalHostSummary, err error) { + compartmentId := client.TenancyOCID() request := core.ListComputeCapacityTopologyComputeBareMetalHostsRequest{ ComputeCapacityTopologyId: &topologyID, CompartmentId: &compartmentId, } for { timeStart := time.Now() - response, err := computeClient.ListComputeCapacityTopologyComputeBareMetalHosts(ctx, request) + response, err := 
client.ListComputeCapacityTopologyComputeBareMetalHosts(ctx, request) requestLatency.WithLabelValues("ListComputeCapacityTopologyComputeBareMetalHosts", response.HTTPResponse().Status).Observe(time.Since(timeStart).Seconds()) if err != nil { klog.Errorln(err.Error()) @@ -121,10 +121,8 @@ func getBMHSummaryPerComputeCapacityTopology(ctx context.Context, computeClient return bmhSummary, nil } -func getBareMetalHostSummaries(ctx context.Context, computeClient core.ComputeClient, identityClient identity.IdentityClient, - compartmentId string) ([]core.ComputeBareMetalHostSummary, error) { - - computeCapacityTopology, err := getComputeCapacityTopologies(ctx, computeClient, identityClient, compartmentId) +func getBareMetalHostSummaries(ctx context.Context, client Client) ([]core.ComputeBareMetalHostSummary, error) { + computeCapacityTopology, err := getComputeCapacityTopologies(ctx, client) if err != nil { return nil, fmt.Errorf("unable to get compute capacity topologies: %s", err.Error()) } @@ -132,7 +130,7 @@ func getBareMetalHostSummaries(ctx context.Context, computeClient core.ComputeCl var bareMetalHostSummaries []core.ComputeBareMetalHostSummary for _, cct := range computeCapacityTopology { - bareMetalHostSummary, err := getBMHSummaryPerComputeCapacityTopology(ctx, computeClient, *cct.Id, compartmentId) + bareMetalHostSummary, err := getBMHSummaryPerComputeCapacityTopology(ctx, client, *cct.Id) if err != nil { return nil, fmt.Errorf("unable to get bare metal hosts info: %s", err.Error()) } @@ -143,7 +141,7 @@ func getBareMetalHostSummaries(ctx context.Context, computeClient core.ComputeCl return bareMetalHostSummaries, nil } -func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []common.ComputeInstances) (*common.Vertex, error) { +func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []topology.ComputeInstances) (*topology.Vertex, error) { instanceToNodeMap := make(map[string]string) for _, ci := range cis { for 
instance, node := range ci.Instances { @@ -152,15 +150,15 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []c } klog.V(4).Infof("Instance/Node map %v", instanceToNodeMap) - nodes := make(map[string]*common.Vertex) - forest := make(map[string]*common.Vertex) + nodes := make(map[string]*topology.Vertex) + forest := make(map[string]*topology.Vertex) levelWiseSwitchCount := map[level]int{localBlockLevel: 0, networkBlockLevel: 0, hpcIslandLevel: 0} bareMetalHostSummaries = filterAndSort(bareMetalHostSummaries, instanceToNodeMap) for _, bmhSummary := range bareMetalHostSummaries { nodeName := instanceToNodeMap[*bmhSummary.InstanceId] delete(instanceToNodeMap, *bmhSummary.InstanceId) - instance := &common.Vertex{ + instance := &topology.Vertex{ Name: nodeName, ID: *bmhSummary.InstanceId, } @@ -169,9 +167,9 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []c localBlock, ok := nodes[localBlockId] if !ok { levelWiseSwitchCount[localBlockLevel]++ - localBlock = &common.Vertex{ + localBlock = &topology.Vertex{ ID: localBlockId, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), Name: fmt.Sprintf("Switch.%d.%d", localBlockLevel, levelWiseSwitchCount[localBlockLevel]), } nodes[localBlockId] = localBlock @@ -182,9 +180,9 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []c networkBlock, ok := nodes[networkBlockId] if !ok { levelWiseSwitchCount[networkBlockLevel]++ - networkBlock = &common.Vertex{ + networkBlock = &topology.Vertex{ ID: networkBlockId, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), Name: fmt.Sprintf("Switch.%d.%d", networkBlockLevel, levelWiseSwitchCount[networkBlockLevel]), } nodes[networkBlockId] = networkBlock @@ -195,9 +193,9 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []c hpcIsland, ok := nodes[hpcIslandId] if !ok { 
levelWiseSwitchCount[hpcIslandLevel]++ - hpcIsland = &common.Vertex{ + hpcIsland = &topology.Vertex{ ID: hpcIslandId, - Vertices: make(map[string]*common.Vertex), + Vertices: make(map[string]*topology.Vertex), Name: fmt.Sprintf("Switch.%d.%d", hpcIslandLevel, levelWiseSwitchCount[hpcIslandLevel]), } nodes[hpcIslandId] = hpcIsland @@ -208,22 +206,22 @@ func toGraph(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, cis []c if len(instanceToNodeMap) != 0 { klog.V(4).Infof("Adding nodes w/o topology: %v", instanceToNodeMap) - metrics.SetMissingTopology(common.ProviderOCI, len(instanceToNodeMap)) - sw := &common.Vertex{ - ID: common.NoTopology, - Vertices: make(map[string]*common.Vertex), + metrics.SetMissingTopology(NAME, len(instanceToNodeMap)) + sw := &topology.Vertex{ + ID: topology.NoTopology, + Vertices: make(map[string]*topology.Vertex), } for instanceID, nodeName := range instanceToNodeMap { - sw.Vertices[instanceID] = &common.Vertex{ + sw.Vertices[instanceID] = &topology.Vertex{ Name: nodeName, ID: instanceID, } } - forest[common.NoTopology] = sw + forest[topology.NoTopology] = sw } - root := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), + root := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } for name, node := range forest { root.Vertices[name] = node @@ -285,29 +283,13 @@ func filterAndSort(bareMetalHostSummaries []*core.ComputeBareMetalHostSummary, i return filtered } -func generateInstanceTopology(ctx context.Context, provider OCICommon.ConfigurationProvider, ci *common.ComputeInstances, bareMetalHostSummaries []*core.ComputeBareMetalHostSummary) ([]*core.ComputeBareMetalHostSummary, error) { - identityClient, err := identity.NewIdentityClientWithConfigurationProvider(provider) +func generateInstanceTopology(ctx context.Context, factory ClientFactory, ci *topology.ComputeInstances, bareMetalHostSummaries []*core.ComputeBareMetalHostSummary) ([]*core.ComputeBareMetalHostSummary, error) { + client, err := 
factory(ci.Region) if err != nil { - return nil, fmt.Errorf("unable to create identity client. Bailing out : %v", err) - } - - tenacyOCID, err := provider.TenancyOCID() - if err != nil { - return nil, fmt.Errorf("unable to get tenancy OCID from config: %s", err.Error()) - } - - computeClient, err := core.NewComputeClientWithConfigurationProvider(provider) - if err != nil { - return nil, fmt.Errorf("unable to get compute client: %s", err.Error()) - } - - if len(ci.Region) != 0 { - klog.Infof("Use provided region %s", ci.Region) - identityClient.SetRegion(ci.Region) - computeClient.SetRegion(ci.Region) + return nil, err } - bmh, err := getBareMetalHostSummaries(ctx, computeClient, identityClient, tenacyOCID) + bmh, err := getBareMetalHostSummaries(ctx, client) if err != nil { return nil, fmt.Errorf("unable to populate compute capacity topology: %s", err.Error()) } diff --git a/pkg/providers/oci/provider.go b/pkg/providers/oci/provider.go index 80d82bf..595d4c3 100644 --- a/pkg/providers/oci/provider.go +++ b/pkg/providers/oci/provider.go @@ -22,21 +22,83 @@ import ( OCICommon "github.com/oracle/oci-go-sdk/v65/common" "github.com/oracle/oci-go-sdk/v65/common/auth" + "github.com/oracle/oci-go-sdk/v65/core" + "github.com/oracle/oci-go-sdk/v65/identity" v1 "k8s.io/api/core/v1" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/engines/k8s" - "github.com/NVIDIA/topograph/pkg/engines/slurm" + "github.com/NVIDIA/topograph/pkg/providers" + "github.com/NVIDIA/topograph/pkg/topology" ) -type Provider struct{} +const NAME = "oci" -func GetProvider() (*Provider, error) { - return &Provider{}, nil +type Provider struct { + clientFactory ClientFactory } -func (p *Provider) GetCredentials(creds map[string]string) (interface{}, error) { +type ClientFactory func(region string) (Client, error) + +type Client interface { + TenancyOCID() string + ListAvailabilityDomains(ctx context.Context, request identity.ListAvailabilityDomainsRequest) 
(response identity.ListAvailabilityDomainsResponse, err error) + ListComputeCapacityTopologies(ctx context.Context, request core.ListComputeCapacityTopologiesRequest) (response core.ListComputeCapacityTopologiesResponse, err error) + ListComputeCapacityTopologyComputeBareMetalHosts(ctx context.Context, request core.ListComputeCapacityTopologyComputeBareMetalHostsRequest) (response core.ListComputeCapacityTopologyComputeBareMetalHostsResponse, err error) +} + +type ociClient struct { + identity.IdentityClient + core.ComputeClient + tenancyOCID string +} + +func (c *ociClient) TenancyOCID() string { + return c.tenancyOCID +} + +func NamedLoader() (string, providers.Loader) { + return NAME, Loader +} + +func Loader(ctx context.Context, config providers.Config) (providers.Provider, error) { + provider, err := getConfigurationProvider(config.Creds) + if err != nil { + return nil, err + } + + clientFactory := func(region string) (Client, error) { + identityClient, err := identity.NewIdentityClientWithConfigurationProvider(provider) + if err != nil { + return nil, fmt.Errorf("unable to create identity client. 
Bailing out : %v", err) + } + + tenacyOCID, err := provider.TenancyOCID() + if err != nil { + return nil, fmt.Errorf("unable to get tenancy OCID from config: %s", err.Error()) + } + + computeClient, err := core.NewComputeClientWithConfigurationProvider(provider) + if err != nil { + return nil, fmt.Errorf("unable to get compute client: %s", err.Error()) + } + + if len(region) != 0 { + klog.Infof("Use provided region %s", region) + identityClient.SetRegion(region) + computeClient.SetRegion(region) + } + + return &ociClient{ + IdentityClient: identityClient, + ComputeClient: computeClient, + tenancyOCID: tenacyOCID, + }, nil + } + + return New(clientFactory), nil +} + +func getConfigurationProvider(creds map[string]string) (OCICommon.ConfigurationProvider, error) { if len(creds) != 0 { var tenancyID, userID, region, fingerprint, privateKey, passphrase string klog.Info("Using provided credentials") @@ -77,36 +139,39 @@ func (p *Provider) GetCredentials(creds map[string]string) (interface{}, error) return configProvider, nil } -func (p *Provider) GetComputeInstances(ctx context.Context, engine common.Engine) ([]common.ComputeInstances, error) { - klog.InfoS("Getting compute instances", "provider", common.ProviderOCI, "engine", engine) - - switch eng := engine.(type) { - case *slurm.SlurmEngine: - nodes, err := slurm.GetNodeList(ctx) - if err != nil { - return nil, err - } - i2n, err := instanceToNodeMap(nodes) - if err != nil { - return nil, err - } - return []common.ComputeInstances{{Instances: i2n}}, nil - - case *k8s.K8sEngine: - return eng.GetComputeInstances(ctx, - func(n *v1.Node) string { return n.Labels["topology.kubernetes.io/region"] }, - func(n *v1.Node) string { return n.Spec.ProviderID }) - default: - return nil, fmt.Errorf("unsupported engine %q", engine) +func New(ociClientFactory ClientFactory) *Provider { + return &Provider{ + clientFactory: ociClientFactory, } } -func (p *Provider) GenerateTopologyConfig(ctx context.Context, cr interface{}, _ int, 
instances []common.ComputeInstances) (*common.Vertex, error) { - creds := cr.(OCICommon.ConfigurationProvider) - cfg, err := GenerateInstanceTopology(ctx, creds, instances) +func (p *Provider) GenerateTopologyConfig(ctx context.Context, _ int, instances []topology.ComputeInstances) (*topology.Vertex, error) { + cfg, err := GenerateInstanceTopology(ctx, p.clientFactory, instances) if err != nil { return nil, err } return toGraph(cfg, instances) } + +// Engine support + +// Instances2NodeMap implements slurm.instanceMapper +func (p *Provider) Instances2NodeMap(ctx context.Context, nodes []string) (map[string]string, error) { + return instanceToNodeMap(nodes) +} + +// GetComputeInstancesRegion implements slurm.instanceMapper +func (p *Provider) GetComputeInstancesRegion() (string, error) { + return "", nil +} + +// GetNodeRegion implements k8s.k8sNodeInfo +func (p *Provider) GetNodeRegion(node *v1.Node) (string, error) { + return node.Labels["topology.kubernetes.io/region"], nil +} + +// GetNodeInstance implements k8s.k8sNodeInfo +func (p *Provider) GetNodeInstance(node *v1.Node) (string, error) { + return node.Spec.ProviderID, nil +} diff --git a/pkg/providers/providers.go b/pkg/providers/providers.go new file mode 100644 index 0000000..06369e5 --- /dev/null +++ b/pkg/providers/providers.go @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package providers + +import ( + "context" + "errors" + "fmt" + + "github.com/NVIDIA/topograph/internal/component" + "github.com/NVIDIA/topograph/pkg/topology" +) + +type Provider interface { + GenerateTopologyConfig(ctx context.Context, pageSize int, instances []topology.ComputeInstances) (*topology.Vertex, error) +} + +type Config struct { + Creds map[string]string + Params map[string]any +} +type NamedLoader = component.NamedLoader[Provider, Config] +type Loader = component.Loader[Provider, Config] +type Registry component.Registry[Provider, Config] + +var ErrUnsupportedProvider = errors.New("unsupported provider") + +func NewRegistry(namedLoaders ...NamedLoader) Registry { + return Registry(component.NewRegistry(namedLoaders...)) +} + +func (r Registry) Get(name string) (Loader, error) { + loader, ok := r[name] + if !ok { + return nil, fmt.Errorf("unsupported provider %q, %w", name, ErrUnsupportedProvider) + } + + return loader, nil +} diff --git a/pkg/providers/test/test.go b/pkg/providers/test/test.go new file mode 100644 index 0000000..0c2b769 --- /dev/null +++ b/pkg/providers/test/test.go @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package test + +import ( + "context" + "fmt" + + "github.com/NVIDIA/topograph/internal/config" + "github.com/NVIDIA/topograph/pkg/models" + "github.com/NVIDIA/topograph/pkg/providers" + "github.com/NVIDIA/topograph/pkg/topology" + "github.com/NVIDIA/topograph/pkg/translate" + "k8s.io/klog/v2" +) + +const NAME = "test" + +type Provider struct { + tree *topology.Vertex + instance2node map[string]string +} + +type Params struct { + ModelPath string `mapstructure:"model_path"` +} + +func NamedLoader() (string, providers.Loader) { + return NAME, Loader +} + +func Loader(ctx context.Context, config providers.Config) (providers.Provider, error) { + return New(config) +} + +func New(cfg providers.Config) (*Provider, error) { + var p Params + if err := config.Decode(cfg.Params, &p); err != nil { + return nil, fmt.Errorf("error decoding params: %w", err) + } + provider := &Provider{} + + if len(p.ModelPath) == 0 { + provider.tree, provider.instance2node = translate.GetTreeTestSet(false) + } else { + klog.InfoS("Using simulated topology", "model path", p.ModelPath) + model, err := models.NewModelFromFile(p.ModelPath) + if err != nil { + return nil, err // Wrapped by models.NewModelFromFile + } + provider.tree, provider.instance2node = model.ToTree() + } + return provider, nil +} + +func (p *Provider) GetComputeInstances(_ context.Context) ([]topology.ComputeInstances, error) { + return []topology.ComputeInstances{ + { + Instances: p.instance2node, + }, + }, nil +} + +func (p *Provider) GenerateTopologyConfig(_ context.Context, _ int, _ []topology.ComputeInstances) (*topology.Vertex, error) { + return p.tree, nil +} diff --git a/pkg/registry/registry.go b/pkg/registry/registry.go new file mode 100644 index 0000000..f5ae1c9 --- /dev/null +++ b/pkg/registry/registry.go @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package registry + +import ( + "github.com/NVIDIA/topograph/pkg/engines" + "github.com/NVIDIA/topograph/pkg/engines/k8s" + "github.com/NVIDIA/topograph/pkg/engines/slurm" + engine_test "github.com/NVIDIA/topograph/pkg/engines/test" + + "github.com/NVIDIA/topograph/pkg/providers" + "github.com/NVIDIA/topograph/pkg/providers/aws" + "github.com/NVIDIA/topograph/pkg/providers/baremetal" + "github.com/NVIDIA/topograph/pkg/providers/cw" + "github.com/NVIDIA/topograph/pkg/providers/gcp" + "github.com/NVIDIA/topograph/pkg/providers/oci" + provider_test "github.com/NVIDIA/topograph/pkg/providers/test" +) + +var Providers = providers.NewRegistry( + aws.NamedLoader, + baremetal.NamedLoader, + cw.NamedLoader, + gcp.NamedLoader, + oci.NamedLoader, + provider_test.NamedLoader, +) + +var Engines = engines.NewRegistry( + k8s.NamedLoader, + slurm.NamedLoader, + engine_test.NamedLoader, +) diff --git a/pkg/server/engine.go b/pkg/server/engine.go index a0260ed..4a415e4 100644 --- a/pkg/server/engine.go +++ b/pkg/server/engine.go @@ -18,23 +18,25 @@ package server import ( "context" + "errors" "net/http" "time" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" - "github.com/NVIDIA/topograph/pkg/factory" + "github.com/NVIDIA/topograph/pkg/engines" "github.com/NVIDIA/topograph/pkg/metrics" - "github.com/NVIDIA/topograph/pkg/utils" + "github.com/NVIDIA/topograph/pkg/providers" + 
"github.com/NVIDIA/topograph/pkg/registry" + "github.com/NVIDIA/topograph/pkg/topology" ) type asyncController struct { - queue *utils.TrailingDelayQueue + queue *TrailingDelayQueue } -func processRequest(item interface{}) (interface{}, *common.HTTPError) { - tr := item.(*common.TopologyRequest) +func processRequest(item interface{}) (interface{}, *HTTPError) { + tr := item.(*topology.Request) var code int start := time.Now() @@ -49,55 +51,82 @@ func processRequest(item interface{}) (interface{}, *common.HTTPError) { return ret, err } -func processTopologyRequest(tr *common.TopologyRequest) ([]byte, *common.HTTPError) { +func processTopologyRequest(tr *topology.Request) ([]byte, *HTTPError) { klog.InfoS("Creating topology config", "provider", tr.Provider.Name, "engine", tr.Engine.Name) - eng, httpErr := factory.GetEngine(tr.Engine.Name) - if httpErr != nil { - klog.Error(httpErr.Error()) - return nil, httpErr + engLoader, err := registry.Engines.Get(tr.Engine.Name) + if err != nil { + klog.Error(err.Error()) + if errors.Is(err, engines.ErrUnsupportedEngine) { + return nil, NewHTTPError(http.StatusBadRequest, err.Error()) + } + return nil, NewHTTPError(http.StatusInternalServerError, err.Error()) + } + + prvLoader, err := registry.Providers.Get(tr.Provider.Name) + if err != nil { + klog.Error(err.Error()) + if errors.Is(err, providers.ErrUnsupportedProvider) { + return nil, NewHTTPError(http.StatusBadRequest, err.Error()) + } + return nil, NewHTTPError(http.StatusInternalServerError, err.Error()) + } + + ctx := context.Background() + + eng, err := engLoader(ctx, engines.Config{}) + if err != nil { + // TODO: Logic to determine between StatusBadRequest and StatusInternalServerError + return nil, NewHTTPError(http.StatusBadRequest, err.Error()) } - prv, httpErr := factory.GetProvider(tr.Provider.Name, tr.Provider.Params) - if httpErr != nil { - klog.Error(httpErr.Error()) - return nil, httpErr + prv, err := prvLoader(ctx, providers.Config{ + Creds: 
checkCredentials(tr.Provider.Creds, srv.cfg.Credentials), + Params: tr.Provider.Params, + }) + if err != nil { + // TODO: Logic to determine between StatusBadRequest and StatusInternalServerError + return nil, NewHTTPError(http.StatusBadRequest, err.Error()) } - ctx := context.TODO() + // Optional provider interface if it directly supports getting compute instances. + // (e.g., Test provider) + type simpleGetComputeInstances interface { + GetComputeInstances(ctx context.Context) ([]topology.ComputeInstances, error) + } // if the instance/node mapping is not provided in the payload, get the mapping from the provider computeInstances := tr.Nodes if len(computeInstances) == 0 { var err error - computeInstances, err = prv.GetComputeInstances(ctx, eng) - if err != nil { - return nil, common.NewHTTPError(http.StatusInternalServerError, err.Error()) + switch t := prv.(type) { + case simpleGetComputeInstances: + computeInstances, err = t.GetComputeInstances(ctx) + default: + computeInstances, err = eng.GetComputeInstances(ctx, prv) } - } - creds, err := prv.GetCredentials(checkCredentials(tr.Provider.Creds, srv.cfg.Credentials)) - if err != nil { - klog.Error(err.Error()) - return nil, common.NewHTTPError(http.StatusUnauthorized, err.Error()) + if err != nil { + return nil, NewHTTPError(http.StatusInternalServerError, err.Error()) + } } - var root *common.Vertex + var root *topology.Vertex if srv.cfg.FwdSvcURL != nil { // forward the request to the global service root, err = forwardRequest(ctx, tr, *srv.cfg.FwdSvcURL, computeInstances) } else { - root, err = prv.GenerateTopologyConfig(ctx, creds, srv.cfg.PageSize, computeInstances) + root, err = prv.GenerateTopologyConfig(ctx, srv.cfg.PageSize, computeInstances) } if err != nil { klog.Error(err.Error()) - return nil, common.NewHTTPError(http.StatusInternalServerError, err.Error()) + return nil, NewHTTPError(http.StatusInternalServerError, err.Error()) } data, err := eng.GenerateOutput(ctx, root, tr.Engine.Params) if err != 
nil { klog.Error(err.Error()) - return nil, common.NewHTTPError(http.StatusInternalServerError, err.Error()) + return nil, NewHTTPError(http.StatusInternalServerError, err.Error()) } return data, nil diff --git a/pkg/server/grpc_client.go b/pkg/server/grpc_client.go index e6c439e..7f1fb63 100644 --- a/pkg/server/grpc_client.go +++ b/pkg/server/grpc_client.go @@ -24,12 +24,12 @@ import ( "google.golang.org/grpc/credentials/insecure" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" "github.com/NVIDIA/topograph/pkg/metrics" pb "github.com/NVIDIA/topograph/pkg/protos" + "github.com/NVIDIA/topograph/pkg/topology" ) -func forwardRequest(ctx context.Context, tr *common.TopologyRequest, url string, cis []common.ComputeInstances) (*common.Vertex, error) { +func forwardRequest(ctx context.Context, tr *topology.Request, url string, cis []topology.ComputeInstances) (*topology.Vertex, error) { klog.Infof("Forwarding request to %s", url) conn, err := grpc.NewClient(url, grpc.WithTransportCredentials(insecure.NewCredentials())) if err != nil { @@ -63,16 +63,18 @@ func forwardRequest(ctx context.Context, tr *common.TopologyRequest, url string, } // getTopologyFormat derives topology format from engine parameters: tree (default) or block -func getTopologyFormat(params map[string]string) string { +func getTopologyFormat(params map[string]any) string { if len(params) != 0 { - if format, ok := params[common.KeyPlugin]; ok && len(format) != 0 { - return format + if formatI, ok := params[topology.KeyPlugin]; ok { + if format, ok := formatI.(string); ok && len(format) != 0 { + return format + } } } - return common.ValTopologyTree + return topology.ValTopologyTree } -func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances, format string) *common.Vertex { +func toGraph(response *pb.TopologyResponse, cis []topology.ComputeInstances, format string) *topology.Vertex { i2n := make(map[string]string) for _, ci := range cis { for instance, node := range ci.Instances 
{ @@ -81,9 +83,9 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances, forma } klog.V(4).Infof("Instance/Node map %v", i2n) - forest := make(map[string]*common.Vertex) - blocks := make(map[string]*common.Vertex) - vertices := make(map[string]*common.Vertex) + forest := make(map[string]*topology.Vertex) + blocks := make(map[string]*topology.Vertex) + vertices := make(map[string]*topology.Vertex) for _, ins := range response.Instances { nodeName, ok := i2n[ins.Id] @@ -95,7 +97,7 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances, forma klog.V(4).Infof("Found node %q instance %q", nodeName, ins.Id) delete(i2n, ins.Id) - vertex := &common.Vertex{ + vertex := &topology.Vertex{ Name: nodeName, ID: ins.Id, } @@ -107,9 +109,9 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances, forma switchName := fmt.Sprintf("nvlink-%s", ins.NvlinkDomain) sw, ok := forest[switchName] if !ok { - sw = &common.Vertex{ + sw = &topology.Vertex{ ID: switchName, - Vertices: map[string]*common.Vertex{id: vertex}, + Vertices: map[string]*topology.Vertex{id: vertex}, } forest[switchName] = sw blocks[switchName] = sw @@ -126,9 +128,9 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances, forma // create or reuse vertex sw, ok := vertices[net] if !ok { - sw = &common.Vertex{ + sw = &topology.Vertex{ ID: net, - Vertices: map[string]*common.Vertex{id: vertex}, + Vertices: map[string]*topology.Vertex{id: vertex}, } vertices[net] = sw } else { @@ -148,39 +150,39 @@ func toGraph(response *pb.TopologyResponse, cis []common.ComputeInstances, forma if len(i2n) != 0 { klog.V(4).Infof("Adding nodes w/o topology: %v", i2n) metrics.SetMissingTopology("GTS", len(i2n)) - sw := &common.Vertex{ - ID: common.NoTopology, - Vertices: make(map[string]*common.Vertex), + sw := &topology.Vertex{ + ID: topology.NoTopology, + Vertices: make(map[string]*topology.Vertex), } for instanceID, nodeName := range i2n { - 
sw.Vertices[instanceID] = &common.Vertex{ + sw.Vertices[instanceID] = &topology.Vertex{ Name: nodeName, ID: instanceID, } } - forest[common.NoTopology] = sw + forest[topology.NoTopology] = sw } - treeRoot := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), + treeRoot := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } for name, node := range forest { treeRoot.Vertices[name] = node } - metadata := map[string]string{common.KeyPlugin: format} - if format == common.ValTopologyBlock { - blockRoot := &common.Vertex{ - Vertices: make(map[string]*common.Vertex), + metadata := map[string]string{topology.KeyPlugin: format} + if format == topology.ValTopologyBlock { + blockRoot := &topology.Vertex{ + Vertices: make(map[string]*topology.Vertex), } for name, domain := range blocks { blockRoot.Vertices[name] = domain } - return &common.Vertex{ - Vertices: map[string]*common.Vertex{ - common.ValTopologyBlock: blockRoot, - common.ValTopologyTree: treeRoot, + return &topology.Vertex{ + Vertices: map[string]*topology.Vertex{ + topology.ValTopologyBlock: blockRoot, + topology.ValTopologyTree: treeRoot, }, Metadata: metadata, } diff --git a/pkg/server/grpc_client_test.go b/pkg/server/grpc_client_test.go index ed4479c..56b9007 100644 --- a/pkg/server/grpc_client_test.go +++ b/pkg/server/grpc_client_test.go @@ -23,8 +23,8 @@ import ( "github.com/stretchr/testify/require" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" pb "github.com/NVIDIA/topograph/pkg/protos" + "github.com/NVIDIA/topograph/pkg/topology" ) // follow example in pkg/toposim/testdata/toposim.yaml @@ -95,7 +95,7 @@ func TestToGraph(t *testing.T) { }, } - cis := []common.ComputeInstances{ + cis := []topology.ComputeInstances{ { Instances: map[string]string{ "n10-1": "N10-1", @@ -113,69 +113,69 @@ func TestToGraph(t *testing.T) { }, } - v101 := &common.Vertex{Name: "N10-1", ID: "n10-1"} - v102 := &common.Vertex{Name: "N10-2", ID: "n10-2"} - v111 := &common.Vertex{Name: "N11-1", ID: 
"n11-1"} - v112 := &common.Vertex{Name: "N11-2", ID: "n11-2"} - v121 := &common.Vertex{Name: "N12-1", ID: "n12-1"} - v122 := &common.Vertex{Name: "N12-2", ID: "n12-2"} - v131 := &common.Vertex{Name: "N13-1", ID: "n13-1"} - v132 := &common.Vertex{Name: "N13-2", ID: "n13-2"} - v141 := &common.Vertex{Name: "N14-1", ID: "n14-1"} - v142 := &common.Vertex{Name: "N14-2", ID: "n14-2"} - cpu1 := &common.Vertex{Name: "CPU1", ID: "cpu1"} + v101 := &topology.Vertex{Name: "N10-1", ID: "n10-1"} + v102 := &topology.Vertex{Name: "N10-2", ID: "n10-2"} + v111 := &topology.Vertex{Name: "N11-1", ID: "n11-1"} + v112 := &topology.Vertex{Name: "N11-2", ID: "n11-2"} + v121 := &topology.Vertex{Name: "N12-1", ID: "n12-1"} + v122 := &topology.Vertex{Name: "N12-2", ID: "n12-2"} + v131 := &topology.Vertex{Name: "N13-1", ID: "n13-1"} + v132 := &topology.Vertex{Name: "N13-2", ID: "n13-2"} + v141 := &topology.Vertex{Name: "N14-1", ID: "n14-1"} + v142 := &topology.Vertex{Name: "N14-2", ID: "n14-2"} + cpu1 := &topology.Vertex{Name: "CPU1", ID: "cpu1"} - sw11 := &common.Vertex{ID: "sw11", Vertices: map[string]*common.Vertex{"n11-1": v111, "n11-2": v112}} - sw12 := &common.Vertex{ID: "sw12", Vertices: map[string]*common.Vertex{"n12-1": v121, "n12-2": v122}} - sw13 := &common.Vertex{ID: "sw13", Vertices: map[string]*common.Vertex{"n13-1": v131, "n13-2": v132}} - sw14 := &common.Vertex{ID: "sw14", Vertices: map[string]*common.Vertex{"n14-1": v141, "n14-2": v142}} - sw21 := &common.Vertex{ID: "sw21", Vertices: map[string]*common.Vertex{"sw11": sw11, "sw12": sw12}} - sw22 := &common.Vertex{ID: "sw22", Vertices: map[string]*common.Vertex{"sw13": sw13, "sw14": sw14}} - sw3 := &common.Vertex{ID: "sw3", Vertices: map[string]*common.Vertex{"sw21": sw21, "sw22": sw22}} + sw11 := &topology.Vertex{ID: "sw11", Vertices: map[string]*topology.Vertex{"n11-1": v111, "n11-2": v112}} + sw12 := &topology.Vertex{ID: "sw12", Vertices: map[string]*topology.Vertex{"n12-1": v121, "n12-2": v122}} + sw13 := 
&topology.Vertex{ID: "sw13", Vertices: map[string]*topology.Vertex{"n13-1": v131, "n13-2": v132}} + sw14 := &topology.Vertex{ID: "sw14", Vertices: map[string]*topology.Vertex{"n14-1": v141, "n14-2": v142}} + sw21 := &topology.Vertex{ID: "sw21", Vertices: map[string]*topology.Vertex{"sw11": sw11, "sw12": sw12}} + sw22 := &topology.Vertex{ID: "sw22", Vertices: map[string]*topology.Vertex{"sw13": sw13, "sw14": sw14}} + sw3 := &topology.Vertex{ID: "sw3", Vertices: map[string]*topology.Vertex{"sw21": sw21, "sw22": sw22}} - nv1 := &common.Vertex{ID: "nvlink-nv1", Vertices: map[string]*common.Vertex{"n10-1": v101, "n10-2": v102, "n11-1": v111, "n11-2": v112}} + nv1 := &topology.Vertex{ID: "nvlink-nv1", Vertices: map[string]*topology.Vertex{"n10-1": v101, "n10-2": v102, "n11-1": v111, "n11-2": v112}} - extra := &common.Vertex{ID: common.NoTopology, Vertices: map[string]*common.Vertex{"cpu1": cpu1}} - treeRoot := &common.Vertex{Vertices: map[string]*common.Vertex{"nvlink-nv1": nv1, "sw3": sw3, common.NoTopology: extra}} - blockRoot := &common.Vertex{Vertices: map[string]*common.Vertex{"nvlink-nv1": nv1}} - root := &common.Vertex{ - Vertices: map[string]*common.Vertex{common.ValTopologyBlock: blockRoot, common.ValTopologyTree: treeRoot}, - Metadata: map[string]string{common.KeyPlugin: common.ValTopologyBlock}, + extra := &topology.Vertex{ID: topology.NoTopology, Vertices: map[string]*topology.Vertex{"cpu1": cpu1}} + treeRoot := &topology.Vertex{Vertices: map[string]*topology.Vertex{"nvlink-nv1": nv1, "sw3": sw3, topology.NoTopology: extra}} + blockRoot := &topology.Vertex{Vertices: map[string]*topology.Vertex{"nvlink-nv1": nv1}} + root := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{topology.ValTopologyBlock: blockRoot, topology.ValTopologyTree: treeRoot}, + Metadata: map[string]string{topology.KeyPlugin: topology.ValTopologyBlock}, } - require.Equal(t, root, toGraph(&pb.TopologyResponse{Instances: instances}, cis, common.ValTopologyBlock)) + require.Equal(t, 
root, toGraph(&pb.TopologyResponse{Instances: instances}, cis, topology.ValTopologyBlock)) } func TestGetTopologyFormat(t *testing.T) { testCases := []struct { name string - params map[string]string + params map[string]any format string }{ { name: "Case 1: nil params", params: nil, - format: common.ValTopologyTree, + format: topology.ValTopologyTree, }, { name: "Case 2: empty params", - params: make(map[string]string), - format: common.ValTopologyTree, + params: make(map[string]any), + format: topology.ValTopologyTree, }, { name: "Case 3: missing key", - params: map[string]string{"a": "b"}, - format: common.ValTopologyTree, + params: map[string]any{"a": "b"}, + format: topology.ValTopologyTree, }, { name: "Case 4: block topology", - params: map[string]string{common.KeyPlugin: common.ValTopologyBlock}, - format: common.ValTopologyBlock, + params: map[string]any{topology.KeyPlugin: topology.ValTopologyBlock}, + format: topology.ValTopologyBlock, }, { name: "Case 5: tree topology", - params: map[string]string{common.KeyPlugin: common.ValTopologyTree}, - format: common.ValTopologyTree, + params: map[string]any{topology.KeyPlugin: topology.ValTopologyTree}, + format: topology.ValTopologyTree, }, } diff --git a/pkg/server/http_error.go b/pkg/server/http_error.go new file mode 100644 index 0000000..ee2fb8b --- /dev/null +++ b/pkg/server/http_error.go @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package server + +import "fmt" + +type HTTPError struct { + Code int + Message string +} + +func NewHTTPError(code int, msg string) *HTTPError { + return &HTTPError{ + Code: code, + Message: msg, + } +} + +func (e *HTTPError) Error() string { + return fmt.Sprintf("HTTP %d: %s", e.Code, e.Message) +} diff --git a/pkg/server/http_server.go b/pkg/server/http_server.go index 78ecad7..a69ea22 100644 --- a/pkg/server/http_server.go +++ b/pkg/server/http_server.go @@ -25,9 +25,9 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "k8s.io/klog/v2" - "github.com/NVIDIA/topograph/pkg/common" "github.com/NVIDIA/topograph/pkg/config" - "github.com/NVIDIA/topograph/pkg/utils" + "github.com/NVIDIA/topograph/pkg/registry" + "github.com/NVIDIA/topograph/pkg/topology" ) type HttpServer struct { @@ -55,7 +55,7 @@ func InitHttpServer(ctx context.Context, cfg *config.Config) { Handler: mux, }, async: &asyncController{ - queue: utils.NewTrailingDelayQueue(processRequest, cfg.RequestAggregationDelay), + queue: NewTrailingDelayQueue(processRequest, cfg.RequestAggregationDelay), }, } } @@ -98,7 +98,7 @@ func generate(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte(uid)) } -func readRequest(w http.ResponseWriter, r *http.Request) *common.TopologyRequest { +func readRequest(w http.ResponseWriter, r *http.Request) *topology.Request { if r.Method != http.MethodPost { http.Error(w, "Invalid request method", http.StatusMethodNotAllowed) return nil @@ -111,7 +111,7 @@ func readRequest(w http.ResponseWriter, r *http.Request) *common.TopologyRequest } defer func() { _ = r.Body.Close() }() - tr, err := common.GetTopologyRequest(body) + tr, err := topology.GetTopologyRequest(body) if err != nil { http.Error(w, err.Error(), http.StatusBadRequest) return nil @@ -135,30 +135,37 @@ func readRequest(w http.ResponseWriter, r *http.Request) 
*common.TopologyRequest return tr } -func validate(tr *common.TopologyRequest) error { - switch tr.Provider.Name { - case common.ProviderAWS, common.ProviderOCI, common.ProviderGCP, common.ProviderCW, common.ProviderBM, common.ProviderTest: - //nop - case "": - return fmt.Errorf("no provider given for topology request") - default: - return fmt.Errorf("unsupported provider %s", tr.Provider.Name) +func validate(tr *topology.Request) error { + _, exists := registry.Providers[tr.Provider.Name] + if !exists { + switch tr.Provider.Name { + case "": + return fmt.Errorf("no provider given for topology request") + default: + return fmt.Errorf("unsupported provider %s", tr.Provider.Name) + } } - switch tr.Engine.Name { - case common.EngineSLURM, common.EngineTest: - //nop - case common.EngineK8S: - for _, key := range []string{common.KeyTopoConfigPath, common.KeyTopoConfigmapName, common.KeyTopoConfigmapNamespace} { - if _, ok := tr.Engine.Params[key]; !ok { - return fmt.Errorf("missing %q parameter", key) - } + _, exists = registry.Engines[tr.Engine.Name] + if !exists { + switch tr.Engine.Name { + + // case common.EngineSLURM, common.EngineTest: + // //nop + // case common.EngineK8S: + // for _, key := range []string{common.KeyTopoConfigPath, common.KeyTopoConfigmapName, common.KeyTopoConfigmapNamespace} { + // if _, ok := tr.Engine.Params[key]; !ok { + // return fmt.Errorf("missing %q parameter", key) + // } + // } + case "": + return fmt.Errorf("no engine given for topology request") + default: + return fmt.Errorf("unsupported engine %s", tr.Engine.Name) } - case "": - return fmt.Errorf("no engine given for topology request") - default: - return fmt.Errorf("unsupported engine %s", tr.Engine.Name) } + // TODO: Validate K8s params + // This might be moved elsewhere in the flow return nil } @@ -169,7 +176,7 @@ func getresult(w http.ResponseWriter, r *http.Request) { return } - uid := r.URL.Query().Get(common.KeyUID) + uid := r.URL.Query().Get(topology.KeyUID) if len(uid) == 
0 { http.Error(w, "must specify request uid", http.StatusBadRequest) return diff --git a/pkg/utils/trailing_delay_queue.go b/pkg/server/trailing_delay_queue.go similarity index 95% rename from pkg/utils/trailing_delay_queue.go rename to pkg/server/trailing_delay_queue.go index 7588b4c..32fdbf4 100644 --- a/pkg/utils/trailing_delay_queue.go +++ b/pkg/server/trailing_delay_queue.go @@ -14,7 +14,7 @@ * limitations under the License. */ -package utils +package server import ( "fmt" @@ -25,13 +25,11 @@ import ( "github.com/google/uuid" lru "github.com/hashicorp/golang-lru" "k8s.io/klog/v2" - - "github.com/NVIDIA/topograph/pkg/common" ) const RequestHistorySize = 100 -type HandleFunc func(interface{}) (interface{}, *common.HTTPError) +type HandleFunc func(interface{}) (interface{}, *HTTPError) type Completion struct { Ret interface{} @@ -89,7 +87,7 @@ func (q *TrailingDelayQueue) run() { if data, err := q.handle(item); err != nil { res.Status = err.Code res.Message = err.Error() - klog.Errorf(res.Message) + klog.Error(res.Message) } else { res.Ret = data res.Status = http.StatusOK diff --git a/pkg/utils/trailing_delay_queue_test.go b/pkg/server/trailing_delay_queue_test.go similarity index 90% rename from pkg/utils/trailing_delay_queue_test.go rename to pkg/server/trailing_delay_queue_test.go index 54f1823..6ce74e3 100644 --- a/pkg/utils/trailing_delay_queue_test.go +++ b/pkg/server/trailing_delay_queue_test.go @@ -14,31 +14,30 @@ * limitations under the License. 
*/ -package utils +package server_test import ( "sync/atomic" "testing" "time" + "github.com/NVIDIA/topograph/pkg/server" lru "github.com/hashicorp/golang-lru" "github.com/stretchr/testify/require" "k8s.io/klog/v2" - - "github.com/NVIDIA/topograph/pkg/common" ) func TestTrailingDelayQueue(t *testing.T) { var counter int32 type Int struct{ val int } - processItem := func(item interface{}) (interface{}, *common.HTTPError) { + processItem := func(item interface{}) (interface{}, *server.HTTPError) { klog.Infof("Processing item: %v\n", item) atomic.AddInt32(&counter, 1) return nil, nil } - queue := NewTrailingDelayQueue(processItem, 2*time.Second) + queue := server.NewTrailingDelayQueue(processItem, 2*time.Second) for cycle := 1; cycle <= 2; cycle++ { for i := 0; i < 3; i++ { diff --git a/pkg/common/types.go b/pkg/topology/request.go similarity index 50% rename from pkg/common/types.go rename to pkg/topology/request.go index 69a2ba5..cdb5699 100644 --- a/pkg/common/types.go +++ b/pkg/topology/request.go @@ -14,76 +14,30 @@ * limitations under the License. 
*/ -package common +package topology import ( - "context" "encoding/json" "fmt" "sort" "strings" ) -// Vertex is a tree node, representing a compute node or a network switch, where -// - Name is a compute node name -// - ID is an CSP defined instance ID of switches and compute nodes -// - Vertices is a list of connected compute nodes or network switches -type Vertex struct { - Name string - ID string - Vertices map[string]*Vertex - Metadata map[string]string -} - -func (v *Vertex) String() string { - vertices := []string{} - for _, w := range v.Vertices { - vertices = append(vertices, w.ID) - } - return fmt.Sprintf("ID:%q Name:%q Vertices: %s", v.ID, v.Name, strings.Join(vertices, ",")) -} - -type HTTPError struct { - Code int - Message string -} - -func NewHTTPError(code int, msg string) *HTTPError { - return &HTTPError{ - Code: code, - Message: msg, - } -} - -func (e *HTTPError) Error() string { - return fmt.Sprintf("HTTP %d: %s", e.Code, e.Message) -} - -type Provider interface { - GetCredentials(map[string]string) (interface{}, error) - GetComputeInstances(context.Context, Engine) ([]ComputeInstances, error) - GenerateTopologyConfig(context.Context, interface{}, int, []ComputeInstances) (*Vertex, error) -} - -type Engine interface { - GenerateOutput(context.Context, *Vertex, map[string]string) ([]byte, error) -} - -type TopologyRequest struct { - Provider provider `json:"provider"` - Engine engine `json:"engine"` +type Request struct { + Provider Provider `json:"provider"` + Engine Engine `json:"engine"` Nodes []ComputeInstances `json:"nodes"` } -type provider struct { +type Provider struct { Name string `json:"name"` Creds map[string]string `json:"creds"` // access credentials - Params map[string]string `json:"params"` + Params map[string]any `json:"params"` } -type engine struct { - Name string `json:"name"` - Params map[string]string `json:"params"` +type Engine struct { + Name string `json:"name"` + Params map[string]any `json:"params"` } type 
ComputeInstances struct { @@ -91,37 +45,38 @@ type ComputeInstances struct { Instances map[string]string `json:"instances"` // : map } -func NewTopologyRequest(prv string, creds map[string]string, eng string, params map[string]string) *TopologyRequest { - return &TopologyRequest{ - Provider: provider{ +func NewRequest(prv string, creds map[string]string, eng string, params map[string]any) *Request { + return &Request{ + Provider: Provider{ Name: prv, Creds: creds, }, - Engine: engine{ + Engine: Engine{ Name: eng, Params: params, }, } } -func (p *TopologyRequest) String() string { +func (p *Request) String() string { var sb strings.Builder sb.WriteString("TopologyRequest:\n") - sb.WriteString(fmt.Sprintf(" Provider: %s\n", p.Provider.Name)) + sb.WriteString(fmt.Sprintf(" Provider:%s\n", spacer(p.Provider.Name))) sb.WriteString(map2string(p.Provider.Creds, " Credentials", true, "\n")) - sb.WriteString(map2string(p.Provider.Params, " Parameters", false, "\n")) - sb.WriteString(fmt.Sprintf(" Engine: %s\n", p.Engine.Name)) - sb.WriteString(map2string(p.Engine.Params, " Parameters", false, "\n")) - sb.WriteString(" Nodes: ") + sb.WriteString(mapOfAny2string(p.Provider.Params, " Parameters", false, "\n")) + sb.WriteString(fmt.Sprintf(" Engine:%s\n", spacer(p.Engine.Name))) + sb.WriteString(mapOfAny2string(p.Engine.Params, " Parameters", false, "\n")) + sb.WriteString(" Nodes:") for _, nodes := range p.Nodes { - sb.WriteString(map2string(nodes.Instances, nodes.Region, false, " ")) + sb.WriteByte(' ') + sb.WriteString(map2string(nodes.Instances, nodes.Region, false, "")) } sb.WriteString("\n") return sb.String() } -func GetTopologyRequest(body []byte) (*TopologyRequest, error) { - var payload TopologyRequest +func GetTopologyRequest(body []byte) (*Request, error) { + var payload Request if len(body) == 0 { return &payload, nil @@ -134,6 +89,14 @@ func GetTopologyRequest(body []byte) (*TopologyRequest, error) { return &payload, nil } +func spacer(value string) string { + if 
len(value) > 0 { + return " " + value + } + + return "" +} + func map2string(m map[string]string, prefix string, hide bool, suffix string) string { var sb strings.Builder sb.WriteString(prefix) @@ -159,3 +122,29 @@ func map2string(m map[string]string, prefix string, hide bool, suffix string) st return sb.String() } + +func mapOfAny2string(m map[string]any, prefix string, hide bool, suffix string) string { + var sb strings.Builder + sb.WriteString(prefix) + sb.WriteString(": [") + if n := len(m); n != 0 { + keys := make([]string, 0, n) + for key := range m { + keys = append(keys, key) + } + sort.Strings(keys) + terms := make([]string, 0, n) + for _, key := range keys { + if hide { + terms = append(terms, fmt.Sprintf("%s:***", key)) + } else { + terms = append(terms, fmt.Sprintf("%s:%s", key, m[key])) + } + } + sb.WriteString(strings.Join(terms, " ")) + } + sb.WriteString("]") + sb.WriteString(suffix) + + return sb.String() +} diff --git a/pkg/common/types_test.go b/pkg/topology/request_test.go similarity index 81% rename from pkg/common/types_test.go rename to pkg/topology/request_test.go index d8fb2eb..2f331bf 100644 --- a/pkg/common/types_test.go +++ b/pkg/topology/request_test.go @@ -14,11 +14,12 @@ * limitations under the License. 
*/ -package common +package topology_test import ( "testing" + "github.com/NVIDIA/topograph/pkg/topology" "github.com/stretchr/testify/require" ) @@ -26,20 +27,20 @@ func TestPayload(t *testing.T) { testCases := []struct { name string input string - payload *TopologyRequest + payload *topology.Request print string err string }{ { name: "Case 1: no input", - payload: &TopologyRequest{}, + payload: &topology.Request{}, print: `TopologyRequest: - Provider: + Provider: Credentials: [] Parameters: [] - Engine: + Engine: Parameters: [] - Nodes: + Nodes: `, }, { @@ -48,7 +49,7 @@ func TestPayload(t *testing.T) { "nodes": 5 } `, - err: "failed to parse payload: json: cannot unmarshal number into Go struct field TopologyRequest.nodes of type []common.ComputeInstances", + err: "failed to parse payload: json: cannot unmarshal number into Go struct field Request.nodes of type []topology.ComputeInstances", }, { name: "Case 3: valid input", @@ -89,23 +90,23 @@ func TestPayload(t *testing.T) { ] } `, - payload: &TopologyRequest{ - Provider: provider{ + payload: &topology.Request{ + Provider: topology.Provider{ Name: "aws", Creds: map[string]string{ "access_key_id": "id", "secret_access_key": "secret", }, - Params: map[string]string{}, + Params: map[string]any{}, }, - Engine: engine{ + Engine: topology.Engine{ Name: "slurm", - Params: map[string]string{ - KeyPlugin: ValTopologyBlock, - KeyBlockSizes: "30,120", + Params: map[string]any{ + topology.KeyPlugin: topology.ValTopologyBlock, + topology.KeyBlockSizes: "30,120", }, }, - Nodes: []ComputeInstances{ + Nodes: []topology.ComputeInstances{ { Region: "region1", Instances: map[string]string{ @@ -130,14 +131,14 @@ func TestPayload(t *testing.T) { Parameters: [] Engine: slurm Parameters: [block_sizes:30,120 plugin:topology/block] - Nodes: region1: [instance1:node1 instance2:node2 instance3:node3] region2: [instance4:node4 instance5:node5 instance6:node6] + Nodes: region1: [instance1:node1 instance2:node2 instance3:node3] region2: 
[instance4:node4 instance5:node5 instance6:node6] `, }, } for _, tc := range testCases { t.Run(tc.name, func(t *testing.T) { - payload, err := GetTopologyRequest([]byte(tc.input)) + payload, err := topology.GetTopologyRequest([]byte(tc.input)) if len(tc.err) != 0 { require.EqualError(t, err, tc.err) } else { diff --git a/pkg/common/const.go b/pkg/topology/topology.go similarity index 62% rename from pkg/common/const.go rename to pkg/topology/topology.go index 118cf38..852c6b6 100644 --- a/pkg/common/const.go +++ b/pkg/topology/topology.go @@ -14,20 +14,15 @@ * limitations under the License. */ -package common +package topology -const ( - ProviderAWS = "aws" - ProviderOCI = "oci" - ProviderGCP = "gcp" - ProviderCW = "cw" - ProviderBM = "baremetal" - ProviderTest = "test" +import ( + "fmt" + "strings" +) - KeyEngine = "engine" - EngineSLURM = "slurm" - EngineK8S = "k8s" - EngineTest = "test" +const ( + KeyEngine = "engine" KeyUID = "uid" KeyTopoConfigPath = "topology_config_path" @@ -42,3 +37,22 @@ const ( ValTopologyBlock = "topology/block" NoTopology = "no-topology" ) + +// Vertex is a tree node, representing a compute node or a network switch, where +// - Name is a compute node name +// - ID is an CSP defined instance ID of switches and compute nodes +// - Vertices is a list of connected compute nodes or network switches +type Vertex struct { + Name string + ID string + Vertices map[string]*Vertex + Metadata map[string]string +} + +func (v *Vertex) String() string { + vertices := []string{} + for _, w := range v.Vertices { + vertices = append(vertices, w.ID) + } + return fmt.Sprintf("ID:%q Name:%q Vertices: %s", v.ID, v.Name, strings.Join(vertices, ",")) +} diff --git a/pkg/translate/output.go b/pkg/translate/output.go index b8dd593..5932336 100644 --- a/pkg/translate/output.go +++ b/pkg/translate/output.go @@ -24,17 +24,18 @@ import ( "strconv" "strings" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/engines" + 
"github.com/NVIDIA/topograph/pkg/topology" ) -func ToGraph(wr io.Writer, root *common.Vertex) error { - if len(root.Metadata) != 0 && root.Metadata[common.KeyPlugin] == common.ValTopologyBlock { +func ToGraph(wr io.Writer, root *topology.Vertex) error { + if len(root.Metadata) != 0 && root.Metadata[topology.KeyPlugin] == topology.ValTopologyBlock { return toBlockTopology(wr, root) } return toTreeTopology(wr, root) } -func printBlock(wr io.Writer, block *common.Vertex, domainVisited map[string]int) error { +func printBlock(wr io.Writer, block *topology.Vertex, domainVisited map[string]int) error { if _, exists := domainVisited[block.ID]; !exists { nodes := make([]string, 0, len(block.Vertices)) for _, node := range block.Vertices { //nodes within each domain @@ -49,7 +50,7 @@ func printBlock(wr io.Writer, block *common.Vertex, domainVisited map[string]int return nil } -func findBlock(wr io.Writer, nodename string, root *common.Vertex, domainVisited map[string]int) error { // blockRoot +func findBlock(wr io.Writer, nodename string, root *topology.Vertex, domainVisited map[string]int) error { // blockRoot for _, block := range root.Vertices { if _, exists := block.Vertices[nodename]; exists { return printBlock(wr, block, domainVisited) @@ -58,7 +59,7 @@ func findBlock(wr io.Writer, nodename string, root *common.Vertex, domainVisited return nil } -func sortVertices(root *common.Vertex) []string { +func sortVertices(root *topology.Vertex) []string { // sort the IDs keys := make([]string, 0, len(root.Vertices)) for key := range root.Vertices { @@ -68,7 +69,7 @@ func sortVertices(root *common.Vertex) []string { return keys } -func printDisconnectedBlocks(wr io.Writer, root *common.Vertex, domainVisited map[string]int) error { +func printDisconnectedBlocks(wr io.Writer, root *topology.Vertex, domainVisited map[string]int) error { if root != nil { keys := sortVertices(root) for _, key := range keys { @@ -106,13 +107,13 @@ func getBlockSize(domainVisited map[string]int, 
adminBlockSize string) string { return strconv.Itoa(int(bs)) } -func toBlockTopology(wr io.Writer, root *common.Vertex) error { +func toBlockTopology(wr io.Writer, root *topology.Vertex) error { // traverse tree topology and when a node is reached, check within blockRoot for domain and print that domain. // keep a map of which domain has been printed - treeRoot := root.Vertices[common.ValTopologyTree] - blockRoot := root.Vertices[common.ValTopologyBlock] + treeRoot := root.Vertices[topology.ValTopologyTree] + blockRoot := root.Vertices[topology.ValTopologyBlock] visited := make(map[string]bool) - queue := []*common.Vertex{treeRoot} + queue := []*topology.Vertex{treeRoot} domainVisited := make(map[string]int) if treeRoot != nil { @@ -142,19 +143,19 @@ func toBlockTopology(wr io.Writer, root *common.Vertex) error { } blockSize := "" - if _, exists := root.Metadata[common.KeyBlockSizes]; exists { - blockSize = root.Metadata[common.KeyBlockSizes] + if _, exists := root.Metadata[topology.KeyBlockSizes]; exists { + blockSize = root.Metadata[topology.KeyBlockSizes] } blockSize = getBlockSize(domainVisited, blockSize) _, err = wr.Write([]byte(fmt.Sprintf("BlockSizes=%s\n", blockSize))) return err } -func toTreeTopology(wr io.Writer, root *common.Vertex) error { +func toTreeTopology(wr io.Writer, root *topology.Vertex) error { visited := make(map[string]bool) leaves := make(map[string][]string) - parents := []*common.Vertex{} - queue := []*common.Vertex{root} + parents := []*topology.Vertex{} + queue := []*topology.Vertex{root} idToName := make(map[string]string) for len(queue) > 0 { @@ -216,7 +217,7 @@ func toTreeTopology(wr io.Writer, root *common.Vertex) error { return nil } -func writeSwitch(wr io.Writer, v *common.Vertex) error { +func writeSwitch(wr io.Writer, v *topology.Vertex) error { if len(v.ID) == 0 { return nil } @@ -333,7 +334,7 @@ func split(input string) (string, string) { return input[:i], input[i:] } -func GetTreeTestSet(testForLongLabelName bool) 
(*common.Vertex, map[string]string) { +func GetTreeTestSet(testForLongLabelName bool) (*topology.Vertex, map[string]string) { var s3name string if testForLongLabelName { s3name = "S3very-very-long-id-to-check-label-value-limits-of-63-characters" @@ -346,34 +347,34 @@ func GetTreeTestSet(testForLongLabelName bool) (*common.Vertex, map[string]strin "I34": "Node304", "I35": "Node305", "I36": "Node306", } - n21 := &common.Vertex{ID: "I21", Name: "Node201"} - n22 := &common.Vertex{ID: "I22", Name: "Node202"} - n25 := &common.Vertex{ID: "I25", Name: "Node205"} + n21 := &topology.Vertex{ID: "I21", Name: "Node201"} + n22 := &topology.Vertex{ID: "I22", Name: "Node202"} + n25 := &topology.Vertex{ID: "I25", Name: "Node205"} - n34 := &common.Vertex{ID: "I34", Name: "Node304"} - n35 := &common.Vertex{ID: "I35", Name: "Node305"} - n36 := &common.Vertex{ID: "I36", Name: "Node306"} + n34 := &topology.Vertex{ID: "I34", Name: "Node304"} + n35 := &topology.Vertex{ID: "I35", Name: "Node305"} + n36 := &topology.Vertex{ID: "I36", Name: "Node306"} - sw2 := &common.Vertex{ + sw2 := &topology.Vertex{ ID: "S2", - Vertices: map[string]*common.Vertex{"I21": n21, "I22": n22, "I25": n25}, + Vertices: map[string]*topology.Vertex{"I21": n21, "I22": n22, "I25": n25}, } - sw3 := &common.Vertex{ + sw3 := &topology.Vertex{ ID: s3name, - Vertices: map[string]*common.Vertex{"I34": n34, "I35": n35, "I36": n36}, + Vertices: map[string]*topology.Vertex{"I34": n34, "I35": n35, "I36": n36}, } - sw1 := &common.Vertex{ + sw1 := &topology.Vertex{ ID: "S1", - Vertices: map[string]*common.Vertex{"S2": sw2, s3name: sw3}, + Vertices: map[string]*topology.Vertex{"S2": sw2, s3name: sw3}, } - root := &common.Vertex{ - Vertices: map[string]*common.Vertex{"S1": sw1}, + root := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"S1": sw1}, } return root, instance2node } -func GetBlockWithMultiIBTestSet() (*common.Vertex, map[string]string) { +func GetBlockWithMultiIBTestSet() (*topology.Vertex, map[string]string) 
{ instance2node := map[string]string{ "I14": "Node104", "I15": "Node105", "I16": "Node106", "I21": "Node201", "I22": "Node202", "I25": "Node205", @@ -381,177 +382,177 @@ func GetBlockWithMultiIBTestSet() (*common.Vertex, map[string]string) { "I41": "Node401", "I42": "Node402", "I43": "Node403", } - n14 := &common.Vertex{ID: "I14", Name: "Node104"} - n15 := &common.Vertex{ID: "I15", Name: "Node105"} - n16 := &common.Vertex{ID: "I16", Name: "Node106"} + n14 := &topology.Vertex{ID: "I14", Name: "Node104"} + n15 := &topology.Vertex{ID: "I15", Name: "Node105"} + n16 := &topology.Vertex{ID: "I16", Name: "Node106"} - n21 := &common.Vertex{ID: "I21", Name: "Node201"} - n22 := &common.Vertex{ID: "I22", Name: "Node202"} - n25 := &common.Vertex{ID: "I25", Name: "Node205"} + n21 := &topology.Vertex{ID: "I21", Name: "Node201"} + n22 := &topology.Vertex{ID: "I22", Name: "Node202"} + n25 := &topology.Vertex{ID: "I25", Name: "Node205"} - n31 := &common.Vertex{ID: "I31", Name: "Node301"} - n32 := &common.Vertex{ID: "I32", Name: "Node302"} - n33 := &common.Vertex{ID: "I33", Name: "Node303"} + n31 := &topology.Vertex{ID: "I31", Name: "Node301"} + n32 := &topology.Vertex{ID: "I32", Name: "Node302"} + n33 := &topology.Vertex{ID: "I33", Name: "Node303"} - n41 := &common.Vertex{ID: "I41", Name: "Node401"} - n42 := &common.Vertex{ID: "I42", Name: "Node402"} - n43 := &common.Vertex{ID: "I43", Name: "Node403"} + n41 := &topology.Vertex{ID: "I41", Name: "Node401"} + n42 := &topology.Vertex{ID: "I42", Name: "Node402"} + n43 := &topology.Vertex{ID: "I43", Name: "Node403"} - sw5 := &common.Vertex{ + sw5 := &topology.Vertex{ ID: "S5", - Vertices: map[string]*common.Vertex{"I31": n31, "I32": n32, "I33": n33}, + Vertices: map[string]*topology.Vertex{"I31": n31, "I32": n32, "I33": n33}, } - sw6 := &common.Vertex{ + sw6 := &topology.Vertex{ ID: "S6", - Vertices: map[string]*common.Vertex{"I41": n41, "I42": n42, "I43": n43}, + Vertices: map[string]*topology.Vertex{"I41": n41, "I42": n42, "I43": n43}, 
} - sw4 := &common.Vertex{ + sw4 := &topology.Vertex{ ID: "S4", - Vertices: map[string]*common.Vertex{"S5": sw5, "S6": sw6}, + Vertices: map[string]*topology.Vertex{"S5": sw5, "S6": sw6}, } - ibRoot1 := &common.Vertex{ - Vertices: map[string]*common.Vertex{"S4": sw4}, + ibRoot1 := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"S4": sw4}, } - sw2 := &common.Vertex{ + sw2 := &topology.Vertex{ ID: "S2", - Vertices: map[string]*common.Vertex{"I14": n14, "I15": n15, "I16": n16}, + Vertices: map[string]*topology.Vertex{"I14": n14, "I15": n15, "I16": n16}, } - sw3 := &common.Vertex{ + sw3 := &topology.Vertex{ ID: "S3", - Vertices: map[string]*common.Vertex{"I21": n21, "I22": n22, "I25": n25}, + Vertices: map[string]*topology.Vertex{"I21": n21, "I22": n22, "I25": n25}, } - sw1 := &common.Vertex{ + sw1 := &topology.Vertex{ ID: "S1", - Vertices: map[string]*common.Vertex{"S2": sw2, "S3": sw3}, + Vertices: map[string]*topology.Vertex{"S2": sw2, "S3": sw3}, } - ibRoot2 := &common.Vertex{ - Vertices: map[string]*common.Vertex{"S1": sw1}, + ibRoot2 := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"S1": sw1}, } - treeRoot := &common.Vertex{ - Vertices: map[string]*common.Vertex{"IB1": ibRoot1, "IB2": ibRoot2}, + treeRoot := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"IB1": ibRoot1, "IB2": ibRoot2}, } - block1 := &common.Vertex{ + block1 := &topology.Vertex{ ID: "B1", - Vertices: map[string]*common.Vertex{"I14": n14, "I15": n15, "I16": n16}, + Vertices: map[string]*topology.Vertex{"I14": n14, "I15": n15, "I16": n16}, } - block2 := &common.Vertex{ + block2 := &topology.Vertex{ ID: "B2", - Vertices: map[string]*common.Vertex{"I21": n21, "I22": n22, "I25": n25}, + Vertices: map[string]*topology.Vertex{"I21": n21, "I22": n22, "I25": n25}, } - block3 := &common.Vertex{ + block3 := &topology.Vertex{ ID: "B3", - Vertices: map[string]*common.Vertex{"I31": n31, "I32": n32, "I33": n33}, + Vertices: map[string]*topology.Vertex{"I31": n31, "I32": n32, 
"I33": n33}, } - block4 := &common.Vertex{ + block4 := &topology.Vertex{ ID: "B4", - Vertices: map[string]*common.Vertex{"I41": n41, "I42": n42, "I43": n43}, + Vertices: map[string]*topology.Vertex{"I41": n41, "I42": n42, "I43": n43}, } - blockRoot := &common.Vertex{ - Vertices: map[string]*common.Vertex{"B1": block1, "B2": block2, "B3": block3, "B4": block4}, + blockRoot := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"B1": block1, "B2": block2, "B3": block3, "B4": block4}, } - root := &common.Vertex{ - Vertices: map[string]*common.Vertex{common.ValTopologyBlock: blockRoot, common.ValTopologyTree: treeRoot}, + root := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{topology.ValTopologyBlock: blockRoot, topology.ValTopologyTree: treeRoot}, Metadata: map[string]string{ - common.KeyEngine: common.EngineSLURM, - common.KeyPlugin: common.ValTopologyBlock, - common.KeyBlockSizes: "3", + topology.KeyEngine: engines.EngineSLURM, + topology.KeyPlugin: topology.ValTopologyBlock, + topology.KeyBlockSizes: "3", }, } return root, instance2node } -func GetBlockWithIBTestSet() (*common.Vertex, map[string]string) { +func GetBlockWithIBTestSet() (*topology.Vertex, map[string]string) { instance2node := map[string]string{ "I14": "Node104", "I15": "Node105", "I16": "Node106", "I21": "Node201", "I22": "Node202", "I25": "Node205", } - n14 := &common.Vertex{ID: "I14", Name: "Node104"} - n15 := &common.Vertex{ID: "I15", Name: "Node105"} - n16 := &common.Vertex{ID: "I16", Name: "Node106"} + n14 := &topology.Vertex{ID: "I14", Name: "Node104"} + n15 := &topology.Vertex{ID: "I15", Name: "Node105"} + n16 := &topology.Vertex{ID: "I16", Name: "Node106"} - n21 := &common.Vertex{ID: "I21", Name: "Node201"} - n22 := &common.Vertex{ID: "I22", Name: "Node202"} - n25 := &common.Vertex{ID: "I25", Name: "Node205"} + n21 := &topology.Vertex{ID: "I21", Name: "Node201"} + n22 := &topology.Vertex{ID: "I22", Name: "Node202"} + n25 := &topology.Vertex{ID: "I25", Name: "Node205"} - sw2 
:= &common.Vertex{ + sw2 := &topology.Vertex{ ID: "S2", - Vertices: map[string]*common.Vertex{"I14": n14, "I15": n15, "I16": n16}, + Vertices: map[string]*topology.Vertex{"I14": n14, "I15": n15, "I16": n16}, } - sw3 := &common.Vertex{ + sw3 := &topology.Vertex{ ID: "S3", - Vertices: map[string]*common.Vertex{"I21": n21, "I22": n22, "I25": n25}, + Vertices: map[string]*topology.Vertex{"I21": n21, "I22": n22, "I25": n25}, } - sw1 := &common.Vertex{ + sw1 := &topology.Vertex{ ID: "S1", - Vertices: map[string]*common.Vertex{"S2": sw2, "S3": sw3}, + Vertices: map[string]*topology.Vertex{"S2": sw2, "S3": sw3}, } - treeRoot := &common.Vertex{ - Vertices: map[string]*common.Vertex{"S1": sw1}, + treeRoot := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"S1": sw1}, } - block1 := &common.Vertex{ + block1 := &topology.Vertex{ ID: "B1", - Vertices: map[string]*common.Vertex{"I14": n14, "I15": n15, "I16": n16}, + Vertices: map[string]*topology.Vertex{"I14": n14, "I15": n15, "I16": n16}, } - block2 := &common.Vertex{ + block2 := &topology.Vertex{ ID: "B2", - Vertices: map[string]*common.Vertex{"I21": n21, "I22": n22, "I25": n25}, + Vertices: map[string]*topology.Vertex{"I21": n21, "I22": n22, "I25": n25}, } - blockRoot := &common.Vertex{ - Vertices: map[string]*common.Vertex{"B1": block1, "B2": block2}, + blockRoot := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"B1": block1, "B2": block2}, } - root := &common.Vertex{ - Vertices: map[string]*common.Vertex{common.ValTopologyBlock: blockRoot, common.ValTopologyTree: treeRoot}, + root := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{topology.ValTopologyBlock: blockRoot, topology.ValTopologyTree: treeRoot}, Metadata: map[string]string{ - common.KeyEngine: common.EngineSLURM, - common.KeyPlugin: common.ValTopologyBlock, - common.KeyBlockSizes: "3", + topology.KeyEngine: engines.EngineSLURM, + topology.KeyPlugin: topology.ValTopologyBlock, + topology.KeyBlockSizes: "3", }, } return root, 
instance2node } -func GetBlockTestSet() (*common.Vertex, map[string]string) { +func GetBlockTestSet() (*topology.Vertex, map[string]string) { instance2node := map[string]string{ "I14": "Node104", "I15": "Node105", "I16": "Node106", "I21": "Node201", "I22": "Node202", "I25": "Node205", } - n14 := &common.Vertex{ID: "I14", Name: "Node104"} - n15 := &common.Vertex{ID: "I15", Name: "Node105"} - n16 := &common.Vertex{ID: "I16", Name: "Node106"} + n14 := &topology.Vertex{ID: "I14", Name: "Node104"} + n15 := &topology.Vertex{ID: "I15", Name: "Node105"} + n16 := &topology.Vertex{ID: "I16", Name: "Node106"} - n21 := &common.Vertex{ID: "I21", Name: "Node201"} - n22 := &common.Vertex{ID: "I22", Name: "Node202"} - n25 := &common.Vertex{ID: "I25", Name: "Node205"} + n21 := &topology.Vertex{ID: "I21", Name: "Node201"} + n22 := &topology.Vertex{ID: "I22", Name: "Node202"} + n25 := &topology.Vertex{ID: "I25", Name: "Node205"} - block1 := &common.Vertex{ + block1 := &topology.Vertex{ ID: "B1", - Vertices: map[string]*common.Vertex{"I14": n14, "I15": n15, "I16": n16}, + Vertices: map[string]*topology.Vertex{"I14": n14, "I15": n15, "I16": n16}, } - block2 := &common.Vertex{ + block2 := &topology.Vertex{ ID: "B2", - Vertices: map[string]*common.Vertex{"I21": n21, "I22": n22, "I25": n25}, + Vertices: map[string]*topology.Vertex{"I21": n21, "I22": n22, "I25": n25}, } - blockRoot := &common.Vertex{ - Vertices: map[string]*common.Vertex{"B1": block1, "B2": block2}, + blockRoot := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{"B1": block1, "B2": block2}, } - root := &common.Vertex{ - Vertices: map[string]*common.Vertex{common.ValTopologyBlock: blockRoot}, + root := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{topology.ValTopologyBlock: blockRoot}, Metadata: map[string]string{ - common.KeyEngine: common.EngineSLURM, - common.KeyPlugin: common.ValTopologyBlock, - common.KeyBlockSizes: "3", + topology.KeyEngine: engines.EngineSLURM, + topology.KeyPlugin: 
topology.ValTopologyBlock, + topology.KeyBlockSizes: "3", }, } return root, instance2node diff --git a/pkg/translate/output_test.go b/pkg/translate/output_test.go index 0f96019..6e9f780 100644 --- a/pkg/translate/output_test.go +++ b/pkg/translate/output_test.go @@ -20,7 +20,7 @@ import ( "bytes" "testing" - "github.com/NVIDIA/topograph/pkg/common" + "github.com/NVIDIA/topograph/pkg/topology" "github.com/stretchr/testify/require" ) @@ -97,20 +97,20 @@ func TestToBlockIBTopology(t *testing.T) { } func TestToSlurmNameShortener(t *testing.T) { - v := &common.Vertex{ - Vertices: map[string]*common.Vertex{ + v := &topology.Vertex{ + Vertices: map[string]*topology.Vertex{ "hpcislandid-1": { ID: "hpcislandid-1", Name: "switch.3.1", - Vertices: map[string]*common.Vertex{ + Vertices: map[string]*topology.Vertex{ "network-block-1": { ID: "network-block-1", Name: "switch.2.1", - Vertices: map[string]*common.Vertex{ + Vertices: map[string]*topology.Vertex{ "local-block-1": { ID: "local-block-1", Name: "switch.1.1", - Vertices: map[string]*common.Vertex{ + Vertices: map[string]*topology.Vertex{ "node-1": { ID: "node-1-id", Name: "node-1", @@ -122,11 +122,11 @@ func TestToSlurmNameShortener(t *testing.T) { "network-block-2": { ID: "network-block-2", Name: "switch.2.2", - Vertices: map[string]*common.Vertex{ + Vertices: map[string]*topology.Vertex{ "local-block-2": { ID: "local-block-2", Name: "switch.1.2", - Vertices: map[string]*common.Vertex{ + Vertices: map[string]*topology.Vertex{ "node-2": { ID: "node-2-id", Name: "node-2",