diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 0987251..e570920 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2024-12-03T00:08:31Z by kres 232fe63. +# Generated on 2024-12-06T13:45:15Z by kres 1ebe796. name: default concurrency: @@ -108,25 +108,8 @@ jobs: - name: qemu-up run: | make qemu-up - - name: Login to registry - if: github.event_name != 'pull_request' - uses: docker/login-action@v3 - with: - password: ${{ secrets.GITHUB_TOKEN }} - registry: ghcr.io - username: ${{ github.repository_owner }} - - name: image-qemu-up - run: | - make image-qemu-up - - name: push-omni-infra-provider-bare-metal-qemu-up - if: github.event_name != 'pull_request' - env: - PUSH: "true" - run: | - make image-qemu-up - name: run-integration-test env: - OMNI_INTEGRATION_TEST_IMAGE: ghcr.io/utkuozdemir/omni-integration-test:latest TEMP_REGISTRY: registry.dev.siderolabs.io run: | sudo -E make run-integration-test @@ -146,13 +129,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: logs - path: |- - /tmp/omni.log - /tmp/provider.log - ~/.talos/clusters/**/*.log - !~/.talos/clusters/**/swtpm.log - !~/.talos/clusters/**/dhcpd.log - !~/.talos/clusters/**/lb.log + path: /tmp/test-logs retention-days: "5" - name: Generate Checksums if: startsWith(github.ref, 'refs/tags/') diff --git a/.kres.yaml b/.kres.yaml index 02f1a0f..11771fb 100644 --- a/.kres.yaml +++ b/.kres.yaml @@ -4,22 +4,19 @@ spec: ignoredPaths: - hack/compose/docker-compose.override.yml --- +kind: auto.CommandConfig +name: qemu-up +spec: + disableImage: true +--- kind: common.Image name: image-provider spec: imageName: omni-infra-provider-bare-metal - baseImage: ghcr.io/siderolabs/talosctl:v1.9.0-alpha.3 pushLatest: false extraEnvironment: PLATFORM: linux/amd64,linux/arm64 --- -kind: common.Image -name: image-qemu-up -spec: - imageName: omni-infra-provider-bare-metal-qemu-up - 
baseImage: ghcr.io/siderolabs/talosctl:v1.9.0-alpha.3 - pushLatest: false ---- kind: custom.Step name: ipxe spec: @@ -130,7 +127,6 @@ spec: sops: true environment: TEMP_REGISTRY: registry.dev.siderolabs.io - OMNI_INTEGRATION_TEST_IMAGE: ghcr.io/utkuozdemir/omni-integration-test:latest # todo: testing, remove later artifacts: enabled: true extraPaths: @@ -140,12 +136,7 @@ spec: always: true continueOnError: true paths: - - "/tmp/omni.log" - - "/tmp/provider.log" - - "~/.talos/clusters/**/*.log" - - "!~/.talos/clusters/**/swtpm.log" - - "!~/.talos/clusters/**/dhcpd.log" - - "!~/.talos/clusters/**/lb.log" + - "/tmp/test-logs" --- kind: common.SOPS spec: diff --git a/Dockerfile b/Dockerfile index 6b3662f..b53e4c1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,25 +1,21 @@ -# syntax = docker/dockerfile-upstream:1.11.1-labs +# syntax = docker/dockerfile-upstream:1.12.0-labs # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2024-12-05T10:44:40Z by kres 232fe63. +# Generated on 2024-12-06T12:07:03Z by kres 1ebe796. ARG TOOLCHAIN -FROM ghcr.io/siderolabs/talosctl:v1.9.0-alpha.3 AS base-image-provider +FROM ghcr.io/siderolabs/ca-certificates:v1.9.0 AS image-ca-certificates -FROM ghcr.io/siderolabs/talosctl:v1.9.0-alpha.3 AS base-image-qemu-up - -FROM ghcr.io/siderolabs/ca-certificates:v1.8.0 AS image-ca-certificates - -FROM ghcr.io/siderolabs/fhs:v1.8.0 AS image-fhs +FROM ghcr.io/siderolabs/fhs:v1.9.0 AS image-fhs FROM --platform=linux/amd64 ghcr.io/siderolabs/ipxe:v1.8.0-16-g71d23b4 AS ipxe-linux-amd64 FROM --platform=linux/arm64 ghcr.io/siderolabs/ipxe:v1.8.0-16-g71d23b4 AS ipxe-linux-arm64 # runs markdownlint -FROM docker.io/oven/bun:1.1.36-alpine AS lint-markdown +FROM docker.io/oven/bun:1.1.38-alpine AS lint-markdown WORKDIR /src RUN bun i markdownlint-cli@0.43.0 sentences-per-line@0.2.1 COPY .markdownlint.json . 
@@ -217,7 +213,7 @@ FROM scratch AS qemu-up-all COPY --from=qemu-up-linux-amd64 / / COPY --from=qemu-up-linux-arm64 / / -FROM base-image-provider AS image-provider +FROM scratch AS image-provider ARG TARGETARCH COPY --from=provider provider-linux-${TARGETARCH} /provider COPY --from=image-fhs / / @@ -233,11 +229,3 @@ COPY --from=ghcr.io/siderolabs/talos-metal-agent-boot-assets:v1.9.0-alpha.3-agen LABEL org.opencontainers.image.source=https://github.com/siderolabs/omni-infra-provider-bare-metal ENTRYPOINT ["/provider"] -FROM base-image-qemu-up AS image-qemu-up -ARG TARGETARCH -COPY --from=qemu-up qemu-up-linux-${TARGETARCH} /qemu-up -COPY --from=image-fhs / / -COPY --from=image-ca-certificates / / -LABEL org.opencontainers.image.source=https://github.com/siderolabs/omni-infra-provider-bare-metal -ENTRYPOINT ["/qemu-up"] - diff --git a/Makefile b/Makefile index 2a9a015..0e733cd 100644 --- a/Makefile +++ b/Makefile @@ -1,6 +1,6 @@ # THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT. # -# Generated on 2024-12-04T12:05:52Z by kres 232fe63. +# Generated on 2024-12-06T14:06:35Z by kres 1ebe796. # common variables @@ -21,11 +21,11 @@ PROTOBUF_GO_VERSION ?= 1.35.2 GRPC_GO_VERSION ?= 1.5.1 GRPC_GATEWAY_VERSION ?= 2.24.0 VTPROTOBUF_VERSION ?= 0.6.0 -GOIMPORTS_VERSION ?= 0.27.0 +GOIMPORTS_VERSION ?= 0.28.0 DEEPCOPY_VERSION ?= v0.5.6 -GOLANGCILINT_VERSION ?= v1.62.0 +GOLANGCILINT_VERSION ?= v1.62.2 GOFUMPT_VERSION ?= v0.7.0 -GO_VERSION ?= 1.23.3 +GO_VERSION ?= 1.23.4 GO_BUILDFLAGS ?= GO_LDFLAGS ?= CGO_ENABLED ?= 0 @@ -140,7 +140,7 @@ else GO_LDFLAGS += -s endif -all: unit-tests provider image-provider qemu-up image-qemu-up ipxe run-integration-test lint +all: unit-tests provider image-provider qemu-up ipxe run-integration-test lint $(ARTIFACTS): ## Creates artifacts directory. 
@mkdir -p $(ARTIFACTS) @@ -241,10 +241,6 @@ qemu-up-linux-arm64: $(ARTIFACTS)/qemu-up-linux-arm64 ## Builds executable for .PHONY: qemu-up qemu-up: qemu-up-linux-amd64 qemu-up-linux-arm64 ## Builds executables for qemu-up. -.PHONY: image-qemu-up -image-qemu-up: ## Builds image for omni-infra-provider-bare-metal-qemu-up. - @$(MAKE) target-$@ TARGET_ARGS="--tag=$(REGISTRY)/$(USERNAME)/omni-infra-provider-bare-metal-qemu-up:$(IMAGE_TAG)" - run-integration-test: provider qemu-up TEMP_REGISTRY=$(TEMP_REGISTRY) OMNI_IMAGE=$(OMNI_IMAGE) OMNI_INTEGRATION_TEST_IMAGE=$(OMNI_INTEGRATION_TEST_IMAGE) SKIP_CLEANUP=$(SKIP_CLEANUP) hack/test/integration.sh diff --git a/cmd/provider/main.go b/cmd/provider/main.go index b850b43..94e46cf 100644 --- a/cmd/provider/main.go +++ b/cmd/provider/main.go @@ -18,6 +18,7 @@ import ( "go.uber.org/zap" "go.uber.org/zap/zapcore" + "github.com/siderolabs/omni-infra-provider-bare-metal/internal/constants" "github.com/siderolabs/omni-infra-provider-bare-metal/internal/provider" "github.com/siderolabs/omni-infra-provider-bare-metal/internal/provider/meta" "github.com/siderolabs/omni-infra-provider-bare-metal/internal/version" @@ -110,7 +111,7 @@ func init() { "instead of forwarding the request to the image factory to boot into agent mode.") rootCmd.Flags().StringVar(&providerOptions.DHCPProxyIfaceOrIP, dhcpProxyIfaceOrIPFlag, provider.DefaultOptions.DHCPProxyIfaceOrIP, "The interface name or the IP address on the interface to run the DHCP proxy server on. 
"+ - "If it is an IP address, the DHCP proxy server will run on the interface that has the IP address.") + "If it is an IP address, the DHCP proxy server will run on the interface that has the IP address.") // get default flag rootCmd.Flags().StringVar(&providerOptions.ImageFactoryBaseURL, "image-factory-base-url", provider.DefaultOptions.ImageFactoryBaseURL, "The base URL of the image factory.") rootCmd.Flags().StringVar(&providerOptions.ImageFactoryPXEBaseURL, "image-factory-pxe-base-url", provider.DefaultOptions.ImageFactoryPXEBaseURL, @@ -130,7 +131,11 @@ func init() { "Comma separated list of key=value pairs to be set to the machine. Example: key1=value1,key2,key3=value3") rootCmd.Flags().BoolVar(&providerOptions.InsecureSkipTLSVerify, "insecure-skip-tls-verify", provider.DefaultOptions.InsecureSkipTLSVerify, "Skip TLS verification when connecting to the Omni API.") - rootCmd.Flags().BoolVar(&providerOptions.ClearState, "clear-state", provider.DefaultOptions.ClearState, "Clear the state of the provider on startup.") + + if constants.IsDebugBuild { + rootCmd.Flags().BoolVar(&providerOptions.ClearState, "clear-state", provider.DefaultOptions.ClearState, "Clear the state of the provider on startup.") + } + rootCmd.Flags().BoolVar(&providerOptions.EnableResourceCache, "enable-resource-cache", provider.DefaultOptions.EnableResourceCache, "Enable controller runtime resource cache.") rootCmd.Flags().BoolVar(&providerOptions.WipeWithZeroes, "wipe-with-zeroes", provider.DefaultOptions.WipeWithZeroes, diff --git a/go.mod b/go.mod index 8aba5e6..6633a85 100644 --- a/go.mod +++ b/go.mod @@ -1,6 +1,6 @@ module github.com/siderolabs/omni-infra-provider-bare-metal -go 1.23.3 +go 1.23.4 replace ( github.com/pensando/goipmi => github.com/siderolabs/goipmi v0.0.0-20211214143420-35f956689e67 @@ -21,21 +21,20 @@ require ( github.com/siderolabs/image-factory v0.6.2 github.com/siderolabs/net v0.4.0 github.com/siderolabs/omni/client v0.0.0-20241205095318-ce403382d64c - 
github.com/siderolabs/talos v1.9.0-alpha.3.0.20241202095056-770be16425d2 - github.com/siderolabs/talos-metal-agent v0.1.0-alpha.2 + github.com/siderolabs/talos v1.9.0-beta.0 + github.com/siderolabs/talos-metal-agent v0.1.0-alpha.3 github.com/siderolabs/talos/pkg/machinery v1.9.0-beta.0 github.com/spf13/cobra v1.8.1 - github.com/stretchr/testify v1.10.0 go.uber.org/zap v1.27.0 - golang.org/x/net v0.31.0 + golang.org/x/net v0.32.0 golang.org/x/sync v0.10.0 - google.golang.org/grpc v1.68.0 + google.golang.org/grpc v1.68.1 google.golang.org/protobuf v1.35.2 gopkg.in/yaml.v3 v3.0.1 ) require ( - cel.dev/expr v0.18.0 // indirect + cel.dev/expr v0.19.0 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.1.3 // indirect github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f // indirect @@ -107,9 +106,8 @@ require ( github.com/pierrec/lz4/v4 v4.1.21 // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect - github.com/siderolabs/crypto v0.5.0 // indirect + github.com/siderolabs/crypto v0.5.1 // indirect github.com/siderolabs/go-api-signature v0.3.6 // indirect github.com/siderolabs/go-blockdevice/v2 v2.0.6 // indirect github.com/siderolabs/go-cmd v0.1.3 // indirect @@ -135,15 +133,15 @@ require ( go.opentelemetry.io/otel/sdk v1.32.0 // indirect go.opentelemetry.io/otel/trace v1.32.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.29.0 // indirect - golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect + golang.org/x/crypto v0.30.0 // indirect + golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d // indirect golang.org/x/oauth2 v0.24.0 // indirect - golang.org/x/sys v0.27.0 // indirect - golang.org/x/term v0.26.0 // indirect - golang.org/x/text v0.20.0 // indirect + golang.org/x/sys v0.28.0 
// indirect + golang.org/x/term v0.27.0 // indirect + golang.org/x/text v0.21.0 // indirect golang.org/x/time v0.8.0 // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20241202173237-19429a94021a // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20241206012308-a4fef0638583 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20241206012308-a4fef0638583 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gotest.tools/v3 v3.5.1 // indirect diff --git a/go.sum b/go.sum index 2a4daa5..af09f9d 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,5 @@ -cel.dev/expr v0.18.0 h1:CJ6drgk+Hf96lkLikr4rFf19WrU0BOWEihyZnI2TAzo= -cel.dev/expr v0.18.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= +cel.dev/expr v0.19.0 h1:lXuo+nDhpyJSpWxpPVi5cPUwzKb+dsdOiw6IreM5yt0= +cel.dev/expr v0.19.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161 h1:L/gRVlceqvL25UVaW/CKtUDjefjrs0SPonmDGUVOYP0= github.com/Azure/go-ansiterm v0.0.0-20230124172434-306776ec8161/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= @@ -296,8 +296,8 @@ github.com/ryanuber/go-glob v1.0.0 h1:iQh3xXAumdQ+4Ufa5b25cRpC5TYKlno6hsv6Cb3pkB github.com/ryanuber/go-glob v1.0.0/go.mod h1:807d1WSdnB0XRJzKNil9Om6lcp/3a0v4qIHxIXzX/Yc= github.com/santhosh-tekuri/jsonschema/v5 v5.3.1 h1:lZUw3E0/J3roVtGQ+SCrUrg3ON6NgVqpn3+iol9aGu4= github.com/santhosh-tekuri/jsonschema/v5 v5.3.1/go.mod h1:uToXkOrWAZ6/Oc07xWQrPOhJotwFIyu2bBVN41fcDUY= -github.com/siderolabs/crypto v0.5.0 h1:+Sox0aYLCcD0PAH2cbEcx557zUrONLtuj1Ws+2MFXGc= -github.com/siderolabs/crypto v0.5.0/go.mod h1:hsR3tJ3aaeuhCChsLF4dBd9vlJVPvmhg4vvx2ez4aD4= +github.com/siderolabs/crypto v0.5.1 h1:aZEUTZBoP8rH+0TqQAlUgazriPh89MrXf4R+th+m6ps= 
+github.com/siderolabs/crypto v0.5.1/go.mod h1:7RHC7eUKBx6RLS2lDaNXrQ83zY9iPH/aQSTxk1I4/j4= github.com/siderolabs/gen v0.7.0 h1:uHAt3WD0dof28NHFuguWBbDokaXQraR/HyVxCLw2QCU= github.com/siderolabs/gen v0.7.0/go.mod h1:an3a2Y53O7kUjnnK8Bfu3gewtvnIOu5RTU6HalFtXQQ= github.com/siderolabs/go-api-signature v0.3.6 h1:wDIsXbpl7Oa/FXvxB6uz4VL9INA9fmr3EbmjEZYFJrU= @@ -328,10 +328,10 @@ github.com/siderolabs/proto-codec v0.1.1 h1:4jiUwW/vaXTZ+YNgZDs37B4aj/1mzV/erIkz github.com/siderolabs/proto-codec v0.1.1/go.mod h1:rIvmhKJG8+JwSCGPX+cQljpOMDmuHhLKPkt6KaFwEaU= github.com/siderolabs/protoenc v0.2.1 h1:BqxEmeWQeMpNP3R6WrPqDatX8sM/r4t97OP8mFmg6GA= github.com/siderolabs/protoenc v0.2.1/go.mod h1:StTHxjet1g11GpNAWiATgc8K0HMKiFSEVVFOa/H0otc= -github.com/siderolabs/talos v1.9.0-alpha.3.0.20241202095056-770be16425d2 h1:82upUT3xgI/H6h0BcmumvwHezJI015iHfJzgIfDcYG4= -github.com/siderolabs/talos v1.9.0-alpha.3.0.20241202095056-770be16425d2/go.mod h1:CKjjO271EOpJ066K4NibHYIFhAmXsOE/NFUmIZ9TSaw= -github.com/siderolabs/talos-metal-agent v0.1.0-alpha.2 h1:r3p8ibJWGQYnxixtkDB9cHSzkQW7lRE/O04nTMpVTTo= -github.com/siderolabs/talos-metal-agent v0.1.0-alpha.2/go.mod h1:sG7GnlIW1TTGE0mc3Cv5d/sUSzUkv0CsaG+qcv1Oy24= +github.com/siderolabs/talos v1.9.0-beta.0 h1:irfeHLzIQnqpUBqhp4W94+WY4U0mn7KL0UA5bOCQgJo= +github.com/siderolabs/talos v1.9.0-beta.0/go.mod h1:IYaTrNW059D15uy9dRzFJKD9pBZfWjQsGfAMvWhX97M= +github.com/siderolabs/talos-metal-agent v0.1.0-alpha.3 h1:4OExRsg5FvXwFh4tvUDf2nysr4VSU8qKaW0eZf6sBVY= +github.com/siderolabs/talos-metal-agent v0.1.0-alpha.3/go.mod h1:y+9Czg9D9D8lZQCvdHcz/R4y1PKN5soKPd49noPKGsw= github.com/siderolabs/talos/pkg/machinery v1.9.0-beta.0 h1:ueS/a+PjzchYO3ZouHgpOZfoUTk/9dO+XHxH3uSM7IU= github.com/siderolabs/talos/pkg/machinery v1.9.0-beta.0/go.mod h1:jFnOdqa3IfiUHO/ZG0jfON7SxbNIkowVsom8pO0OwBU= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= @@ -399,11 +399,11 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod 
h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.29.0 h1:L5SG1JTTXupVV3n6sUqMTeWbjAyfPwoda2DLX8J8FrQ= -golang.org/x/crypto v0.29.0/go.mod h1:+F4F4N5hv6v38hfeYwTdx20oUvLLc+QfrE9Ax9HtgRg= +golang.org/x/crypto v0.30.0 h1:RwoQn3GkWiMkzlX562cLB7OxWvjH1L8xutO2WoJcRoY= +golang.org/x/crypto v0.30.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= +golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d h1:0olWaB5pg3+oychR51GUVCEsGkeCU/2JxjBgIo4f3M0= +golang.org/x/exp v0.0.0-20241204233417-43b7b7cde48d/go.mod h1:qj5a5QZpwLU2NLQudwIN5koi3beDhSAlJwa67PuM98c= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -434,8 +434,8 @@ golang.org/x/net v0.0.0-20210928044308-7d9f5e0b762b/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= -golang.org/x/net v0.31.0 h1:68CPQngjLL0r2AlUKiSxtQFKvzRVbnzLwMUn5SzcLHo= 
-golang.org/x/net v0.31.0/go.mod h1:P4fl1q7dY2hnZFxEk4pPSkDHF+QqjitcnDjUQyMM+pM= +golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= +golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= @@ -482,13 +482,13 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= -golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= -golang.org/x/term v0.26.0 h1:WEQa6V3Gja/BhNxg540hBip/kkaYtRg3cxg4oXSw4AU= -golang.org/x/term v0.26.0/go.mod h1:Si5m1o57C5nBNQo5z1iq+XDijt21BDBDp2bK0QI8e3E= +golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod 
h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= @@ -496,8 +496,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.8.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.20.0 h1:gK/Kv2otX8gz+wn7Rmb3vT96ZwuoxnQlY+HlJVj7Qug= -golang.org/x/text v0.20.0/go.mod h1:D4IsuqiFMhST5bX19pQ9ikHC2GsaKyk/oF+pn3ducp4= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/time v0.8.0 h1:9i3RxcPv3PZnitoVGMPDKZSq1xW1gK1Xy3ArNOGZfEg= golang.org/x/time v0.8.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -510,8 +510,8 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= -golang.org/x/tools v0.27.0 h1:qEKojBykQkQ4EynWy4S8Weg69NumxKdn40Fce3uc/8o= -golang.org/x/tools v0.27.0/go.mod h1:sUi0ZgbwW9ZPAq26Ekut+weQPR5eIM6GQLQ1Yjm1H0Q= +golang.org/x/tools v0.28.0 h1:WuB6qZ4RPCQo5aP3WdKZS7i595EdWqWR8vqJTlwTVK8= +golang.org/x/tools v0.28.0/go.mod h1:dcIOrVd3mfQKTgrDVQHqCPMWy6lnhfhtX3hLXYVLfRw= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -521,17 
+521,17 @@ google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7 google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto/googleapis/api v0.0.0-20241202173237-19429a94021a h1:OAiGFfOiA0v9MRYsSidp3ubZaBnteRUyn3xB2ZQ5G/E= -google.golang.org/genproto/googleapis/api v0.0.0-20241202173237-19429a94021a/go.mod h1:jehYqy3+AhJU9ve55aNOaSml7wUXjF9x6z2LcCfpAhY= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a h1:hgh8P4EuoxpsuKMXX/To36nOFD7vixReXgn8lPGnt+o= -google.golang.org/genproto/googleapis/rpc v0.0.0-20241202173237-19429a94021a/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU= +google.golang.org/genproto/googleapis/api v0.0.0-20241206012308-a4fef0638583 h1:v+j+5gpj0FopU0KKLDGfDo9ZRRpKdi5UBrCP0f76kuY= +google.golang.org/genproto/googleapis/api v0.0.0-20241206012308-a4fef0638583/go.mod h1:jehYqy3+AhJU9ve55aNOaSml7wUXjF9x6z2LcCfpAhY= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241206012308-a4fef0638583 h1:IfdSdTcLFy4lqUQrQJLkLt1PB+AsqVz6lwkWPzWEz10= +google.golang.org/genproto/googleapis/rpc v0.0.0-20241206012308-a4fef0638583/go.mod h1:5uTbfoYQed2U9p3KIj2/Zzm02PYhndfdmML0qC3q3FU= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.38.0/go.mod h1:NREThFqKR1f3iQ6oBuvc5LadQuXVGo9rkm5ZGrQdJfM= -google.golang.org/grpc v1.68.0 h1:aHQeeJbo8zAkAa3pRzrVjZlbz6uSfeOXlJNQM0RAbz0= 
-google.golang.org/grpc v1.68.0/go.mod h1:fmSPC5AsjSBCK54MyHRx48kpOti1/jRfOlwEWywNjWA= +google.golang.org/grpc v1.68.1 h1:oI5oTa11+ng8r8XMMN7jAOmWfPZWbYpCFaMUTACxkM0= +google.golang.org/grpc v1.68.1/go.mod h1:+q1XYFJjShcqn0QZHvCyeR4CXPA+llXIeUIfIe00waw= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/hack/test/integration.sh b/hack/test/integration.sh index 28b6fbf..66f26c1 100755 --- a/hack/test/integration.sh +++ b/hack/test/integration.sh @@ -13,6 +13,9 @@ echo "OMNI_IMAGE: $OMNI_IMAGE" echo "OMNI_INTEGRATION_TEST_IMAGE: $OMNI_INTEGRATION_TEST_IMAGE" echo "SKIP_CLEANUP: $SKIP_CLEANUP" +TEST_LOGS_DIR=/tmp/test-logs +mkdir -p $TEST_LOGS_DIR + docker pull "$OMNI_IMAGE" docker pull "$OMNI_INTEGRATION_TEST_IMAGE" @@ -36,20 +39,28 @@ echo "Register cleanup script..." function cleanup() { local exit_code=$? # preserve the original exit code - docker logs omni > /tmp/omni.log || true - docker logs provider > /tmp/provider.log || true - if [[ "$SKIP_CLEANUP" == "true" ]]; then echo "Skipping cleanup..." exit $exit_code fi - pkill -f qemu-up-linux-amd64 || true + echo "Stop containers" + docker stop omni provider vault-dev || true - # ${QEMU_UP} --destroy || true # disabled for now, as it removes Talos logs + echo "Gather container logs" + docker logs omni &>"$TEST_LOGS_DIR/omni.log" || true + docker logs provider &>"$TEST_LOGS_DIR/provider.log" || true - echo "Stop and remove Omni, Provider and Vault..." + echo "Gather machine logs" + machine_logs_dir=$TEST_LOGS_DIR/machines/ + mkdir -p "$machine_logs_dir" + find "$HOME/.talos/clusters/bare-metal" -type f -name "*.log" ! -name "dhcpd.log" !
-name "lb.log" -exec cp {} "$machine_logs_dir" \; || true + + pkill -f qemu-up-linux-amd64 || true + ${QEMU_UP} --destroy || true + pkill -f talosctl || true + echo "Remove containers and Omni artifacts" docker rm -f omni provider vault-dev || true rm -rf $ARTIFACTS/omni/ || true diff --git a/internal/constants/constants.go b/internal/constants/constants.go new file mode 100644 index 0000000..e6628dd --- /dev/null +++ b/internal/constants/constants.go @@ -0,0 +1,6 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// Package constants contains global backend constants. +package constants diff --git a/internal/constants/debug_enabled.go b/internal/constants/debug_enabled.go new file mode 100644 index 0000000..57630f9 --- /dev/null +++ b/internal/constants/debug_enabled.go @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//go:build sidero.debug + +package constants + +// IsDebugBuild is set to true when the build is a debug build (WITH_DEBUG=true). +const IsDebugBuild = true diff --git a/internal/constants/debug_disabled.go b/internal/constants/debug_disabled.go new file mode 100644 index 0000000..9385db8 --- /dev/null +++ b/internal/constants/debug_disabled.go @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +//go:build !sidero.debug + +package constants + +// IsDebugBuild is set to true when the build is a debug build (WITH_DEBUG=true).
+const IsDebugBuild = false diff --git a/internal/provider/agent/agent.go b/internal/provider/agent/agent.go new file mode 100644 index 0000000..297dc24 --- /dev/null +++ b/internal/provider/agent/agent.go @@ -0,0 +1,6 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at http://mozilla.org/MPL/2.0/. + +// Package agent implements the metal agent service. +package agent diff --git a/internal/provider/agent/controller.go b/internal/provider/agent/service.go similarity index 83% rename from internal/provider/agent/controller.go rename to internal/provider/agent/service.go index ee60472..12592d6 100644 --- a/internal/provider/agent/controller.go +++ b/internal/provider/agent/service.go @@ -2,7 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at http://mozilla.org/MPL/2.0/. -// Package agent implements the metal agent controller. package agent import ( @@ -13,6 +12,7 @@ import ( "github.com/jhump/grpctunnel" "github.com/jhump/grpctunnel/tunnelpb" agentpb "github.com/siderolabs/talos-metal-agent/api/agent" + agentconstants "github.com/siderolabs/talos-metal-agent/pkg/constants" "go.uber.org/zap" "google.golang.org/grpc" "google.golang.org/grpc/codes" @@ -25,13 +25,12 @@ import ( ) const ( - machineIDMetadataKey = "machine-id" - timeout = 30 * time.Second - zeroesWipeTimeout = 3 * time.Hour + timeout = 30 * time.Second + zeroesWipeTimeout = 24 * time.Hour ) -// Controller controls servers by establishing a reverse GRPC tunnel with them and by sending them commands. -type Controller struct { +// Service controls servers by establishing a reverse GRPC tunnel with them and by sending them commands. 
+type Service struct { logger *zap.Logger grpcServer grpc.ServiceRegistrar tunnelHandler *grpctunnel.TunnelServiceHandler @@ -39,8 +38,8 @@ type Controller struct { wipeWithZeroes bool } -// NewController creates a new agent Controller. -func NewController(grpcServer grpc.ServiceRegistrar, state state.State, wipeWithZeroes bool, logger *zap.Logger) *Controller { +// NewService creates a new agent service. +func NewService(grpcServer grpc.ServiceRegistrar, state state.State, wipeWithZeroes bool, logger *zap.Logger) *Service { tunnelHandler := grpctunnel.NewTunnelServiceHandler( grpctunnel.TunnelServiceHandlerOptions{ OnReverseTunnelOpen: func(channel grpctunnel.TunnelChannel) { @@ -62,7 +61,7 @@ func NewController(grpcServer grpc.ServiceRegistrar, state state.State, wipeWith tunnelpb.RegisterTunnelServiceServer(grpcServer, tunnelHandler.Service()) - return &Controller{ + return &Service{ logger: logger, grpcServer: grpcServer, tunnelHandler: tunnelHandler, @@ -98,7 +97,7 @@ func handleTunnelEvent(channel grpctunnel.TunnelChannel, state state.State, conn } // IsAccessible checks if the agent with the given ID is accessible. -func (c *Controller) IsAccessible(ctx context.Context, id string) (bool, error) { +func (c *Service) IsAccessible(ctx context.Context, id string) (bool, error) { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -118,7 +117,7 @@ func (c *Controller) IsAccessible(ctx context.Context, id string) (bool, error) } // GetPowerManagement retrieves the IPMI information from the server with the given ID. 
-func (c *Controller) GetPowerManagement(ctx context.Context, id string) (*agentpb.GetPowerManagementResponse, error) { +func (c *Service) GetPowerManagement(ctx context.Context, id string) (*agentpb.GetPowerManagementResponse, error) { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -129,7 +128,7 @@ func (c *Controller) GetPowerManagement(ctx context.Context, id string) (*agentp } // SetPowerManagement sets the IPMI information on the server with the given ID. -func (c *Controller) SetPowerManagement(ctx context.Context, id string, req *agentpb.SetPowerManagementRequest) error { +func (c *Service) SetPowerManagement(ctx context.Context, id string, req *agentpb.SetPowerManagementRequest) error { ctx, cancel := context.WithTimeout(ctx, timeout) defer cancel() @@ -142,7 +141,7 @@ func (c *Controller) SetPowerManagement(ctx context.Context, id string, req *age } // WipeDisks wipes the disks on the server with the given ID. -func (c *Controller) WipeDisks(ctx context.Context, id string) error { +func (c *Service) WipeDisks(ctx context.Context, id string) error { channel := c.tunnelHandler.KeyAsChannel(id) cli := agentpb.NewAgentServiceClient(channel) @@ -162,7 +161,7 @@ func (c *Controller) WipeDisks(ctx context.Context, id string) error { } // AllConnectedMachines returns a set of all connected machines. 
-func (c *Controller) AllConnectedMachines() map[string]struct{} { +func (c *Service) AllConnectedMachines() map[string]struct{} { allTunnels := c.tunnelHandler.AllReverseTunnels() machines := make(map[string]struct{}, len(allTunnels)) @@ -189,7 +188,7 @@ func machineIDAffinityKey(ctx context.Context, logger *zap.Logger) (string, bool return "", false } - machineID := md.Get(machineIDMetadataKey) + machineID := md.Get(agentconstants.MachineIDMetadataKey) if len(machineID) == 0 { logger.Warn("invalid affinity key", zap.String("reason", "no machine ID in metadata")) diff --git a/internal/provider/boot/boot.go b/internal/provider/boot/boot.go index 5bc64f2..145d946 100644 --- a/internal/provider/boot/boot.go +++ b/internal/provider/boot/boot.go @@ -24,9 +24,9 @@ type Mode struct { } // DetermineRequiredMode determines the required boot mode. -func DetermineRequiredMode(infraMachine *infra.Machine, status *baremetal.MachineStatus, installStatus *infra.MachineState, logger *zap.Logger) (Mode, error) { +func DetermineRequiredMode(infraMachine *infra.Machine, status *baremetal.MachineStatus, machineState *infra.MachineState, logger *zap.Logger) (Mode, error) { acceptanceStatus := omnispecs.InfraMachineConfigSpec_PENDING - tearingDown := false + infraMachineTearingDown := false allocated := false requiresPowerMgmtConfig := true installed := false @@ -35,7 +35,7 @@ func DetermineRequiredMode(infraMachine *infra.Machine, status *baremetal.Machin if infraMachine != nil { acceptanceStatus = infraMachine.TypedSpec().Value.AcceptanceStatus - tearingDown = infraMachine.Metadata().Phase() == resource.PhaseTearingDown + infraMachineTearingDown = infraMachine.Metadata().Phase() == resource.PhaseTearingDown allocated = infraMachine.TypedSpec().Value.ClusterTalosVersion != "" if infraMachine.TypedSpec().Value.WipeId != "" { @@ -48,8 +48,8 @@ func DetermineRequiredMode(infraMachine *infra.Machine, status *baremetal.Machin lastWipeID = status.TypedSpec().Value.LastWipeId } - if 
installStatus != nil { - installed = installStatus.TypedSpec().Value.Installed + if machineState != nil { + installed = machineState.TypedSpec().Value.Installed } acceptancePending := acceptanceStatus == omnispecs.InfraMachineConfigSpec_PENDING @@ -62,7 +62,7 @@ func DetermineRequiredMode(infraMachine *infra.Machine, status *baremetal.Machin } requiresWipe := pendingWipeID != "" - bootIntoAgentMode := tearingDown || acceptancePending || !allocated || requiresPowerMgmtConfig || requiresWipe + bootIntoAgentMode := infraMachineTearingDown || acceptancePending || !allocated || requiresPowerMgmtConfig || requiresWipe var requiredBootMode specs.BootMode @@ -78,7 +78,7 @@ func DetermineRequiredMode(infraMachine *infra.Machine, status *baremetal.Machin } logger.With( - zap.Bool("tearing_down", tearingDown), + zap.Bool("infra_machine_tearing_down", infraMachineTearingDown), zap.Bool("requires_power_mgmt_config", requiresPowerMgmtConfig), zap.Bool("installed", installed), zap.String("wipe_id", wipeID), diff --git a/internal/provider/config/config.go b/internal/provider/config/config.go index b41eb5d..cb10302 100644 --- a/internal/provider/config/config.go +++ b/internal/provider/config/config.go @@ -9,26 +9,15 @@ import ( "context" "fmt" "net/http" - "strings" - "text/template" + "net/url" + "github.com/siderolabs/talos/pkg/machinery/config/container" + "github.com/siderolabs/talos/pkg/machinery/config/types/meta" + "github.com/siderolabs/talos/pkg/machinery/config/types/runtime" + "github.com/siderolabs/talos/pkg/machinery/config/types/siderolink" "go.uber.org/zap" ) -const machineConfigTemplate = `apiVersion: v1alpha1 -kind: SideroLinkConfig -apiUrl: {{ .APIURL }} ---- -apiVersion: v1alpha1 -kind: EventSinkConfig -endpoint: "[fdae:41e4:649b:9303::1]:8090" ---- -apiVersion: v1alpha1 -kind: KmsgLogConfig -name: omni-kmsg -url: "tcp://[fdae:41e4:649b:9303::1]:8092" -` - // OmniClient is the interface to interact with Omni. 
type OmniClient interface { GetSiderolinkAPIURL(ctx context.Context) (string, error) @@ -37,7 +26,7 @@ type OmniClient interface { // Handler handles machine configuration requests. type Handler struct { logger *zap.Logger - machineConfig string + machineConfig []byte } // NewHandler creates a new Handler. @@ -47,23 +36,13 @@ func NewHandler(ctx context.Context, omniClient OmniClient, logger *zap.Logger) return nil, fmt.Errorf("failed to get siderolink API URL: %w", err) } - tmpl, err := template.New("machine-config").Parse(machineConfigTemplate) + machineConfig, err := buildPartialConfig(siderolinkAPIURL) if err != nil { - return nil, err - } - - var sb strings.Builder - - if err = tmpl.Execute(&sb, struct { - APIURL string - }{ - APIURL: siderolinkAPIURL, - }); err != nil { - return nil, fmt.Errorf("failed to execute template: %w", err) + return nil, fmt.Errorf("failed to build machine config: %w", err) } return &Handler{ - machineConfig: sb.String(), + machineConfig: machineConfig, logger: logger, }, nil } @@ -80,7 +59,40 @@ func (s *Handler) ServeHTTP(w http.ResponseWriter, req *http.Request) { w.WriteHeader(http.StatusOK) - if _, err := w.Write([]byte(s.machineConfig)); err != nil { + if _, err := w.Write(s.machineConfig); err != nil { s.logger.Error("failed to write response", zap.Error(err)) } } + +func buildPartialConfig(siderolinkAPIURL string) ([]byte, error) { + apiURL, err := url.Parse(siderolinkAPIURL) + if err != nil { + return nil, fmt.Errorf("failed to parse API URL: %w", err) + } + + siderolinkConfig := siderolink.NewConfigV1Alpha1() + siderolinkConfig.APIUrlConfig = meta.URL{ + URL: apiURL, + } + + eventSinkConfig := runtime.NewEventSinkV1Alpha1() + eventSinkConfig.Endpoint = "[fdae:41e4:649b:9303::1]:8090" + + kmsgLogURL, err := url.Parse("tcp://[fdae:41e4:649b:9303::1]:8092") + if err != nil { + return nil, fmt.Errorf("failed to parse kmsg log URL: %w", err) + } + + kmsgLogConfig := runtime.NewKmsgLogV1Alpha1() + kmsgLogConfig.MetaName = 
"omni-kmsg" + kmsgLogConfig.KmsgLogURL = meta.URL{ + URL: kmsgLogURL, + } + + configContainer, err := container.New(siderolinkConfig, eventSinkConfig, kmsgLogConfig) + if err != nil { + return nil, fmt.Errorf("failed to create config container: %w", err) + } + + return configContainer.Bytes() +} diff --git a/internal/provider/controllers/infra_machine_status.go b/internal/provider/controllers/infra_machine_status.go index 9f8fff1..550772c 100644 --- a/internal/provider/controllers/infra_machine_status.go +++ b/internal/provider/controllers/infra_machine_status.go @@ -36,8 +36,8 @@ const ( ipmiUsername = "talos-agent" ) -// AgentController is the interface for interacting with the Talos agent over the reverse GRPC tunnel. -type AgentController interface { +// AgentService is the interface for interacting with the Talos agent over the reverse GRPC tunnel. +type AgentService interface { GetPowerManagement(ctx context.Context, id string) (*agentpb.GetPowerManagementResponse, error) SetPowerManagement(ctx context.Context, id string, req *agentpb.SetPowerManagementRequest) error WipeDisks(ctx context.Context, id string) error @@ -52,9 +52,9 @@ type APIPowerManager interface { type InfraMachineController = qtransform.QController[*infra.Machine, *infra.MachineStatus] // NewInfraMachineController initializes InfraMachineController. 
-func NewInfraMachineController(agentController AgentController, apiPowerManager APIPowerManager, state state.State, requeueInterval time.Duration) *InfraMachineController { +func NewInfraMachineController(agentService AgentService, apiPowerManager APIPowerManager, state state.State, requeueInterval time.Duration) *InfraMachineController { helper := &infraMachineControllerHelper{ - agentController: agentController, + agentService: agentService, apiPowerManager: apiPowerManager, state: state, requeueInterval: requeueInterval, @@ -87,7 +87,7 @@ func NewInfraMachineController(agentController AgentController, apiPowerManager } type infraMachineControllerHelper struct { - agentController AgentController + agentService AgentService apiPowerManager APIPowerManager state state.State requeueInterval time.Duration @@ -116,15 +116,11 @@ func (h *infraMachineControllerHelper) transform(ctx context.Context, reader con return xerrors.NewTaggedf[qtransform.SkipReconcileTag]("machine not accepted") } - status, statusCreated, err := machinestatus.GetOrCreate(ctx, h.state, infraMachine.Metadata().ID()) + status, err := machinestatus.Modify(ctx, h.state, infraMachine.Metadata().ID(), nil) if err != nil { return err } - if statusCreated { // reconcile is triggered by create - return nil - } - logger.Info("transform infra machine") bootMode := status.TypedSpec().Value.BootMode @@ -139,12 +135,12 @@ func (h *infraMachineControllerHelper) transform(ctx context.Context, reader con return err } - installStatus, err := safe.ReaderGetByID[*infra.MachineState](ctx, reader, infraMachine.Metadata().ID()) + machineState, err := safe.ReaderGetByID[*infra.MachineState](ctx, reader, infraMachine.Metadata().ID()) if err != nil && !state.IsNotFoundError(err) { return err } - mode, err := boot.DetermineRequiredMode(infraMachine, status, installStatus, logger) + mode, err := boot.DetermineRequiredMode(infraMachine, status, machineState, logger) if err != nil { return err } @@ -193,6 +189,9 @@ func (h 
*infraMachineControllerHelper) transform(ctx context.Context, reader con return nil } +// finalizerRemoval is called when the infra.Machine is being deleted. +// +// We do not need to wipe the disks here, as if/when the machine reconnects to Omni, a new infra.Machine will be created, and it will be marked for the initial wipe. func (h *infraMachineControllerHelper) finalizerRemoval(ctx context.Context, reader controller.Reader, logger *zap.Logger, infraMachine *infra.Machine) error { // attempt to boot into agent mode if it is not already in agent mode status, err := safe.ReaderGetByID[*baremetal.MachineStatus](ctx, reader, infraMachine.Metadata().ID()) @@ -214,7 +213,33 @@ func (h *infraMachineControllerHelper) finalizerRemoval(ctx context.Context, rea logger.Warn("failed to reboot machine", zap.Error(err)) } - // we don't need to wipe the disks here, as if/when the machine reconnects to Omni, a new infra.Machine will be created and it will be marked for the initial wipe + return h.removeInternalStatus(ctx, infraMachine.Metadata().ID()) +} + +// removeInternalStatus removes the provider-internal baremetal.MachineStatus resource. 
+func (h *infraMachineControllerHelper) removeInternalStatus(ctx context.Context, id resource.ID) error { + statusMD := baremetal.NewMachineStatus(id).Metadata() + + destroyReady, err := h.state.Teardown(ctx, statusMD) + if err != nil { + if state.IsNotFoundError(err) { + return nil + } + + return err + } + + if !destroyReady { + return nil + } + + if err = h.state.Destroy(ctx, statusMD); err != nil { + if state.IsNotFoundError(err) { + return nil + } + + return err + } return nil } @@ -238,7 +263,7 @@ func (h *infraMachineControllerHelper) populateInfraMachineStatus(status *bareme } func (h *infraMachineControllerHelper) wipe(ctx context.Context, id resource.ID, pendingWipeID string, logger *zap.Logger) error { - if err := h.agentController.WipeDisks(ctx, id); err != nil { + if err := h.agentService.WipeDisks(ctx, id); err != nil { statusCode := grpcstatus.Code(err) if statusCode == codes.Unavailable { return controller.NewRequeueErrorf(h.requeueInterval, "machine is not yet available, requeue wipe") @@ -270,6 +295,9 @@ func (h *infraMachineControllerHelper) wipe(ctx context.Context, id resource.ID, // ensureReboot makes sure that the machine is rebooted if it can be rebooted. 
func (h *infraMachineControllerHelper) ensureReboot(ctx context.Context, status *baremetal.MachineStatus, logger *zap.Logger) error { + ctx, cancel := context.WithTimeout(ctx, 10*time.Minute) + defer cancel() + var powerClient power.Client powerClient, err := power.GetClient(status.TypedSpec().Value.PowerManagement) @@ -292,7 +320,7 @@ func (h *infraMachineControllerHelper) ensurePowerManagement(ctx context.Context id := status.Metadata().ID() - powerManagement, err := h.agentController.GetPowerManagement(ctx, id) + powerManagement, err := h.agentService.GetPowerManagement(ctx, id) if err != nil { if grpcstatus.Code(err) == codes.Unavailable { return controller.NewRequeueErrorf(h.requeueInterval, "machine is not yet available, requeue getting power management") @@ -367,7 +395,7 @@ func (h *infraMachineControllerHelper) ensurePowerManagementOnAgent(ctx context. } } - if err = h.agentController.SetPowerManagement(ctx, id, &agentpb.SetPowerManagementRequest{ + if err = h.agentService.SetPowerManagement(ctx, id, &agentpb.SetPowerManagementRequest{ Api: api, Ipmi: ipmi, }); err != nil { diff --git a/internal/provider/ipxe/handler.go b/internal/provider/ipxe/handler.go index 744e366..a05591b 100644 --- a/internal/provider/ipxe/handler.go +++ b/internal/provider/ipxe/handler.go @@ -152,8 +152,8 @@ func (handler *Handler) makeBootDecision(ctx context.Context, arch, uuid string, userExtraKernelArgs = strings.Fields(machineResources.infraMachine.TypedSpec().Value.ExtraKernelArgs) } - mode, modeErr := boot.DetermineRequiredMode(machineResources.infraMachine, machineResources.status, machineResources.installStatus, logger) - if modeErr != nil { + mode, err := boot.DetermineRequiredMode(machineResources.infraMachine, machineResources.status, machineResources.machineState, logger) + if err != nil { return bootDecision{statusCode: http.StatusInternalServerError}, fmt.Errorf("failed to determine required boot mode: %w", err) } @@ -220,9 +220,9 @@ func (handler *Handler) 
makeBootDecision(ctx context.Context, arch, uuid string, } type resources struct { - infraMachine *infra.Machine - status *baremetal.MachineStatus - installStatus *infra.MachineState + infraMachine *infra.Machine + status *baremetal.MachineStatus + machineState *infra.MachineState } func (handler *Handler) getResources(ctx context.Context, id string) (resources, error) { @@ -231,20 +231,20 @@ func (handler *Handler) getResources(ctx context.Context, id string) (resources, return resources{}, fmt.Errorf("failed to get infra machine: %w", err) } - status, _, err := machinestatus.GetOrCreate(ctx, handler.state, id) + status, err := machinestatus.Modify(ctx, handler.state, id, nil) if err != nil { return resources{}, fmt.Errorf("failed to get bare metal machine status: %w", err) } - installStatus, err := safe.StateGetByID[*infra.MachineState](ctx, handler.state, id) + machineState, err := safe.StateGetByID[*infra.MachineState](ctx, handler.state, id) if err != nil && !state.IsNotFoundError(err) { return resources{}, fmt.Errorf("failed to get infra machine install status: %w", err) } return resources{ - infraMachine: infraMachine, - status: status, - installStatus: installStatus, + infraMachine: infraMachine, + status: status, + machineState: machineState, }, nil } diff --git a/internal/provider/machinestatus/machinestatus.go b/internal/provider/machinestatus/machinestatus.go index 246e359..752bf70 100644 --- a/internal/provider/machinestatus/machinestatus.go +++ b/internal/provider/machinestatus/machinestatus.go @@ -18,32 +18,15 @@ import ( var mu sync.Mutex -// GetOrCreate gets or creates the baremetal.MachineStatus resource in the state. 
-func GetOrCreate(ctx context.Context, st state.State, id resource.ID) (*baremetal.MachineStatus, bool, error) { - mu.Lock() - defer mu.Unlock() - - res, err := safe.StateGetByID[*baremetal.MachineStatus](ctx, st, id) - if err != nil { - if !state.IsNotFoundError(err) { - return nil, false, err - } - - res = baremetal.NewMachineStatus(id) - - if err = st.Create(ctx, res); err != nil { - return nil, true, err - } - } - - return res, false, nil -} - // Modify modifies the baremetal.MachineStatus resource in the state. func Modify(ctx context.Context, st state.State, id resource.ID, updateFn func(status *baremetal.MachineStatus) error) (*baremetal.MachineStatus, error) { mu.Lock() defer mu.Unlock() + if updateFn == nil { + updateFn = func(*baremetal.MachineStatus) error { return nil } + } + _, err := safe.StateGetByID[*baremetal.MachineStatus](ctx, st, id) if err != nil { if !state.IsNotFoundError(err) { diff --git a/internal/provider/machinestatus/poller.go b/internal/provider/machinestatus/poller.go index 7024983..b77dbe1 100644 --- a/internal/provider/machinestatus/poller.go +++ b/internal/provider/machinestatus/poller.go @@ -20,25 +20,25 @@ import ( "github.com/siderolabs/omni-infra-provider-bare-metal/internal/provider/power" ) -// AgentController is the interface for controlling Talos agent. -type AgentController interface { +// AgentService is the interface for controlling Talos agent. +type AgentService interface { AllConnectedMachines() map[string]struct{} IsAccessible(ctx context.Context, machineID string) (bool, error) } // Poller polls the machines periodically and updates their statuses. type Poller struct { - agentController AgentController - state state.State - logger *zap.Logger + agentService AgentService + state state.State + logger *zap.Logger } // NewPoller creates a new Poller. 
-func NewPoller(agentController AgentController, state state.State, logger *zap.Logger) *Poller { +func NewPoller(agentService AgentService, state state.State, logger *zap.Logger) *Poller { return &Poller{ - agentController: agentController, - state: state, - logger: logger, + agentService: agentService, + state: state, + logger: logger, } } @@ -70,7 +70,7 @@ func (m *Poller) poll(ctx context.Context) { return } - connectedMachines := m.agentController.AllConnectedMachines() + connectedMachines := m.agentService.AllConnectedMachines() machineIDSet := map[string]struct{}{} var numAgentConnected, numAgentDisconnected, numPoweredOn, numPoweredOff, numPowerUnknown int @@ -93,13 +93,7 @@ func (m *Poller) poll(ctx context.Context) { agentConnected, err = m.agentConnected(ctx, connectedMachines, status.Metadata().ID()) if err != nil { - connectionError := errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || grpcstatus.Code(err) == codes.Canceled - - if !connectionError { - m.logger.Error("failed to check connection", zap.String("machine_id", status.Metadata().ID()), zap.Error(err)) - - continue - } + m.logger.Error("failed to check connection", zap.String("machine_id", status.Metadata().ID()), zap.Error(err)) } if agentConnected { @@ -157,8 +151,12 @@ func (m *Poller) agentConnected(ctx context.Context, connectedMachines map[strin } // attempt to ping - accessible, err := m.agentController.IsAccessible(ctx, machineID) + accessible, err := m.agentService.IsAccessible(ctx, machineID) if err != nil { + if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) || grpcstatus.Code(err) == codes.Canceled { + return false, nil + } + return false, err } diff --git a/internal/provider/options.go b/internal/provider/options.go index 5ffe653..ab14f90 100644 --- a/internal/provider/options.go +++ b/internal/provider/options.go @@ -13,7 +13,7 @@ type Options struct { OmniAPIEndpoint string ImageFactoryBaseURL string 
ImageFactoryPXEBaseURL string - AgentModeTalosVersion string + AgentModeTalosVersion string // todo: get this from Omni. Warning: needs to be Talos 1.9 with agent code inside APIListenAddress string APIAdvertiseAddress string APIPowerMgmtStateDir string diff --git a/internal/provider/provider.go b/internal/provider/provider.go index d40df96..735000d 100644 --- a/internal/provider/provider.go +++ b/internal/provider/provider.go @@ -19,10 +19,10 @@ import ( "github.com/cosi-project/runtime/pkg/state" "github.com/hashicorp/go-multierror" "github.com/siderolabs/omni/client/pkg/client" - "github.com/siderolabs/omni/client/pkg/omni/resources/infra" "go.uber.org/zap" "golang.org/x/sync/errgroup" + "github.com/siderolabs/omni-infra-provider-bare-metal/internal/constants" "github.com/siderolabs/omni-infra-provider-bare-metal/internal/provider/agent" "github.com/siderolabs/omni-infra-provider-bare-metal/internal/provider/baremetal" "github.com/siderolabs/omni-infra-provider-bare-metal/internal/provider/config" @@ -58,7 +58,7 @@ func New(options Options, logger *zap.Logger) *Provider { // Run runs the provider. 
func (p *Provider) Run(ctx context.Context) error { - apiAdvertiseAddress, err := p.determineAPIAdvertiseAddress() + apiAdvertiseAddress, dhcpProxyIfaceOrIP, err := p.determineAddresses() if err != nil { return fmt.Errorf("failed to determine API advertise address: %w", err) } @@ -84,11 +84,15 @@ func (p *Provider) Run(ctx context.Context) error { omniClient := omni.BuildClient(omniState) if p.options.ClearState { - if err = p.clearState(ctx, omniState); err != nil { - return fmt.Errorf("failed to clear state: %w", err) - } + if constants.IsDebugBuild { + if err = p.clearState(ctx, omniState); err != nil { + return fmt.Errorf("failed to clear state: %w", err) + } - p.logger.Info("state cleared") + p.logger.Info("state cleared") + } else { + p.logger.Warn("clear state is requested, but this is not a debug build, skipping") + } } if err = omniClient.EnsureProviderStatus(ctx, p.options.Name, p.options.Description, icon); err != nil { @@ -117,26 +121,26 @@ func (p *Provider) Run(ctx context.Context) error { } srvr := server.New(ctx, p.options.APIListenAddress, p.options.APIPort, p.options.UseLocalBootAssets, configHandler, ipxeHandler, p.logger.With(zap.String("component", "server"))) - agentController := agent.NewController(srvr, omniState, p.options.WipeWithZeroes, p.logger.With(zap.String("component", "controller"))) //nolint:contextcheck // false positive - machineStatusPoller := machinestatus.NewPoller(agentController, omniState, p.logger.With(zap.String("component", "machine_status_poller"))) - dhcpProxy := dhcp.NewProxy(apiAdvertiseAddress, p.options.APIPort, p.options.DHCPProxyIfaceOrIP, p.logger.With(zap.String("component", "dhcp_proxy"))) + agentService := agent.NewService(srvr, omniState, p.options.WipeWithZeroes, p.logger.With(zap.String("component", "agent_service"))) //nolint:contextcheck // false positive + machineStatusPoller := machinestatus.NewPoller(agentService, omniState, p.logger.With(zap.String("component", "machine_status_poller"))) + dhcpProxy 
:= dhcp.NewProxy(apiAdvertiseAddress, p.options.APIPort, dhcpProxyIfaceOrIP, p.logger.With(zap.String("component", "dhcp_proxy"))) tftpServer := tftp.NewServer(p.logger.With(zap.String("component", "tftp_server"))) apiPowerManager := powerapi.NewPowerManager(p.options.APIPowerMgmtStateDir) // todo: enable if we re-enable reverse tunnel on Omni: https://github.com/siderolabs/omni/pull/746 // reverseTunnel := tunnel.New(omniState, omniAPIClient, p.logger.With(zap.String("component", "reverse_tunnel"))) - if err = cosiRuntime.RegisterQController(controllers.NewInfraMachineController(agentController, apiPowerManager, omniState, 1*time.Minute)); err != nil { + if err = cosiRuntime.RegisterQController(controllers.NewInfraMachineController(agentService, apiPowerManager, omniState, 1*time.Minute)); err != nil { return fmt.Errorf("failed to register controller: %w", err) } - return p.runComponents(ctx, map[string]func(context.Context) error{ - "COSI runtime": cosiRuntime.Run, - "machine status poller": machineStatusPoller.Run, - "server": srvr.Run, - // "reverse tunnel": reverseTunnel.Run, - "DHCP proxy": dhcpProxy.Run, - "TFTP server": tftpServer.Run, + return p.runComponents(ctx, []component{ + {cosiRuntime.Run, "COSI runtime"}, + {machineStatusPoller.Run, "machine status poller"}, + {srvr.Run, "server"}, + {dhcpProxy.Run, "DHCP proxy"}, + {tftpServer.Run, "TFTP server"}, + // {reverseTunnel.Run, "reverse tunnel"}, }) } @@ -151,8 +155,6 @@ func (p *Provider) buildCOSIRuntime(omniAPIClient *client.Client) (*runtime.Runt if p.options.EnableResourceCache { options = append(options, safe.WithResourceCache[*baremetal.MachineStatus]()) - options = append(options, safe.WithResourceCache[*infra.Machine]()) - options = append(options, safe.WithResourceCache[*infra.MachineStatus]()) } cosiRuntime, err := runtime.NewRuntime(omniState, p.logger.With(zap.String("component", "cosi_runtime")), options...) 
@@ -163,26 +165,33 @@ func (p *Provider) buildCOSIRuntime(omniAPIClient *client.Client) (*runtime.Runt return cosiRuntime, nil } -func (p *Provider) runComponents(ctx context.Context, components map[string]func(context.Context) error) error { +type component struct { + run func(context.Context) error + name string +} + +func (p *Provider) runComponents(ctx context.Context, components []component) error { ctx, cancel := context.WithCancel(ctx) defer cancel() eg, ctx := errgroup.WithContext(ctx) - for name, f := range components { + for _, comp := range components { + logger := p.logger.With(zap.String("component", comp.name)) + eg.Go(func() error { defer cancel() // cancel the parent context, so all other components are also stopped - p.logger.Info("start component ", zap.String("name", name)) + logger.Info("start component") - err := f(ctx) + err := comp.run(ctx) if err != nil { - p.logger.Error("failed to run component", zap.String("name", name), zap.Error(err)) + logger.Error("failed to run component", zap.Error(err)) return err } - p.logger.Info("component stopped", zap.String("name", name)) + logger.Info("component stopped") return nil }) @@ -195,21 +204,32 @@ func (p *Provider) runComponents(ctx context.Context, components map[string]func return nil } -func (p *Provider) determineAPIAdvertiseAddress() (string, error) { - if p.options.APIAdvertiseAddress != "" { - return p.options.APIAdvertiseAddress, nil +func (p *Provider) determineAddresses() (apiAdvertiseAddress, dhcpProxyAddress string, err error) { + if p.options.APIAdvertiseAddress != "" && p.options.DHCPProxyIfaceOrIP != "" { + return p.options.APIAdvertiseAddress, p.options.DHCPProxyIfaceOrIP, nil } routableIPs, err := ip.RoutableIPs() if err != nil { - return "", fmt.Errorf("failed to get routable IPs: %w", err) + return "", "", fmt.Errorf("failed to get routable IPs: %w", err) } if len(routableIPs) != 1 { - return "", fmt.Errorf(`expected exactly one routable IP, got %d: %v. 
specify API advertise address explicitly`, len(routableIPs), routableIPs) + return "", "", fmt.Errorf("expected exactly one routable IP, got %d: %v. "+ + "specify API advertise address and DHCP proxy interface/IP explicitly", len(routableIPs), routableIPs) + } + + apiAdvertiseAddress = p.options.APIAdvertiseAddress + if apiAdvertiseAddress == "" { + apiAdvertiseAddress = routableIPs[0] + } + + dhcpProxyAddress = p.options.DHCPProxyIfaceOrIP + if dhcpProxyAddress == "" { + dhcpProxyAddress = routableIPs[0] } - return routableIPs[0], nil + return apiAdvertiseAddress, dhcpProxyAddress, nil } // buildOmniAPIClient creates a new Omni API client.