diff --git a/.changelog/5390.bugfix.md b/.changelog/5390.bugfix.md new file mode 100644 index 00000000000..efac82cc7f0 --- /dev/null +++ b/.changelog/5390.bugfix.md @@ -0,0 +1,9 @@ +host/sgx/epid: ensure consistent IAS proxy usage for attestation + +Refactors the IAS proxy client to expose separate clients for each configured +IAS proxy, instead of load-balancing internally between endpoints on a +per-request basis. + +This is required because the attestation procedure requires three calls to +the IAS endpoint (`GetSPIDInfo`, `GetSigRL`, `VerifyEvidence`) which should +all interact with the same endpoint. diff --git a/go/ias/init.go b/go/ias/init.go index 627f38c1b6b..f364882d287 100644 --- a/go/ias/init.go +++ b/go/ias/init.go @@ -14,7 +14,7 @@ import ( var logger = logging.GetLogger("ias") // New creates a new IAS endpoint. -func New(identity *identity.Identity) (api.Endpoint, error) { +func New(identity *identity.Identity) ([]api.Endpoint, error) { if cmdFlags.DebugDontBlameOasis() { if config.GlobalConfig.IAS.DebugSkipVerify { logger.Warn("`ias.debug_skip_verify` set, AVR signature validation bypassed") diff --git a/go/ias/proxy/client/client.go b/go/ias/proxy/client/client.go index d7dbc70fb36..ce291f0feaa 100644 --- a/go/ias/proxy/client/client.go +++ b/go/ias/proxy/client/client.go @@ -8,8 +8,6 @@ import ( "strings" "google.golang.org/grpc" - "google.golang.org/grpc/resolver" - "google.golang.org/grpc/resolver/manual" "github.com/oasisprotocol/oasis-core/go/common/crypto/signature" cmnGrpc "github.com/oasisprotocol/oasis-core/go/common/grpc" @@ -20,45 +18,44 @@ import ( "github.com/oasisprotocol/oasis-core/go/ias/proxy" ) -var _ api.Endpoint = (*proxyClient)(nil) +var _ api.Endpoint = (*mockEndpoint)(nil) -type proxyClient struct { - identity *identity.Identity +type mockEndpoint struct{} - conn *grpc.ClientConn - endpoint api.Endpoint +func (m *mockEndpoint) VerifyEvidence(_ context.Context, evidence *api.Evidence) (*ias.AVRBundle, error) { + // Generate a 
mock AVR, under the assumption that the runtime is built to support this. + // The runtime will reject the mock AVR if it is not. + avr, err := ias.NewMockAVR(evidence.Quote, evidence.Nonce) + if err != nil { + return nil, err + } + return &ias.AVRBundle{ + Body: avr, + }, nil +} - spidInfo *api.SPIDInfo +func (m *mockEndpoint) GetSPIDInfo(_ context.Context) (*api.SPIDInfo, error) { + spidInfo := &api.SPIDInfo{} + _ = spidInfo.SPID.UnmarshalBinary(make([]byte, ias.SPIDSize)) + return spidInfo, nil +} - logger *logging.Logger +func (m *mockEndpoint) GetSigRL(_ context.Context, _ uint32) ([]byte, error) { + return nil, fmt.Errorf("IAS proxy is not configured, mock used") } -func (c *proxyClient) fetchSPIDInfo(ctx context.Context) error { - if c.spidInfo != nil || c.endpoint == nil { - return nil - } +func (m *mockEndpoint) Cleanup() {} - var err error - if c.spidInfo, err = c.endpoint.GetSPIDInfo(ctx); err != nil { - return err - } - return nil +var _ api.Endpoint = (*proxyClient)(nil) + +type proxyClient struct { + conn *grpc.ClientConn + endpoint api.Endpoint + + logger *logging.Logger } func (c *proxyClient) VerifyEvidence(ctx context.Context, evidence *api.Evidence) (*ias.AVRBundle, error) { - if c.endpoint == nil { - // If the IAS proxy is not configured, generate a mock AVR, under the - // assumption that the runtime is built to support this. The runtime - // will reject the mock AVR if it is not. - avr, err := ias.NewMockAVR(evidence.Quote, evidence.Nonce) - if err != nil { - return nil, err - } - return &ias.AVRBundle{ - Body: avr, - }, nil - } - // Ensure the evidence.Quote passes basic sanity/security checks before // even bothering to contact the backend. 
var untrustedQuote ias.Quote @@ -73,84 +70,62 @@ func (c *proxyClient) VerifyEvidence(ctx context.Context, evidence *api.Evidence } func (c *proxyClient) GetSPIDInfo(ctx context.Context) (*api.SPIDInfo, error) { - if err := c.fetchSPIDInfo(ctx); err != nil { - return nil, err - } - return c.spidInfo, nil + return c.endpoint.GetSPIDInfo(ctx) } func (c *proxyClient) GetSigRL(ctx context.Context, epidGID uint32) ([]byte, error) { - if c.endpoint == nil { - return nil, fmt.Errorf("IAS proxy is not configured, mock used") - } return c.endpoint.GetSigRL(ctx, epidGID) } func (c *proxyClient) Cleanup() { - if c.conn != nil { - _ = c.conn.Close() - } + _ = c.conn.Close() } -// New creates a new IAS proxy client endpoint. -func New(identity *identity.Identity, addresses []string) (api.Endpoint, error) { - c := &proxyClient{ - identity: identity, - logger: logging.GetLogger("ias/proxyclient"), - } +// New creates a collection of IAS proxy clients (one client per provided address). +func New(identity *identity.Identity, addresses []string) ([]api.Endpoint, error) { + logger := logging.GetLogger("ias/proxyclient") if len(addresses) == 0 { - c.logger.Warn("IAS proxy is not configured, all reports will be mocked") - - c.spidInfo = &api.SPIDInfo{} - _ = c.spidInfo.SPID.UnmarshalBinary(make([]byte, ias.SPIDSize)) - } else { - var resolverState resolver.State - pubKeys := make(map[signature.PublicKey]bool) - for _, addr := range addresses { - spl := strings.Split(addr, "@") - if len(spl) != 2 { - return nil, fmt.Errorf("missing public key in address '%s'", addr) - } - - var pk signature.PublicKey - if err := pk.UnmarshalText([]byte(spl[0])); err != nil { - return nil, fmt.Errorf("malformed public key in address '%s': %w", addr, err) - } - - pubKeys[pk] = true - resolverState.Addresses = append(resolverState.Addresses, resolver.Address{Addr: spl[1]}) + logger.Warn("IAS proxy is not configured, all reports will be mocked") + return []api.Endpoint{&mockEndpoint{}}, nil + } + + clients 
:= make([]api.Endpoint, 0, len(addresses)) + for _, addr := range addresses { + spl := strings.Split(addr, "@") + if len(spl) != 2 { + return nil, fmt.Errorf("missing public key in address '%s'", addr) } + var pk signature.PublicKey + if err := pk.UnmarshalText([]byte(spl[0])); err != nil { + return nil, fmt.Errorf("malformed public key in address '%s': %w", addr, err) + } creds, err := cmnGrpc.NewClientCreds(&cmnGrpc.ClientOptions{ - ServerPubKeys: pubKeys, + ServerPubKeys: map[signature.PublicKey]bool{pk: true}, CommonName: proxy.CommonName, GetClientCertificate: func(cri *tls.CertificateRequestInfo) (*tls.Certificate, error) { return identity.TLSCertificate, nil }, }) if err != nil { - return nil, fmt.Errorf("failed to create client credentials: %w", err) + return nil, fmt.Errorf("failed to create client credentials for address '%s': %w", addr, err) } - - manualResolver := manual.NewBuilderWithScheme("oasis-core-resolver") conn, err := cmnGrpc.Dial( - "oasis-core-resolver:///", + spl[1], grpc.WithTransportCredentials(creds), - // https://github.com/grpc/grpc-go/issues/3003 - grpc.WithDefaultServiceConfig(`{"loadBalancingPolicy":"round_robin"}`), grpc.WithDefaultCallOptions(grpc.WaitForReady(true)), - grpc.WithResolvers(manualResolver), ) if err != nil { - return nil, fmt.Errorf("failed to dial IAS proxy: %w", err) + return nil, fmt.Errorf("failed to dial IAS proxy address '%s': %w", addr, err) } - manualResolver.UpdateState(resolverState) - - c.conn = conn - c.endpoint = api.NewEndpointClient(conn) + clients = append(clients, &proxyClient{ + conn: conn, + endpoint: api.NewEndpointClient(conn), + logger: logger, + }) } - return c, nil + return clients, nil } diff --git a/go/oasis-node/cmd/node/node.go b/go/oasis-node/cmd/node/node.go index 889e92c2b80..146ecafbd72 100644 --- a/go/oasis-node/cmd/node/node.go +++ b/go/oasis-node/cmd/node/node.go @@ -74,7 +74,7 @@ type Node struct { Genesis genesisAPI.Provider Identity *identity.Identity Sentry sentryAPI.Backend - 
IAS iasAPI.Endpoint + IAS []iasAPI.Endpoint RuntimeRegistry runtimeRegistry.Registry @@ -240,7 +240,6 @@ func (n *Node) initRuntimeWorkers() error { n.Consensus, n.LightClient, n.P2P, - n.IAS, n.Consensus.KeyManager(), n.RuntimeRegistry, ) diff --git a/go/runtime/host/sgx/epid.go b/go/runtime/host/sgx/epid.go index 5b260c664bc..2a9e8f50380 100644 --- a/go/runtime/host/sgx/epid.go +++ b/go/runtime/host/sgx/epid.go @@ -4,6 +4,7 @@ import ( "context" "encoding/binary" "fmt" + "time" "github.com/oasisprotocol/oasis-core/go/common" "github.com/oasisprotocol/oasis-core/go/common/cbor" @@ -19,9 +20,12 @@ import ( type teeStateEPID struct { teeStateImplCommon - epidGID uint32 - spid cmnIAS.SPID - quoteType *cmnIAS.SignatureType + epidGID uint32 + + // prevIAS is the index of the IAS server that was used for the last successful attestation. + // This is used as a heuristic to first query the IAS server that is likely able to + // successfully do the attestation. + prevIAS int } func (ep *teeStateEPID) Init(ctx context.Context, sp *sgxProvisioner, runtimeID common.Namespace, version version.Version) ([]byte, error) { @@ -30,16 +34,9 @@ func (ep *teeStateEPID) Init(ctx context.Context, sp *sgxProvisioner, runtimeID return nil, fmt.Errorf("error while getting quote info from AESMD: %w", err) } - spidInfo, err := sp.ias.GetSPIDInfo(ctx) - if err != nil { - return nil, fmt.Errorf("error while getting IAS SPID information: %w", err) - } - ep.runtimeID = runtimeID ep.version = version ep.epidGID = binary.LittleEndian.Uint32(qi.GID[:]) - ep.spid = spidInfo.SPID - ep.quoteType = &spidInfo.QuoteSignatureType return qi.TargetInfo, nil } @@ -52,8 +49,50 @@ func (ep *teeStateEPID) Update(ctx context.Context, sp *sgxProvisioner, conn pro } supportsAttestationV1 := (regParams.TEEFeatures != nil && regParams.TEEFeatures.SGX.PCS) + // Start with the IAS server that was used for the last successful attestation. 
+ // TODO: Could consider implementing a strategy for more optimized endpoint selection with + // latency and success rate feedback (in ias/proxy/client/client.go). But (re-)attestations are + // not so frequent and this is the only code that uses the IAS clients, so this is good enough. + for i := ep.prevIAS; i < ep.prevIAS+len(sp.ias); i++ { + idx := i % len(sp.ias) + resp, err := ep.update(ctx, sp, conn, report, nonce, supportsAttestationV1, sp.ias[idx]) + if err == nil { + ep.prevIAS = idx + return resp, nil + } + + sp.logger.Warn("error obtaining attestation, trying next IAS server", "err", err, "client_idx", idx) + if i == ep.prevIAS+len(sp.ias)-1 { + return nil, err + } + + select { + case <-time.After(50 * time.Millisecond): + continue + case <-ctx.Done(): + return nil, ctx.Err() + } + } + return nil, fmt.Errorf("no IAS servers configured") +} + +func (ep *teeStateEPID) update( + ctx context.Context, + sp *sgxProvisioner, + conn protocol.Connection, + report []byte, + nonce string, + supportsAttestationV1 bool, + iasClient ias.Endpoint, +) ([]byte, error) { + // Obtain SPID info. + spidInfo, err := iasClient.GetSPIDInfo(ctx) + if err != nil { + return nil, fmt.Errorf("error while requesting SPID info: %w", err) + } + // Update the SigRL (Not cached, knowing if revoked is important). 
- sigRL, err := sp.ias.GetSigRL(ctx, ep.epidGID) + sigRL, err := iasClient.GetSigRL(ctx, ep.epidGID) if err != nil { return nil, fmt.Errorf("error while requesting SigRL: %w", err) } @@ -61,8 +100,8 @@ func (ep *teeStateEPID) Update(ctx context.Context, sp *sgxProvisioner, conn pro quote, err := sp.aesm.GetQuote( ctx, report, - *ep.quoteType, - ep.spid, + spidInfo.QuoteSignatureType, + spidInfo.SPID, make([]byte, 16), sigRL, ) @@ -88,7 +127,7 @@ func (ep *teeStateEPID) Update(ctx context.Context, sp *sgxProvisioner, conn pro MinTCBEvaluationDataNumber: quotePolicy.MinTCBEvaluationDataNumber, } - avrBundle, err := sp.ias.VerifyEvidence(ctx, &evidence) + avrBundle, err := iasClient.VerifyEvidence(ctx, &evidence) if err != nil { return nil, fmt.Errorf("error while verifying attestation evidence: %w", err) } @@ -98,7 +137,7 @@ func (ep *teeStateEPID) Update(ctx context.Context, sp *sgxProvisioner, conn pro if decErr == nil && avr.TCBEvaluationDataNumber < quotePolicy.MinTCBEvaluationDataNumber { // Retry again with early updating. evidence.EarlyTCBUpdate = true - avrBundle, err = sp.ias.VerifyEvidence(ctx, &evidence) + avrBundle, err = iasClient.VerifyEvidence(ctx, &evidence) if err != nil { return nil, fmt.Errorf("error while verifying attestation evidence with early update: %w", err) } diff --git a/go/runtime/host/sgx/sgx.go b/go/runtime/host/sgx/sgx.go index 3c19e8f46aa..34aa99fbaa5 100644 --- a/go/runtime/host/sgx/sgx.go +++ b/go/runtime/host/sgx/sgx.go @@ -59,8 +59,8 @@ type Config struct { // LoaderPath is the path to the runtime loader binary. LoaderPath string - // IAS is the Intel Attestation Service endpoint. - IAS ias.Endpoint + // IAS are the Intel Attestation Service endpoints. + IAS []ias.Endpoint // PCS is the Intel Provisioning Certification Service client. PCS pcs.Client // Consensus is the consensus layer backend. 
@@ -157,7 +157,7 @@ type sgxProvisioner struct { cfg Config sandbox host.Provisioner - ias ias.Endpoint + ias []ias.Endpoint pcs pcs.Client aesm *aesm.Client consensus consensus.Backend diff --git a/go/runtime/host/sgx/sgx_test.go b/go/runtime/host/sgx/sgx_test.go index 57190ae9941..62a703cf8b7 100644 --- a/go/runtime/host/sgx/sgx_test.go +++ b/go/runtime/host/sgx/sgx_test.go @@ -12,6 +12,7 @@ import ( cmnIAS "github.com/oasisprotocol/oasis-core/go/common/sgx/ias" "github.com/oasisprotocol/oasis-core/go/common/version" cmt "github.com/oasisprotocol/oasis-core/go/consensus/cometbft/api" + "github.com/oasisprotocol/oasis-core/go/ias/api" iasHttp "github.com/oasisprotocol/oasis-core/go/ias/http" "github.com/oasisprotocol/oasis-core/go/runtime/bundle" "github.com/oasisprotocol/oasis-core/go/runtime/host" @@ -76,7 +77,7 @@ func TestProvisionerSGX(t *testing.T) { ConsensusProtocolVersion: version.Versions.ConsensusProtocol, }, LoaderPath: envRuntimeLoaderPath, - IAS: ias, + IAS: []api.Endpoint{ias}, RuntimeAttestInterval: 2 * time.Second, InsecureNoSandbox: true, SandboxBinaryPath: bwrapPath, @@ -93,7 +94,7 @@ func TestProvisionerSGX(t *testing.T) { }, LoaderPath: envRuntimeLoaderPath, RuntimeAttestInterval: 2 * time.Second, - IAS: ias, + IAS: []api.Endpoint{ias}, SandboxBinaryPath: bwrapPath, }) }, extraTests) diff --git a/go/runtime/registry/config.go b/go/runtime/registry/config.go index 12f3289d52b..76c1d164d08 100644 --- a/go/runtime/registry/config.go +++ b/go/runtime/registry/config.go @@ -70,7 +70,7 @@ type RuntimeHostConfig struct { Runtimes map[common.Namespace]map[version.Version]*runtimeHost.Config } -func newConfig(dataDir string, commonStore *persistent.CommonStore, consensus consensus.Backend, ias ias.Endpoint) (*RuntimeConfig, error) { //nolint: gocyclo +func newConfig(dataDir string, commonStore *persistent.CommonStore, consensus consensus.Backend, ias []ias.Endpoint) (*RuntimeConfig, error) { //nolint: gocyclo var cfg RuntimeConfig haveSetRuntimes := 
len(config.GlobalConfig.Runtime.Paths) > 0 diff --git a/go/runtime/registry/registry.go b/go/runtime/registry/registry.go index 99b9fd5d100..3476bbd9f17 100644 --- a/go/runtime/registry/registry.go +++ b/go/runtime/registry/registry.go @@ -571,7 +571,7 @@ func newRuntime( } // New creates a new runtime registry. -func New(ctx context.Context, dataDir string, commonStore *persistent.CommonStore, consensus consensus.Backend, ias ias.Endpoint) (Registry, error) { +func New(ctx context.Context, dataDir string, commonStore *persistent.CommonStore, consensus consensus.Backend, ias []ias.Endpoint) (Registry, error) { cfg, err := newConfig(dataDir, commonStore, consensus, ias) if err != nil { return nil, err diff --git a/go/worker/common/worker.go b/go/worker/common/worker.go index 08a42bad9d1..0e4e97da069 100644 --- a/go/worker/common/worker.go +++ b/go/worker/common/worker.go @@ -10,7 +10,6 @@ import ( "github.com/oasisprotocol/oasis-core/go/config" consensus "github.com/oasisprotocol/oasis-core/go/consensus/api" control "github.com/oasisprotocol/oasis-core/go/control/api" - ias "github.com/oasisprotocol/oasis-core/go/ias/api" keymanagerApi "github.com/oasisprotocol/oasis-core/go/keymanager/api" p2p "github.com/oasisprotocol/oasis-core/go/p2p/api" runtimeRegistry "github.com/oasisprotocol/oasis-core/go/runtime/registry" @@ -29,7 +28,6 @@ type Worker struct { Consensus consensus.Backend LightClient consensus.LightClient P2P p2p.Service - IAS ias.Endpoint KeyManager keymanagerApi.Backend RuntimeRegistry runtimeRegistry.Registry @@ -192,7 +190,6 @@ func newWorker( consensus consensus.Backend, lightClient consensus.LightClient, p2p p2p.Service, - ias ias.Endpoint, keyManager keymanagerApi.Backend, rtRegistry runtimeRegistry.Registry, cfg Config, @@ -222,7 +219,6 @@ func newWorker( Consensus: consensus, LightClient: lightClient, P2P: p2p, - IAS: ias, KeyManager: keyManager, RuntimeRegistry: rtRegistry, runtimes: make(map[common.Namespace]*committee.Node), @@ -256,7 +252,6 @@ 
func New( consensus consensus.Backend, lightClient consensus.LightClient, p2p p2p.Service, - ias ias.Endpoint, keyManager keymanagerApi.Backend, runtimeRegistry runtimeRegistry.Registry, ) (*Worker, error) { @@ -277,7 +272,6 @@ func New( consensus, lightClient, p2p, - ias, keyManager, runtimeRegistry, *cfg,