From d783b46162f0ba82bd3bf948232892bddb654669 Mon Sep 17 00:00:00 2001 From: Tim Burks Date: Tue, 23 Nov 2021 13:13:00 -0800 Subject: [PATCH] Improved bulk uploads of Google APIs. (#397) * To match Google proto style, get API ID, title, and description from Service Config. * Tighten/refactor service config requirement for uploaded proto-based APIs. * Use values from parsed Discovery documents when bulk-uploading Discovery docs. * Update gnostic dependency. * Remove "compute details" from discovery.sh and protos.sh demo scripts. * Improve robustness of bulk uploads. - Use filepath.Abs() to resolve relative path names used for bulk imports - Allow "AlreadyExists" as an acceptable error for CreateApi calls to avoid failing when concurrent create calls are made to replicated backends. * Code review/quality improvements from @seaneganx * Update demo/protos.sh for new proto spec naming convention --- cmd/registry/cmd/upload/bulk/discovery.go | 23 ++++- cmd/registry/cmd/upload/bulk/openapi.go | 10 +- cmd/registry/cmd/upload/bulk/protos.go | 114 ++++++++++++++++++---- demos/disco.sh | 6 -- demos/protos.sh | 24 ++--- go.mod | 3 +- go.sum | 6 +- 7 files changed, 134 insertions(+), 52 deletions(-) diff --git a/cmd/registry/cmd/upload/bulk/discovery.go b/cmd/registry/cmd/upload/bulk/discovery.go index b4d638aad..7b3304f51 100644 --- a/cmd/registry/cmd/upload/bulk/discovery.go +++ b/cmd/registry/cmd/upload/bulk/discovery.go @@ -24,6 +24,9 @@ import ( "github.com/apigee/registry/log" "github.com/apigee/registry/rpc" "github.com/spf13/cobra" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "gopkg.in/yaml.v2" discovery "github.com/google/gnostic/discovery" ) @@ -77,6 +80,7 @@ type uploadDiscoveryTask struct { versionID string specID string contents []byte + info DiscoveryInfo } func (task *uploadDiscoveryTask) String() string { @@ -111,12 +115,15 @@ func (task *uploadDiscoveryTask) createAPI(ctx context.Context) error { response, err := task.client.UpdateApi(ctx, &rpc.UpdateApiRequest{ Api: &rpc.Api{ Name: task.apiName(), - DisplayName: task.apiID, + DisplayName: task.info.Title, + Description: task.info.Description, }, AllowMissing: true, }) if err == nil { log.Debugf(ctx, "Updated %s", response.Name) + } else if status.Code(err) == codes.AlreadyExists { + log.Debugf(ctx, "Found %s", task.apiName()) } else { log.FromContext(ctx).WithError(err).Debugf("Failed to create API %s", task.apiName()) // Returning this error ends all tasks, which seems appropriate to @@ -205,7 +212,11 @@ func (task *uploadDiscoveryTask) fetchDiscoveryDoc() error { } task.contents, err = normalizeJSON(bytes) - return err + if err != nil { + return err + } + + return yaml.Unmarshal(task.contents, &task.info) } // Normalize JSON documents by remarshalling them to @@ -218,3 +229,11 @@ func normalizeJSON(bytes []byte) ([]byte, error) { } return json.MarshalIndent(m, "", " ") } + +// A subset of the discovery document useful for adding an API to the registry +type DiscoveryInfo struct { + Name string `yaml:"name"` + Title string `yaml:"title"` + Version string `yaml:"version"` + Description string `yaml:"description"` +} diff --git a/cmd/registry/cmd/upload/bulk/openapi.go b/cmd/registry/cmd/upload/bulk/openapi.go index 4eb3aa28f..efa2443d7 100644 --- a/cmd/registry/cmd/upload/bulk/openapi.go +++ b/cmd/registry/cmd/upload/bulk/openapi.go @@ -29,6 +29,8 @@ import ( "github.com/apigee/registry/log" "github.com/apigee/registry/rpc" "github.com/spf13/cobra" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" ) func openAPICommand(ctx context.Context) *cobra.Command { @@ -49,7 +51,11 @@ func openAPICommand(ctx context.Context) *cobra.Command { } for _, arg := range args { - scanDirectoryForOpenAPI(ctx, client, projectID, baseURI, arg) + path, err := filepath.Abs(arg) + if err != nil { + log.FromContext(ctx).WithError(err).Fatal("Invalid path") + } + scanDirectoryForOpenAPI(ctx, client, projectID, baseURI, path) } }, } @@ -175,6 +181,8 @@ func (task *uploadOpenAPITask) createAPI(ctx context.Context) error { }) if err == nil { log.Debugf(ctx, "Updated %s", response.Name) + } else if status.Code(err) == codes.AlreadyExists { + log.Debugf(ctx, "Found %s", task.apiName()) } else { log.FromContext(ctx).WithError(err).Debugf("Failed to create API %s", task.apiName()) // Returning this error ends all tasks, which seems appropriate to diff --git a/cmd/registry/cmd/upload/bulk/protos.go b/cmd/registry/cmd/upload/bulk/protos.go index 944b61872..670b10231 100644 --- a/cmd/registry/cmd/upload/bulk/protos.go +++ b/cmd/registry/cmd/upload/bulk/protos.go @@ -17,6 +17,7 @@ package bulk import ( "context" "fmt" + "io/ioutil" "os" "path" "path/filepath" @@ -28,6 +29,9 @@ import ( "github.com/apigee/registry/log" "github.com/apigee/registry/rpc" "github.com/spf13/cobra" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/status" + "gopkg.in/yaml.v2" ) func protosCommand(ctx context.Context) *cobra.Command { @@ -48,7 +52,11 @@ func protosCommand(ctx context.Context) *cobra.Command { } for _, arg := range args { - scanDirectoryForProtos(ctx, client, projectID, baseURI, arg) + path, err := filepath.Abs(arg) + if err != nil { + log.FromContext(ctx).WithError(err).Fatal("Invalid path") + } + scanDirectoryForProtos(ctx, client, projectID, baseURI, path) } }, } @@ -63,23 +71,37 @@ func scanDirectoryForProtos(ctx context.Context, client connection.Client, proje defer wait() dirPattern := regexp.MustCompile("v.*[1-9]+.*") - if err := filepath.Walk(directory, func(filepath string, info os.FileInfo, err error) error { + if err := filepath.Walk(directory, func(fullname string, info os.FileInfo, err error) error { if err != nil { return err } // Skip non-matching directories. - filename := path.Base(filepath) + filename := path.Base(fullname) if !info.IsDir() || !dirPattern.MatchString(filename) { return nil } + // The API Service Configuration contains important API properties. + serviceConfig, err := readServiceConfig(fullname) + if err != nil { + return err + } + + // Skip APIs with missing or incomplete service configurations + if serviceConfig == nil || serviceConfig.Title == "" || serviceConfig.Name == "" { + return nil + } + taskQueue <- &uploadProtoTask{ - client: client, - baseURI: baseURI, - projectID: projectID, - path: filepath, - directory: directory, + client: client, + baseURI: baseURI, + projectID: projectID, + apiID: strings.TrimSuffix(serviceConfig.Name, ".googleapis.com"), + apiTitle: serviceConfig.Title, + apiDescription: strings.ReplaceAll(serviceConfig.Documentation.Summary, "\n", " "), + path: fullname, + directory: directory, } return nil @@ -89,14 +111,16 @@ func scanDirectoryForProtos(ctx context.Context, client connection.Client, proje } type uploadProtoTask struct { - client connection.Client - baseURI string - projectID string - path string - directory string - apiID string // computed at runtime - versionID string // computed at runtime - specID string // computed at runtime + client connection.Client + baseURI string + projectID string + path string + directory string + apiID string + apiTitle string + apiDescription string + versionID string // computed at runtime + specID string // computed at runtime } func (task *uploadProtoTask) String() string { @@ -125,9 +149,6 @@ func (task *uploadProtoTask) Run(ctx context.Context) error { func (task *uploadProtoTask) populateFields() { parts := strings.Split(task.apiPath(), "/") - apiParts := parts[0 : len(parts)-1] - apiPart := strings.ReplaceAll(strings.Join(apiParts, "-"), "/", "-") - task.apiID = sanitize(apiPart) versionPart := parts[len(parts)-1] task.versionID = sanitize(versionPart) @@ -141,12 +162,15 @@ func (task *uploadProtoTask) createAPI(ctx context.Context) error { response, err := task.client.UpdateApi(ctx, &rpc.UpdateApiRequest{ Api: &rpc.Api{ Name: task.apiName(), - DisplayName: task.apiID, + DisplayName: task.apiTitle, + Description: task.apiDescription, }, AllowMissing: true, }) if err == nil { log.Debugf(ctx, "Updated %s", response.Name) + } else if status.Code(err) == codes.AlreadyExists { + log.Debugf(ctx, "Found %s", task.apiName()) } else { log.FromContext(ctx).WithError(err).Debugf("Failed to create API %s", task.apiName()) // Returning this error ends all tasks, which seems appropriate to @@ -236,7 +260,8 @@ func (task *uploadProtoTask) apiPath() string { } func (task *uploadProtoTask) fileName() string { - return "protos.zip" + parts := strings.Split(task.apiPath(), "/") + return strings.Join(parts, "-") + ".zip" } func (task *uploadProtoTask) zipContents() ([]byte, error) { @@ -248,3 +273,50 @@ func (task *uploadProtoTask) zipContents() ([]byte, error) { return contents.Bytes(), nil } + +type ServiceConfig struct { + Type string `yaml:"type"` + Name string `yaml:"name"` + Title string `yaml:"title"` + Documentation struct { + Summary string `yaml:"summary"` + } `yaml:"documentation"` +} + +// readServiceConfig scans a directory for a YAML file containing +// Google Service Configuration. Often other YAML files are present; +// these are ignored, and the Service Configuration file is recognized +// by the presence of a "type" field containing "google.api.Service". +// This expects (but does not verify) that there is only one such file +// in the directory. +func readServiceConfig(directory string) (*ServiceConfig, error) { + var serviceConfig *ServiceConfig + yamlPattern := regexp.MustCompile(`.*\.yaml`) + err := filepath.Walk(directory, func(fullname string, info os.FileInfo, err error) error { + if err != nil { + return err + } + // Skip everything that's not a YAML file. + filename := path.Base(fullname) + if info.IsDir() || !yamlPattern.MatchString(filename) { + return nil + } + bytes, err := ioutil.ReadFile(fullname) + if err != nil { + return err + } + sc := &ServiceConfig{} + err = yaml.Unmarshal(bytes, sc) + if err != nil { + return err + } + if sc.Type == "google.api.Service" { + serviceConfig = sc + } + return nil + }) + if err != nil { + return nil, err + } + return serviceConfig, nil +} diff --git a/demos/disco.sh b/demos/disco.sh index cec8a301b..2f4290525 100755 --- a/demos/disco.sh +++ b/demos/disco.sh @@ -41,12 +41,6 @@ apg admin create-project --project_id $PROJECT \ registry upload bulk discovery \ --project-id $PROJECT -# Now compute summary details of all of the APIs in the project. -# This will log errors if any of the API specs can't be parsed, -# but for every spec that is parsed, this will set the display name -# and description of the corresponding API from the values in the specs. -registry compute details projects/$PROJECT/locations/global/apis/- - # We can list the APIs with the following command: registry list projects/$PROJECT/locations/global/apis diff --git a/demos/protos.sh b/demos/protos.sh index 6fa3245c2..a7feb9ef3 100755 --- a/demos/protos.sh +++ b/demos/protos.sh @@ -46,12 +46,6 @@ registry upload bulk protos \ --project-id $PROJECT ~/Desktop/googleapis \ --base-uri https://github.com/googleapis/googleapis/blob/$COMMIT -# Now compute summary details of all of the APIs in the project. -# This will log errors if any of the API specs can't be parsed, -# but for every spec that is parsed, this will set the display name -# and description of the corresponding API from the values in the specs. -registry compute details projects/$PROJECT/locations/global/apis/- - # The `registry upload bulk protos` subcommand automatically generated API ids # from the path to the protos in the repo. List the APIs with the following command: registry list projects/$PROJECT/locations/global/apis @@ -69,22 +63,22 @@ registry list projects/$PROJECT/locations/global/apis/-/versions registry list projects/$PROJECT/locations/global/apis/-/versions/-/specs # To see more about an individual spec, use the `registry get` command: -registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip +registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip # You can also get this with the automatically-generated `apg` command line tool: -apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip +apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip # Add the `--json` flag to get this as JSON: -apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip --json +apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip --json # You might notice that that didn't return the actual spec. That's because the spec contents # are accessed through a separate method that (when transcoded to HTTP) allows direct download # of spec contents. -apg registry get-api-spec-contents --name projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip +apg registry get-api-spec-contents --name projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip # An easier way to get the bytes of the spec is to use `registry get` with the `--contents` flag. # This writes the bytes to stdout, so you probably want to redirect this to a file, as follows: -registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip \ +registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip \ --contents > protos.zip # When you unzip this file, you'll find a directory hierarchy suitable for compiling with `protoc`. @@ -99,7 +93,7 @@ registry compute complexity projects/$PROJECT/locations/global/apis/-/versions/- registry list projects/$PROJECT/locations/global/apis/-/versions/-/specs/-/artifacts/complexity # We can use the `registry get` subcommand to read individual complexity records. -registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip/artifacts/complexity +registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip/artifacts/complexity # The registry tool also supports exporting all of the complexity results to a Google sheet. # (The following command expects OAuth client credentials with access to the @@ -111,7 +105,7 @@ registry export sheet projects/$PROJECT/locations/global/apis/-/versions/-/specs registry compute vocabulary projects/$PROJECT/locations/global/apis/-/versions/-/specs/- # Vocabularies are also stored as artifacts associated with API specs. -registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip/artifacts/vocabulary +registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip/artifacts/vocabulary # The registry command can perform set operations on vocabularies. # To find common terms in all Google speech-related APIs, use the following: @@ -144,6 +138,4 @@ registry export sheet projects/$PROJECT/locations/global/artifacts/vocabulary # Here we run the Google api-linter and compile summary statistics. registry compute lint projects/$PROJECT/locations/global/apis/-/versions/-/specs/- registry compute lintstats projects/$PROJECT/locations/global/apis/-/versions/-/specs/- --linter aip -registry compute lintstats projects/$PROJECT --linter aip - - +registry compute lintstats projects/$PROJECT --linter aip \ No newline at end of file diff --git a/go.mod b/go.mod index 7b7251219..a34fada4e 100644 --- a/go.mod +++ b/go.mod @@ -10,11 +10,10 @@ require ( github.com/ghodss/yaml v1.0.0 github.com/gogo/protobuf v1.3.2 github.com/google/cel-go v0.8.0 - github.com/google/gnostic v0.5.6 + github.com/google/gnostic v0.5.7 github.com/google/go-cmp v0.5.6 github.com/google/uuid v1.3.0 github.com/googleapis/gax-go/v2 v2.1.1 - github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 github.com/spf13/cobra v1.2.1 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.7.0 diff --git a/go.sum b/go.sum index 902b08084..a26ca3871 100644 --- a/go.sum +++ b/go.sum @@ -227,8 +227,8 @@ github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ github.com/google/cel-go v0.8.0 h1:jmRSm4aSa1RaTH56DcbeCwv53jZE9SvdofV30s838WU= github.com/google/cel-go v0.8.0/go.mod h1:U7ayypeSkw23szu4GaQTPJGx66c20mx8JklMSxrmI1w= github.com/google/cel-spec v0.6.0/go.mod h1:Nwjgxy5CbjlPrtCWjeDjUyKMl8w41YBYGjsyDdqk0xA= -github.com/google/gnostic v0.5.6 h1:+1p4oIo3k0jN30tKKsacTJ5BPr+Tam/mY+3r9sfGxoE= -github.com/google/gnostic v0.5.6/go.mod h1:73MKFl6jIHelAJNaBGFzt3SPtZULs9dYrGFt8OiIsHQ= +github.com/google/gnostic v0.5.7 h1:0x05jSyQvEQj2s7X6zg9Mdk1Gf9vI2DzSBJbMIVdiMU= +github.com/google/gnostic v0.5.7/go.mod h1:73MKFl6jIHelAJNaBGFzt3SPtZULs9dYrGFt8OiIsHQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= @@ -468,8 +468,6 @@ github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxzi github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs= github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno= -github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE= -github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8= github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs= github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA= github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=