Skip to content

Commit

Permalink
Improved bulk uploads of Google APIs. (#397)
Browse files Browse the repository at this point in the history
* To match Google proto style, get API ID, title, and description from Service Config.
* Tighten/refactor service config requirement for uploaded proto-based APIs.
* Use values from parsed Discovery documents when bulk-uploading Discovery docs.
* Update gnostic dependency.
* Remove "compute details" from discovery.sh and protos.sh demo scripts.
* Improve robustness of bulk uploads.
  - Use filepath.Abs() to resolve relative path names used for bulk imports
  - Allow "AlreadyExists" as an acceptable error for CreateApi calls to avoid failing when concurrent create calls are made to replicated backends.
* Code review/quality improvements from @seaneganx
* Update demo/protos.sh for new proto spec naming convention
  • Loading branch information
timburks authored Nov 23, 2021
1 parent 0e73a30 commit d783b46
Show file tree
Hide file tree
Showing 7 changed files with 134 additions and 52 deletions.
23 changes: 21 additions & 2 deletions cmd/registry/cmd/upload/bulk/discovery.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ import (
"github.com/apigee/registry/log"
"github.com/apigee/registry/rpc"
"github.com/spf13/cobra"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"gopkg.in/yaml.v2"

discovery "github.com/google/gnostic/discovery"
)
Expand Down Expand Up @@ -77,6 +80,7 @@ type uploadDiscoveryTask struct {
versionID string
specID string
contents []byte
info DiscoveryInfo
}

func (task *uploadDiscoveryTask) String() string {
Expand Down Expand Up @@ -111,12 +115,15 @@ func (task *uploadDiscoveryTask) createAPI(ctx context.Context) error {
response, err := task.client.UpdateApi(ctx, &rpc.UpdateApiRequest{
Api: &rpc.Api{
Name: task.apiName(),
DisplayName: task.apiID,
DisplayName: task.info.Title,
Description: task.info.Description,
},
AllowMissing: true,
})
if err == nil {
log.Debugf(ctx, "Updated %s", response.Name)
} else if status.Code(err) == codes.AlreadyExists {
log.Debugf(ctx, "Found %s", task.apiName())
} else {
log.FromContext(ctx).WithError(err).Debugf("Failed to create API %s", task.apiName())
// Returning this error ends all tasks, which seems appropriate to
Expand Down Expand Up @@ -205,7 +212,11 @@ func (task *uploadDiscoveryTask) fetchDiscoveryDoc() error {
}

task.contents, err = normalizeJSON(bytes)
return err
if err != nil {
return err
}

return yaml.Unmarshal(task.contents, &task.info)
}

// Normalize JSON documents by remarshalling them to
Expand All @@ -218,3 +229,11 @@ func normalizeJSON(bytes []byte) ([]byte, error) {
}
return json.MarshalIndent(m, "", " ")
}

// A subset of the discovery document useful for adding an API to the registry
type DiscoveryInfo struct {
Name string `yaml:"name"`
Title string `yaml:"title"`
Version string `yaml:"version"`
Description string `yaml:"description"`
}
10 changes: 9 additions & 1 deletion cmd/registry/cmd/upload/bulk/openapi.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ import (
"github.com/apigee/registry/log"
"github.com/apigee/registry/rpc"
"github.com/spf13/cobra"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)

func openAPICommand(ctx context.Context) *cobra.Command {
Expand All @@ -49,7 +51,11 @@ func openAPICommand(ctx context.Context) *cobra.Command {
}

for _, arg := range args {
scanDirectoryForOpenAPI(ctx, client, projectID, baseURI, arg)
path, err := filepath.Abs(arg)
if err != nil {
log.FromContext(ctx).WithError(err).Fatal("Invalid path")
}
scanDirectoryForOpenAPI(ctx, client, projectID, baseURI, path)
}
},
}
Expand Down Expand Up @@ -175,6 +181,8 @@ func (task *uploadOpenAPITask) createAPI(ctx context.Context) error {
})
if err == nil {
log.Debugf(ctx, "Updated %s", response.Name)
} else if status.Code(err) == codes.AlreadyExists {
log.Debugf(ctx, "Found %s", task.apiName())
} else {
log.FromContext(ctx).WithError(err).Debugf("Failed to create API %s", task.apiName())
// Returning this error ends all tasks, which seems appropriate to
Expand Down
114 changes: 93 additions & 21 deletions cmd/registry/cmd/upload/bulk/protos.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ package bulk
import (
"context"
"fmt"
"io/ioutil"
"os"
"path"
"path/filepath"
Expand All @@ -28,6 +29,9 @@ import (
"github.com/apigee/registry/log"
"github.com/apigee/registry/rpc"
"github.com/spf13/cobra"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"gopkg.in/yaml.v2"
)

func protosCommand(ctx context.Context) *cobra.Command {
Expand All @@ -48,7 +52,11 @@ func protosCommand(ctx context.Context) *cobra.Command {
}

for _, arg := range args {
scanDirectoryForProtos(ctx, client, projectID, baseURI, arg)
path, err := filepath.Abs(arg)
if err != nil {
log.FromContext(ctx).WithError(err).Fatal("Invalid path")
}
scanDirectoryForProtos(ctx, client, projectID, baseURI, path)
}
},
}
Expand All @@ -63,23 +71,37 @@ func scanDirectoryForProtos(ctx context.Context, client connection.Client, proje
defer wait()

dirPattern := regexp.MustCompile("v.*[1-9]+.*")
if err := filepath.Walk(directory, func(filepath string, info os.FileInfo, err error) error {
if err := filepath.Walk(directory, func(fullname string, info os.FileInfo, err error) error {
if err != nil {
return err
}

// Skip non-matching directories.
filename := path.Base(filepath)
filename := path.Base(fullname)
if !info.IsDir() || !dirPattern.MatchString(filename) {
return nil
}

// The API Service Configuration contains important API properties.
serviceConfig, err := readServiceConfig(fullname)
if err != nil {
return err
}

// Skip APIs with missing or incomplete service configurations
if serviceConfig == nil || serviceConfig.Title == "" || serviceConfig.Name == "" {
return nil
}

taskQueue <- &uploadProtoTask{
client: client,
baseURI: baseURI,
projectID: projectID,
path: filepath,
directory: directory,
client: client,
baseURI: baseURI,
projectID: projectID,
apiID: strings.TrimSuffix(serviceConfig.Name, ".googleapis.com"),
apiTitle: serviceConfig.Title,
apiDescription: strings.ReplaceAll(serviceConfig.Documentation.Summary, "\n", " "),
path: fullname,
directory: directory,
}

return nil
Expand All @@ -89,14 +111,16 @@ func scanDirectoryForProtos(ctx context.Context, client connection.Client, proje
}

type uploadProtoTask struct {
client connection.Client
baseURI string
projectID string
path string
directory string
apiID string // computed at runtime
versionID string // computed at runtime
specID string // computed at runtime
client connection.Client
baseURI string
projectID string
path string
directory string
apiID string
apiTitle string
apiDescription string
versionID string // computed at runtime
specID string // computed at runtime
}

func (task *uploadProtoTask) String() string {
Expand Down Expand Up @@ -125,9 +149,6 @@ func (task *uploadProtoTask) Run(ctx context.Context) error {

func (task *uploadProtoTask) populateFields() {
parts := strings.Split(task.apiPath(), "/")
apiParts := parts[0 : len(parts)-1]
apiPart := strings.ReplaceAll(strings.Join(apiParts, "-"), "/", "-")
task.apiID = sanitize(apiPart)

versionPart := parts[len(parts)-1]
task.versionID = sanitize(versionPart)
Expand All @@ -141,12 +162,15 @@ func (task *uploadProtoTask) createAPI(ctx context.Context) error {
response, err := task.client.UpdateApi(ctx, &rpc.UpdateApiRequest{
Api: &rpc.Api{
Name: task.apiName(),
DisplayName: task.apiID,
DisplayName: task.apiTitle,
Description: task.apiDescription,
},
AllowMissing: true,
})
if err == nil {
log.Debugf(ctx, "Updated %s", response.Name)
} else if status.Code(err) == codes.AlreadyExists {
log.Debugf(ctx, "Found %s", task.apiName())
} else {
log.FromContext(ctx).WithError(err).Debugf("Failed to create API %s", task.apiName())
// Returning this error ends all tasks, which seems appropriate to
Expand Down Expand Up @@ -236,7 +260,8 @@ func (task *uploadProtoTask) apiPath() string {
}

func (task *uploadProtoTask) fileName() string {
return "protos.zip"
parts := strings.Split(task.apiPath(), "/")
return strings.Join(parts, "-") + ".zip"
}

func (task *uploadProtoTask) zipContents() ([]byte, error) {
Expand All @@ -248,3 +273,50 @@ func (task *uploadProtoTask) zipContents() ([]byte, error) {

return contents.Bytes(), nil
}

type ServiceConfig struct {
Type string `yaml:"type"`
Name string `yaml:"name"`
Title string `yaml:"title"`
Documentation struct {
Summary string `yaml:"summary"`
} `yaml:"documentation"`
}

// readServiceConfig scans a directory for a YAML file containing
// Google Service Configuration. Often other YAML files are present;
// these are ignored, and the Service Configuration file is recognized
// by the presence of a "type" field containing "google.api.Service".
// This expects (but does not verify) that there is only one such file
// in the directory.
func readServiceConfig(directory string) (*ServiceConfig, error) {
var serviceConfig *ServiceConfig
yamlPattern := regexp.MustCompile(`.*\.yaml`)
err := filepath.Walk(directory, func(fullname string, info os.FileInfo, err error) error {
if err != nil {
return err
}
// Skip everything that's not a YAML file.
filename := path.Base(fullname)
if info.IsDir() || !yamlPattern.MatchString(filename) {
return nil
}
bytes, err := ioutil.ReadFile(fullname)
if err != nil {
return err
}
sc := &ServiceConfig{}
err = yaml.Unmarshal(bytes, sc)
if err != nil {
return err
}
if sc.Type == "google.api.Service" {
serviceConfig = sc
}
return nil
})
if err != nil {
return nil, err
}
return serviceConfig, nil
}
6 changes: 0 additions & 6 deletions demos/disco.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,12 +41,6 @@ apg admin create-project --project_id $PROJECT \
registry upload bulk discovery \
--project-id $PROJECT

# Now compute summary details of all of the APIs in the project.
# This will log errors if any of the API specs can't be parsed,
# but for every spec that is parsed, this will set the display name
# and description of the corresponding API from the values in the specs.
registry compute details projects/$PROJECT/locations/global/apis/-

# We can list the APIs with the following command:
registry list projects/$PROJECT/locations/global/apis

Expand Down
24 changes: 8 additions & 16 deletions demos/protos.sh
Original file line number Diff line number Diff line change
Expand Up @@ -46,12 +46,6 @@ registry upload bulk protos \
--project-id $PROJECT ~/Desktop/googleapis \
--base-uri https://github.com/googleapis/googleapis/blob/$COMMIT

# Now compute summary details of all of the APIs in the project.
# This will log errors if any of the API specs can't be parsed,
# but for every spec that is parsed, this will set the display name
# and description of the corresponding API from the values in the specs.
registry compute details projects/$PROJECT/locations/global/apis/-

# The `registry upload bulk protos` subcommand automatically generated API ids
# from the path to the protos in the repo. List the APIs with the following command:
registry list projects/$PROJECT/locations/global/apis
Expand All @@ -69,22 +63,22 @@ registry list projects/$PROJECT/locations/global/apis/-/versions
registry list projects/$PROJECT/locations/global/apis/-/versions/-/specs

# To see more about an individual spec, use the `registry get` command:
registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip
registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip

# You can also get this with the automatically-generated `apg` command line tool:
apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip
apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip

# Add the `--json` flag to get this as JSON:
apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip --json
apg registry get-api-spec --name projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip --json

# You might notice that that didn't return the actual spec. That's because the spec contents
# are accessed through a separate method that (when transcoded to HTTP) allows direct download
# of spec contents.
apg registry get-api-spec-contents --name projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip
apg registry get-api-spec-contents --name projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip

# An easier way to get the bytes of the spec is to use `registry get` with the `--contents` flag.
# This writes the bytes to stdout, so you probably want to redirect this to a file, as follows:
registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip \
registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip \
--contents > protos.zip

# When you unzip this file, you'll find a directory hierarchy suitable for compiling with `protoc`.
Expand All @@ -99,7 +93,7 @@ registry compute complexity projects/$PROJECT/locations/global/apis/-/versions/-
registry list projects/$PROJECT/locations/global/apis/-/versions/-/specs/-/artifacts/complexity

# We can use the `registry get` subcommand to read individual complexity records.
registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip/artifacts/complexity
registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip/artifacts/complexity

# The registry tool also supports exporting all of the complexity results to a Google sheet.
# (The following command expects OAuth client credentials with access to the
Expand All @@ -111,7 +105,7 @@ registry export sheet projects/$PROJECT/locations/global/apis/-/versions/-/specs
registry compute vocabulary projects/$PROJECT/locations/global/apis/-/versions/-/specs/-

# Vocabularies are also stored as artifacts associated with API specs.
registry get projects/$PROJECT/locations/global/apis/google-cloud-translate/versions/v3/specs/protos.zip/artifacts/vocabulary
registry get projects/$PROJECT/locations/global/apis/translate/versions/v3/specs/google-cloud-translate-v3.zip/artifacts/vocabulary

# The registry command can perform set operations on vocabularies.
# To find common terms in all Google speech-related APIs, use the following:
Expand Down Expand Up @@ -144,6 +138,4 @@ registry export sheet projects/$PROJECT/locations/global/artifacts/vocabulary
# Here we run the Google api-linter and compile summary statistics.
registry compute lint projects/$PROJECT/locations/global/apis/-/versions/-/specs/-
registry compute lintstats projects/$PROJECT/locations/global/apis/-/versions/-/specs/- --linter aip
registry compute lintstats projects/$PROJECT --linter aip


registry compute lintstats projects/$PROJECT --linter aip
3 changes: 1 addition & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,10 @@ require (
github.com/ghodss/yaml v1.0.0
github.com/gogo/protobuf v1.3.2
github.com/google/cel-go v0.8.0
github.com/google/gnostic v0.5.6
github.com/google/gnostic v0.5.7
github.com/google/go-cmp v0.5.6
github.com/google/uuid v1.3.0
github.com/googleapis/gax-go/v2 v2.1.1
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249
github.com/spf13/cobra v1.2.1
github.com/spf13/pflag v1.0.5
github.com/stretchr/testify v1.7.0
Expand Down
6 changes: 2 additions & 4 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,8 @@ github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ
github.com/google/cel-go v0.8.0 h1:jmRSm4aSa1RaTH56DcbeCwv53jZE9SvdofV30s838WU=
github.com/google/cel-go v0.8.0/go.mod h1:U7ayypeSkw23szu4GaQTPJGx66c20mx8JklMSxrmI1w=
github.com/google/cel-spec v0.6.0/go.mod h1:Nwjgxy5CbjlPrtCWjeDjUyKMl8w41YBYGjsyDdqk0xA=
github.com/google/gnostic v0.5.6 h1:+1p4oIo3k0jN30tKKsacTJ5BPr+Tam/mY+3r9sfGxoE=
github.com/google/gnostic v0.5.6/go.mod h1:73MKFl6jIHelAJNaBGFzt3SPtZULs9dYrGFt8OiIsHQ=
github.com/google/gnostic v0.5.7 h1:0x05jSyQvEQj2s7X6zg9Mdk1Gf9vI2DzSBJbMIVdiMU=
github.com/google/gnostic v0.5.7/go.mod h1:73MKFl6jIHelAJNaBGFzt3SPtZULs9dYrGFt8OiIsHQ=
github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
Expand Down Expand Up @@ -468,8 +468,6 @@ github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxzi
github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e h1:fD57ERR4JtEqsWbfPhv4DMiApHyliiK5xCTNVSPiaAs=
github.com/niemeyer/pretty v0.0.0-20200227124842-a10e7caefd8e/go.mod h1:zD1mROLANZcx1PVRCS0qkT7pwLkGfwJo4zjcN/Tysno=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249 h1:NHrXEjTNQY7P0Zfx1aMrNhpgxHmow66XQtm0aQLY0AE=
github.com/nsf/jsondiff v0.0.0-20210926074059-1e845ec5d249/go.mod h1:mpRZBD8SJ55OIICQ3iWH0Yz3cjzA61JdqMLoWXeB2+8=
github.com/oklog/oklog v0.3.2/go.mod h1:FCV+B7mhrz4o+ueLpx+KqkyXRGMWOYEvfiXtdGtbWGs=
github.com/oklog/run v1.0.0/go.mod h1:dlhp/R75TPv97u0XWUtDeV/lRKWPKSdTuV0TZvrmrQA=
github.com/olekukonko/tablewriter v0.0.0-20170122224234-a0225b3f23b5/go.mod h1:vsDQFd/mU46D+Z4whnwzcISnGGzXWMclvtLoiIKAKIo=
Expand Down

0 comments on commit d783b46

Please sign in to comment.