Skip to content

Commit

Permalink
feat: New cmd for docs auto generation (#2096)
Browse files Browse the repository at this point in the history
## Relevant issue(s)

Resolves #2095

## Description

This change adds a new command for generating documents automatically.

It also includes fixes for some overlooked edge cases.
islamaliev authored Dec 8, 2023
1 parent 48d0c24 commit 81fea6e
Showing 8 changed files with 576 additions and 47 deletions.
34 changes: 34 additions & 0 deletions cmd/gendocs/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// Copyright 2023 Democratized Data Foundation
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

/*
gendocs is a tool to generate the collections' documents automatically.
*/
package main

import (
"os"

"github.com/sourcenetwork/defradb/config"
"github.com/sourcenetwork/defradb/tests/gen/cli"
)

func main() {
conf := config.DefaultConfig()
gendocsCmd := cli.MakeGenDocCommand(conf)
if err := gendocsCmd.Execute(); err != nil {
// this error is okay to discard because cobra
// logs any errors encountered during execution
//
// exiting with a non-zero status code signals
// that an error has ocurred during execution
os.Exit(1)
}
}
21 changes: 21 additions & 0 deletions tests/gen/cli/errors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Copyright 2023 Democratized Data Foundation
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import "github.com/sourcenetwork/defradb/errors"

// Error messages produced by this package.
const (
// errInvalidDemandValue is the message attached by NewErrInvalidDemandValue.
errInvalidDemandValue string = "invalid demand value"
)

// NewErrInvalidDemandValue wraps inner in an error that carries the
// errInvalidDemandValue message.
func NewErrInvalidDemandValue(inner error) error {
	wrapped := errors.Wrap(errInvalidDemandValue, inner)
	return wrapped
}
159 changes: 159 additions & 0 deletions tests/gen/cli/gendocs.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
// Copyright 2023 Democratized Data Foundation
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
"context"
"encoding/json"
"io"
"strconv"
"strings"

"github.com/spf13/cobra"

"github.com/sourcenetwork/defradb/client"
"github.com/sourcenetwork/defradb/config"
"github.com/sourcenetwork/defradb/http"
"github.com/sourcenetwork/defradb/tests/gen"
)

// defaultBatchSize is the maximum number of generated documents that the
// gendocs command saves in a single batch.
const defaultBatchSize = 1000

// MakeGenDocCommand builds the cobra command that automatically generates
// documents for collections that already exist.
//
// The command reads the --demand (-d) flag, a JSON object that maps a
// collection name to the number of documents to generate for it, for example
// {"User": 100, "Device": 500}. A value that cannot be decoded into such a map
// is reported via NewErrInvalidDemandValue.
//
// When run, the command loads cfg, creates a store client for cfg.API.Address,
// generates the documents and saves them in batches of at most
// defaultBatchSize, writing a progress line to the command's output after the
// generation step and after every saved batch.
func MakeGenDocCommand(cfg *config.Config) *cobra.Command {
	var demandJSON string

	var cmd = &cobra.Command{
		Use:   "gendocs --demand <demand_json>",
		Short: "Automatically generates documents for existing collections.",
		Long: `Automatically generates documents for existing collections.
Example: The following command generates 100 User documents and 500 Device documents:
gendocs --demand '{"User": 100, "Device": 500 }'`,
		ValidArgs: []string{"demand"},
		RunE: func(cmd *cobra.Command, args []string) error {
			// cobra does not chain pre run calls so we have to run them again here
			if err := loadConfig(cfg); err != nil {
				return err
			}
			store, err := http.NewClient(cfg.API.Address)
			if err != nil {
				return err
			}

			demandMap := make(map[string]int)
			err = json.Unmarshal([]byte(demandJSON), &demandMap)
			if err != nil {
				return NewErrInvalidDemandValue(err)
			}

			// Use the command's context for every store call so that
			// cancellation and deadlines set by the caller are honoured
			// consistently, including while saving batches.
			ctx := cmd.Context()

			collections, err := store.GetAllCollections(ctx)
			if err != nil {
				return err
			}

			opts := make([]gen.Option, 0, len(demandMap))
			for colName, numDocs := range demandMap {
				opts = append(opts, gen.WithTypeDemand(colName, numDocs))
			}
			docs, err := gen.AutoGenerate(colsToDefs(collections), opts...)
			if err != nil {
				return err
			}

			out := cmd.OutOrStdout()
			_, err = out.Write([]byte("Generated " + strconv.Itoa(len(docs)) +
				" documents. Adding to collections...\n"))
			if err != nil {
				return err
			}

			batchOffset := 0
			for batchOffset < len(docs) {
				// The final batch may be shorter than defaultBatchSize.
				batchLen := defaultBatchSize
				if batchOffset+batchLen > len(docs) {
					batchLen = len(docs) - batchOffset
				}

				colDocsMap := groupDocsByCollection(docs[batchOffset : batchOffset+batchLen])

				err = saveBatchToCollections(ctx, collections, colDocsMap)
				if err != nil {
					return err
				}

				err = reportSavedBatch(out, batchLen, colDocsMap)
				if err != nil {
					return err
				}

				batchOffset += batchLen
			}

			return nil
		},
	}
	cmd.Flags().StringVarP(&demandJSON, "demand", "d", "", "Documents' demand in JSON format")

	return cmd
}

// reportSavedBatch writes a single progress line to out of the form
//
//	Added <thisBatch> documents: <n> <collection>, <n> <collection>
//
// with one "<n> <collection>" entry per key of colDocsMap, where n is the
// number of documents grouped under that collection name. Entries follow map
// iteration order, so their order is not stable between calls.
// It returns the error, if any, from writing to out.
func reportSavedBatch(out io.Writer, thisBatch int, colDocsMap map[string][]*client.Document) error {
	var line strings.Builder
	line.WriteString("Added ")
	line.WriteString(strconv.Itoa(thisBatch))
	line.WriteString(" documents: ")

	needSeparator := false
	for name, group := range colDocsMap {
		if needSeparator {
			line.WriteString(", ")
		}
		needSeparator = true
		line.WriteString(strconv.Itoa(len(group)))
		line.WriteString(" ")
		line.WriteString(name)
	}
	line.WriteString("\n")

	_, err := out.Write([]byte(line.String()))
	return err
}

// saveBatchToCollections saves each group of documents in colDocsMap to the
// collection whose description name equals the group's key, passing ctx to
// every CreateMany call.
//
// A group whose name matches none of the given collections is skipped. The
// first error returned by CreateMany stops the save and is returned.
func saveBatchToCollections(
	ctx context.Context,
	collections []client.Collection,
	colDocsMap map[string][]*client.Document,
) error {
	for colName, colDocs := range colDocsMap {
		for _, col := range collections {
			if col.Description().Name != colName {
				continue
			}
			// Use the caller's context rather than a fresh background one so
			// that cancellation and deadlines are propagated.
			if err := col.CreateMany(ctx, colDocs); err != nil {
				return err
			}
			// Only the first matching collection receives the documents.
			break
		}
	}
	return nil
}

// groupDocsByCollection buckets the generated documents by the name of the
// collection they were generated for, keeping the input order inside each
// bucket. The returned map is never nil.
func groupDocsByCollection(docs []gen.GeneratedDoc) map[string][]*client.Document {
	grouped := make(map[string][]*client.Document)
	for i := range docs {
		name := docs[i].Col.Description.Name
		grouped[name] = append(grouped[name], docs[i].Doc)
	}
	return grouped
}

// colsToDefs returns the definition of every collection in cols, in the same
// order. The result is nil when cols is empty.
func colsToDefs(cols []client.Collection) []client.CollectionDefinition {
	var defs []client.CollectionDefinition
	for i := range cols {
		defs = append(defs, cols[i].Definition())
	}
	return defs
}

// loadConfig resolves the root directory for cfg (from the flag or the
// default) and then loads the configuration relative to it, telling the loader
// whether a config file exists. The first error encountered is returned.
func loadConfig(cfg *config.Config) error {
	err := cfg.LoadRootDirFromFlagOrDefault()
	if err == nil {
		err = cfg.LoadWithRootdir(cfg.ConfigFileExists())
	}
	return err
}
100 changes: 100 additions & 0 deletions tests/gen/cli/gendocs_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright 2023 Democratized Data Foundation
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package cli

import (
"bytes"
"io"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

"github.com/sourcenetwork/defradb/cli"
"github.com/sourcenetwork/defradb/config"
"github.com/sourcenetwork/defradb/tests/gen"
)

// execAddSchemaCmd runs the "client schema add" CLI command with the given
// schema against the node described by cfg and fails the test immediately if
// the command returns an error.
func execAddSchemaCmd(t *testing.T, cfg *config.Config, schema string) {
	// Mark as a helper so a failure is attributed to the calling test line.
	t.Helper()

	rootCmd := cli.NewDefraCommand(cfg)
	rootCmd.SetArgs([]string{"client", "schema", "add", schema})
	err := rootCmd.Execute()
	require.NoError(t, err)
}

// TestGendocsCmd_IfNoErrors_ReturnGenerationOutput checks that running gendocs
// with a valid demand for two related collections succeeds and reports both
// the total number of generated documents and the per-collection counts.
func TestGendocsCmd_IfNoErrors_ReturnGenerationOutput(t *testing.T) {
	// Named stopNode rather than close to avoid shadowing the builtin.
	cfg, _, stopNode := startTestNode(t)
	defer stopNode()

	execAddSchemaCmd(t, cfg, `
type User {
name: String
devices: [Device]
}
type Device {
model: String
owner: User
}`)

	genDocsCmd := MakeGenDocCommand(cfg)
	outputBuf := bytes.NewBufferString("")
	genDocsCmd.SetOut(outputBuf)

	genDocsCmd.SetArgs([]string{"--demand", `{"User": 3, "Device": 12}`})

	err := genDocsCmd.Execute()
	require.NoError(t, err)

	out, err := io.ReadAll(outputBuf)
	require.NoError(t, err)

	outStr := string(out)

	// Match the phrases the command emits instead of bare numbers, which
	// could match unrelated parts of the output.
	assert.Contains(t, outStr, "Generated 15 documents")
	assert.Contains(t, outStr, "3 User")
	assert.Contains(t, outStr, "12 Device")
}

// TestGendocsCmd_IfInvalidDemandValue_ReturnError checks that a --demand value
// that is not valid JSON makes the command fail with an error containing the
// errInvalidDemandValue message.
func TestGendocsCmd_IfInvalidDemandValue_ReturnError(t *testing.T) {
	cfg, _, stopNode := startTestNode(t)
	defer stopNode()

	const schema = `
type User {
name: String
}`
	execAddSchemaCmd(t, cfg, schema)

	// `invalid` is a bare word, so the demand is not decodable JSON.
	args := []string{"--demand", `{"User": invalid}`}

	genDocsCmd := MakeGenDocCommand(cfg)
	genDocsCmd.SetArgs(args)

	require.ErrorContains(t, genDocsCmd.Execute(), errInvalidDemandValue)
}

// TestGendocsCmd_IfInvalidConfig_ReturnError checks that demanding documents
// for a collection name that is not part of the added schema makes the
// command return an error.
func TestGendocsCmd_IfInvalidConfig_ReturnError(t *testing.T) {
	cfg, _, stopNode := startTestNode(t)
	defer stopNode()

	const schema = `
type User {
name: String
}`
	execAddSchemaCmd(t, cfg, schema)

	// "Unknown" does not appear in the schema added above.
	args := []string{"--demand", `{"Unknown": 3}`}

	genDocsCmd := MakeGenDocCommand(cfg)
	genDocsCmd.SetArgs(args)

	execErr := genDocsCmd.Execute()
	// NOTE(review): the third argument of require.Error is a failure message,
	// not an expected error, so this only asserts that execErr is non-nil.
	// TODO confirm whether a stricter match against the invalid-configuration
	// error was intended.
	require.Error(t, execErr, gen.NewErrInvalidConfiguration(""))
}
Loading

0 comments on commit 81fea6e

Please sign in to comment.