Skip to content

Commit

Permalink
Support sampling golden test json (#534)
Browse files Browse the repository at this point in the history
* Support sampling golden test json

* lint

* clean up

* cleanup

* codacy

* clean up

* add .sample to sampled json name

* resolve comments

* sample time series

* comment

* Sample place only
  • Loading branch information
shifucun authored Jun 9, 2021
1 parent 3d9550c commit 247bd31
Show file tree
Hide file tree
Showing 42 changed files with 18,753 additions and 192,234 deletions.
4 changes: 0 additions & 4 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,14 @@ require (
cloud.google.com/go/pubsub v1.10.3
cloud.google.com/go/storage v1.15.0
github.com/go-test/deep v1.0.7
github.com/golang/mock v1.5.0 // indirect
github.com/golang/protobuf v1.5.2
github.com/google/go-cmp v0.5.5
go.opencensus.io v0.23.0 // indirect
golang.org/x/net v0.0.0-20210505214959-0714010a04ed // indirect
golang.org/x/oauth2 v0.0.0-20210427180440-81ed05c6b58c
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/api v0.46.0
google.golang.org/genproto v0.0.0-20210506142907-4a47615972c2
google.golang.org/grpc v1.37.0
google.golang.org/protobuf v1.26.0
honnef.co/go/tools v0.1.4 // indirect

)
36 changes: 1 addition & 35 deletions go.sum

Large diffs are not rendered by default.

126 changes: 126 additions & 0 deletions internal/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@ import (
"encoding/json"
"fmt"
"io/ioutil"
"math"
"math/rand"
"regexp"
"runtime"
"sort"
"strings"
"time"

"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/reflect/protoreflect"
)

const (
Expand Down Expand Up @@ -87,6 +91,23 @@ type TypePair struct {
Parent string
}

// SamplingStrategy represents the strategy to sample a JSON object.
//
// Sampling is performed uniform acroos the items for list, or the keys for
// map.For example, when MaxSample=4, sampling of [1,2,3,4,5,6,7,8,9]
// would give [3,5,7,9]
type SamplingStrategy struct {
// Maximum number of samples.
//
// -1 means sample all the data. A positive integer indicates the maximum
// number of samples.
MaxSample int
// Sampling strategy for the child fields.
Children map[string]*SamplingStrategy
// Proto fields or map keys that are not sampled at all.
Exclude []string
}

// ZipAndEncode Compresses the given contents using gzip and encodes it in base64
func ZipAndEncode(contents []byte) (string, error) {
// Zip the string
Expand Down Expand Up @@ -291,3 +312,108 @@ func MergeDedupe(s1 []string, s2 []string) []string {
}
return s1
}

// Sample constructs a sampled protobuf message based on the sampling strategy.
// The output is deterministic given the same strategy.
func Sample(m proto.Message, strategy *SamplingStrategy) proto.Message {
pr := m.ProtoReflect()
pr.Range(func(fd protoreflect.FieldDescriptor, value protoreflect.Value) bool {
fieldName := fd.JSONName()

// Clear the excluded fields
for _, ex := range strategy.Exclude {
if ex == fieldName {
pr.Clear(fd)
return true
}
}

// If a field is not in the sampling strategy, keep it.
strat, ok := strategy.Children[fieldName]
if !ok {
return true
}
// Note, map[string]proto.Message is treated as protoreflect.MessageKind,
// So here need to check field and list first.
if fd.IsList() {
// Sample list.
oldList := value.List()
length := oldList.Len()
var newList protoreflect.List

maxSample := strat.MaxSample
if strat.MaxSample == -1 || strat.MaxSample > length {
maxSample = length
}
inc := 1
if length > maxSample {
inc = int(math.Ceil(float64(length) / float64(maxSample)))
}
// Get the latest data first
for i := 0; i < maxSample; i++ {
ind := length - 1 - i*inc
if ind < 0 {
break
}
newList.Append(oldList.Get(ind))
}
pr.Set(fd, protoreflect.ValueOfList(newList))
} else if fd.IsMap() {
currMap := value.Map()
// Get all the keys
allKeys := []string{}
currMap.Range(func(k protoreflect.MapKey, v protoreflect.Value) bool {
// Excluded keys
for _, ex := range strat.Exclude {
if ex == k.String() {
return true
}
}
allKeys = append(allKeys, k.String())
return true
})
// Sort the keys
sort.Strings(allKeys)
// Sample keys
sampleKeys := map[string]struct{}{}

maxSample := strat.MaxSample
if strat.MaxSample == -1 || strat.MaxSample > len(allKeys) {
maxSample = len(allKeys)
}
inc := 1
if len(allKeys) > maxSample {
inc = int(math.Ceil(float64(len(allKeys)) / float64(maxSample)))
}
for i := 0; i < maxSample; i++ {
ind := len(allKeys) - 1 - i*inc
if ind < 0 {
break
}
sampleKeys[allKeys[ind]] = struct{}{}
}
// Clear un-sampled entries
currMap.Range(func(k protoreflect.MapKey, v protoreflect.Value) bool {
if _, ok := sampleKeys[k.String()]; !ok {
currMap.Clear(k)
}
return true
})
// If there are children strategy in a map, then apply this strategy to
// each value of the map.
if len(strat.Children) > 0 {
currMap.Range(func(k protoreflect.MapKey, v protoreflect.Value) bool {
Sample(v.Message().Interface(), strat)
return true
})
}

// Set the map
pr.Set(fd, protoreflect.ValueOfMap(currMap))
} else if fd.Kind() == protoreflect.MessageKind {
Sample(value.Message().Interface(), strat)
}
return true
})
return m
}
122 changes: 122 additions & 0 deletions internal/util/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@ package util
import (
"testing"

pb "github.com/datacommonsorg/mixer/internal/proto"
"github.com/google/go-cmp/cmp"
"google.golang.org/protobuf/reflect/protoreflect"
"google.golang.org/protobuf/testing/protocmp"
)

func TestZipAndEndocde(t *testing.T) {
Expand Down Expand Up @@ -78,3 +81,122 @@ func TestMergeDedupe(t *testing.T) {
}
}
}

func TestSample(t *testing.T) {
for _, c := range []struct {
input protoreflect.ProtoMessage
expected protoreflect.ProtoMessage
strategy *SamplingStrategy
}{
{
&pb.GetLandingPageDataResponse{
ChildPlacesType: "Country",
ChildPlaces: []string{
"geoId/12345",
"geoId/54321",
},
StatVarSeries: map[string]*pb.StatVarSeries{
"country/USA": {
Data: map[string]*pb.Series{
"stat-var-1": {
Val: map[string]float64{
"2011": 1010,
"2012": 1020,
"2013": 1030,
"2014": 1040,
"2015": 1050,
"2016": 1060,
},
},
},
},
"geoId/06": {
Data: map[string]*pb.Series{
"stat-var-1": {
Val: map[string]float64{
"2018": 300,
"2019": 400,
"2020": 500,
},
},
},
},
"geoId/11": {
Data: map[string]*pb.Series{
"stat-var-2": {
Val: map[string]float64{
"2019": 350,
"2020": 450,
},
},
},
},
},
},
&pb.GetLandingPageDataResponse{
ChildPlacesType: "Country",
ChildPlaces: []string{
"geoId/12345",
"geoId/54321",
},
StatVarSeries: map[string]*pb.StatVarSeries{
"country/USA": {
Data: map[string]*pb.Series{
"stat-var-1": {
Val: map[string]float64{
"2012": 1020,
"2014": 1040,
"2016": 1060,
},
},
},
},
"geoId/06": {
Data: map[string]*pb.Series{
"stat-var-1": {
Val: map[string]float64{
"2018": 300,
"2019": 400,
"2020": 500,
},
},
},
},
"geoId/11": {
Data: map[string]*pb.Series{
"stat-var-2": {
Val: map[string]float64{
"2019": 350,
"2020": 450,
},
},
},
},
},
},
&SamplingStrategy{
Children: map[string]*SamplingStrategy{
"statVarSeries": {
MaxSample: -1,
Children: map[string]*SamplingStrategy{
"data": {
MaxSample: -1,
Children: map[string]*SamplingStrategy{
"val": {
MaxSample: 3,
},
},
},
},
},
},
},
},
} {
got := Sample(c.input, c.strategy)
if diff := cmp.Diff(got, c.expected, protocmp.Transform()); diff != "" {
t.Errorf("Sample got diff %+v", diff)
}
}

}
6 changes: 1 addition & 5 deletions test/integration/bq_only_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,12 @@ package integration

import (
"context"
"io/ioutil"
"path"
"runtime"
"testing"

pb "github.com/datacommonsorg/mixer/internal/proto"
"github.com/google/go-cmp/cmp"
"google.golang.org/protobuf/encoding/protojson"
"google.golang.org/protobuf/testing/protocmp"
)

Expand Down Expand Up @@ -66,15 +64,13 @@ func TestSparql(t *testing.T) {
t.Errorf("could not Query: %v", err)
continue
}
goldenFile := path.Join(goldenPath, c.goldenFile)
if generateGolden {
updateGolden(resp, goldenPath, c.goldenFile)
continue
}

var expected pb.QueryResponse
file, _ := ioutil.ReadFile(goldenFile)
if err := protojson.Unmarshal(file, &expected); err != nil {
if err := readJSON(goldenPath, c.goldenFile, &expected); err != nil {
t.Errorf("Can not Unmarshal golden file %s: %v", c.goldenFile, err)
continue
}
Expand Down
Loading

0 comments on commit 247bd31

Please sign in to comment.