Skip to content

Commit

Permalink
Merge pull request #124 from appuio/find-inconsistent-data
Browse files Browse the repository at this point in the history
Add check for facts with inconsistent fact/dimension time ranges.
  • Loading branch information
bastjan authored Feb 28, 2023
2 parents 3b709ba + 1e4b5a6 commit 3e28def
Show file tree
Hide file tree
Showing 7 changed files with 306 additions and 17 deletions.
31 changes: 23 additions & 8 deletions check_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,17 +56,32 @@ func (cmd *checkMissingCommand) execute(cliCtx *cli.Context) error {
if err != nil {
return err
}
inconsistent, err := check.Inconsistent(ctx, tx)
if err != nil {
return err
}

if len(missing) != 0 {
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
defer w.Flush()
fmt.Fprint(w, "Table\tMissing Field\tID\tSource\n")
for _, m := range missing {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", m.Table, m.MissingField, m.ID, m.Source)
}
}

if len(missing) == 0 {
return nil
if len(inconsistent) != 0 {
w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
defer w.Flush()
fmt.Fprint(w, "Table\tDimension ID\tFact Time\tDimension Time Range\n")
for _, i := range inconsistent {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", i.Table, i.DimensionID, i.FactTime, i.DimensionRange)
}
}

w := tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0)
defer w.Flush()
fmt.Fprint(w, "Table\tMissing Field\tID\tSource\n")
for _, m := range missing {
fmt.Fprintf(w, "%s\t%s\t%s\t%s\n", m.Table, m.MissingField, m.ID, m.Source)
if len(missing) == 0 && len(inconsistent) == 0 {
return cli.Exit("No missing or inconsistent data found.", 0)
}

return cli.Exit(fmt.Sprintf("%d missing entries found.", len(missing)), 1)
return cli.Exit(fmt.Sprintf("%d missing, %d inconsistent entries found.", len(missing), len(inconsistent)), 1)
}
52 changes: 52 additions & 0 deletions pkg/check/inconsistent.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
package check

import (
"context"
"fmt"
"strings"

"github.com/jmoiron/sqlx"
"go.uber.org/multierr"

"github.com/appuio/appuio-cloud-reporting/pkg/db"
)

var dimensionsWithTimeranges = []db.Model{
db.Discount{},
db.Product{},
db.Query{},
db.Tenant{},
}

const inconsistentFactsQuery = `
select distinct '{{table}}' as "table", {{table}}.id as DimensionID, date_times.timestamp::text as FactTime, {{table}}.during::text as DimensionRange from facts
inner join {{table}} on facts.{{foreign_key}} = {{table}}.id
inner join date_times on facts.date_time_id = date_times.id
where false = {{table}}.during @> date_times.timestamp
`

// InconsistentField represents an inconsistent field.
type InconsistentField struct {
Table string

DimensionID string

FactTime string
DimensionRange string
}

// Inconsistent checks for facts with inconsistent time ranges.
// Those are facts that reference a dimension with a time range that does not include the fact's timestamp.
func Inconsistent(ctx context.Context, tx sqlx.QueryerContext) ([]InconsistentField, error) {
var inconsistent []InconsistentField
var errors []error
for _, m := range dimensionsWithTimeranges {
var ic []InconsistentField
q := strings.NewReplacer("{{table}}", m.TableName(), "{{foreign_key}}", m.ForeignKeyName()).Replace(inconsistentFactsQuery)
err := sqlx.SelectContext(ctx, tx, &ic, fmt.Sprintf(`WITH inconsistent AS (%s) SELECT * FROM inconsistent ORDER BY "table",FactTime`, q))
errors = append(errors, err)
inconsistent = append(inconsistent, ic...)
}

return inconsistent, multierr.Combine(errors...)
}
138 changes: 138 additions & 0 deletions pkg/check/inconsistent_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package check_test

import (
"context"
"fmt"
"testing"
"time"

"github.com/jackc/pgtype"
"github.com/jmoiron/sqlx"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/suite"

"github.com/appuio/appuio-cloud-reporting/pkg/check"
"github.com/appuio/appuio-cloud-reporting/pkg/db"
"github.com/appuio/appuio-cloud-reporting/pkg/db/dbtest"
)

type InconsistentTestSuite struct {
dbtest.Suite
}

func (s *InconsistentTestSuite) TestInconsistentFields() {
t := s.T()
tx := s.Begin()
defer tx.Rollback()
require.NoError(t, func() error { _, err := tx.Exec("set timezone to 'UTC';"); return err }())

m, err := check.Inconsistent(context.Background(), tx)
require.NoError(t, err)
require.Len(t, m, 0)

expectedInconsistent := s.requireInconsistentTestEntries(t, tx)

m, err = check.Inconsistent(context.Background(), tx)
require.NoError(t, err)
require.Equal(t, expectedInconsistent, m)
}

func (s *InconsistentTestSuite) requireInconsistentTestEntries(t *testing.T, tdb *sqlx.Tx) []check.InconsistentField {
var category db.Category
require.NoError(t,
db.GetNamed(tdb, &category,
"INSERT INTO categories (source,target) VALUES (:source,:target) RETURNING *", db.Category{
Source: "af-south-1:uroboros-research",
}))

at := time.Date(2023, time.January, 2, 3, 0, 0, 0, time.UTC)
var dateTime db.DateTime
require.NoError(t,
db.GetNamed(tdb, &dateTime,
"INSERT INTO date_times (timestamp, year, month, day, hour) VALUES (:timestamp, :year, :month, :day, :hour) RETURNING *",
db.BuildDateTime(at),
))

discountOutsideRange, err := db.CreateDiscount(tdb, db.Discount{
Source: "test_memory:us-rac-2",
During: rangeOutsideDateTimes(),
})
require.NoError(t, err)

var tenantOutsideRange db.Tenant
require.NoError(t,
db.GetNamed(tdb, &tenantOutsideRange,
"INSERT INTO tenants (source,during) VALUES (:source,:during) RETURNING *", db.Tenant{
Source: "tricell",
During: rangeOutsideDateTimes(),
}))

var tenantInsideRange db.Tenant
require.NoError(t,
db.GetNamed(tdb, &tenantInsideRange,
"INSERT INTO tenants (source,during) VALUES (:source,:during) RETURNING *", db.Tenant{
Source: "tricell",
During: db.Timerange(db.MustTimestamp(at), db.MustTimestamp(at.Add(time.Hour))),
}))

var productOutsideRange db.Product
require.NoError(t,
db.GetNamed(tdb, &productOutsideRange,
"INSERT INTO products (source,target,amount,unit,during) VALUES (:source,:target,:amount,:unit,:during) RETURNING *", db.Product{
Source: "test_memory:us-rac-2",
During: rangeOutsideDateTimes(),
}))

var queryOutsideRange db.Query
require.NoError(t,
db.GetNamed(tdb, &queryOutsideRange,
"INSERT INTO queries (name,query,unit,during) VALUES (:name,:query,:unit,:during) RETURNING *", db.Query{
Name: "test_memory",
Query: "test_memory",
Unit: "GiB",
During: rangeOutsideDateTimes(),
}))

testFact := db.Fact{
DateTimeId: dateTime.Id,
QueryId: queryOutsideRange.Id,
TenantId: tenantOutsideRange.Id,
CategoryId: category.Id,
ProductId: productOutsideRange.Id,
DiscountId: discountOutsideRange.Id,
Quantity: 1,
}
createFact(t, tdb, testFact)
testFact.TenantId = tenantInsideRange.Id
createFact(t, tdb, testFact)

formattedAt := at.Format(db.PGTimestampFormat)
formattedRange := fmt.Sprintf("[\"%s\",\"%s\")",
rangeOutsideDateTimes().Lower.Time.Format(db.PGTimestampFormat),
rangeOutsideDateTimes().Upper.Time.Format(db.PGTimestampFormat),
)
return []check.InconsistentField{
{Table: "discounts", DimensionID: discountOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange},
{Table: "products", DimensionID: productOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange},
{Table: "queries", DimensionID: queryOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange},
{Table: "tenants", DimensionID: tenantOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange},
}
}

func rangeOutsideDateTimes() pgtype.Tstzrange {
return db.Timerange(
db.MustTimestamp(time.Date(2023, time.January, 2, 10, 0, 0, 0, time.UTC)),
db.MustTimestamp(time.Date(2023, time.January, 2, 11, 0, 0, 0, time.UTC)),
)
}

func createFact(t *testing.T, tx *sqlx.Tx, fact db.Fact) (rf db.Fact) {
require.NoError(t,
db.GetNamed(tx, &rf,
"INSERT INTO facts (date_time_id,query_id,tenant_id,category_id,product_id,discount_id,quantity) VALUES (:date_time_id,:query_id,:tenant_id,:category_id,:product_id,:discount_id,:quantity) RETURNING *", fact))
return
}

func TestInconsistentTestSuite(t *testing.T) {
suite.Run(t, new(InconsistentTestSuite))
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
-- Timestamp duplicates the (year, month, day, hour) fields, but is more convenient to use.
-- I'd delete the fields but that would be a pretty breaking change.
-- So we just enforce consistency between the two fields.

ALTER TABLE date_times
ADD CONSTRAINT date_times_timestamp_check_consistency CHECK (
(date_times.year || '-' || date_times.month || '-' || date_times.day || ' ' || date_times.hour || ':00:00+00')::timestamptz = date_times.timestamp
);
75 changes: 75 additions & 0 deletions pkg/db/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,13 @@ import (
"github.com/jackc/pgtype"
)

type Model interface {
TableName() string
ForeignKeyName() string
}

var _ Model = Query{}

type Query struct {
Id string
ParentID sql.NullString `db:"parent_id"`
Expand All @@ -22,6 +29,14 @@ type Query struct {
subQueries []Query
}

func (q Query) TableName() string {
return "queries"
}

func (q Query) ForeignKeyName() string {
return "query_id"
}

// CreateQuery creates the given query
func CreateQuery(p NamedPreparer, in Query) (Query, error) {
var query Query
Expand All @@ -30,6 +45,8 @@ func CreateQuery(p NamedPreparer, in Query) (Query, error) {
return query, err
}

var _ Model = Tenant{}

type Tenant struct {
Id string

Expand All @@ -40,6 +57,16 @@ type Tenant struct {
During pgtype.Tstzrange
}

func (t Tenant) TableName() string {
return "tenants"
}

func (t Tenant) ForeignKeyName() string {
return "tenant_id"
}

var _ Model = Category{}

type Category struct {
Id string

Expand All @@ -48,6 +75,16 @@ type Category struct {
Target sql.NullString
}

func (c Category) TableName() string {
return "categories"
}

func (c Category) ForeignKeyName() string {
return "category_id"
}

var _ Model = Product{}

type Product struct {
Id string

Expand All @@ -61,6 +98,14 @@ type Product struct {
During pgtype.Tstzrange
}

func (p Product) TableName() string {
return "products"
}

func (p Product) ForeignKeyName() string {
return "product_id"
}

// CreateProduct creates the given product
func CreateProduct(p NamedPreparer, in Product) (Product, error) {
var product Product
Expand All @@ -69,6 +114,8 @@ func CreateProduct(p NamedPreparer, in Product) (Product, error) {
return product, err
}

var _ Model = Discount{}

type Discount struct {
Id string

Expand All @@ -80,6 +127,14 @@ type Discount struct {
During pgtype.Tstzrange
}

func (d Discount) TableName() string {
return "discounts"
}

func (d Discount) ForeignKeyName() string {
return "discount_id"
}

// CreateDiscount creates the given discount
func CreateDiscount(p NamedPreparer, in Discount) (Discount, error) {
var discount Discount
Expand All @@ -88,6 +143,8 @@ func CreateDiscount(p NamedPreparer, in Discount) (Discount, error) {
return discount, err
}

var _ Model = DateTime{}

type DateTime struct {
Id string

Expand All @@ -99,6 +156,16 @@ type DateTime struct {
Hour int
}

func (d DateTime) TableName() string {
return "date_times"
}

func (d DateTime) ForeignKeyName() string {
return "date_time_id"
}

var _ Model = Fact{}

type Fact struct {
Id string

Expand All @@ -112,6 +179,14 @@ type Fact struct {
Quantity float64
}

func (f Fact) TableName() string {
return "facts"
}

func (f Fact) ForeignKeyName() string {
return "fact_id"
}

// BuildDateTime builds a DateTime object from the given timestamp.
func BuildDateTime(ts time.Time) DateTime {
return DateTime{
Expand Down
Loading

0 comments on commit 3e28def

Please sign in to comment.