-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #124 from appuio/find-inconsistent-data
Add check for facts with inconsistent fact/dimension time ranges.
- Loading branch information
Showing
7 changed files
with
306 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
package check | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"strings" | ||
|
||
"github.com/jmoiron/sqlx" | ||
"go.uber.org/multierr" | ||
|
||
"github.com/appuio/appuio-cloud-reporting/pkg/db" | ||
) | ||
|
||
var dimensionsWithTimeranges = []db.Model{ | ||
db.Discount{}, | ||
db.Product{}, | ||
db.Query{}, | ||
db.Tenant{}, | ||
} | ||
|
||
const inconsistentFactsQuery = ` | ||
select distinct '{{table}}' as "table", {{table}}.id as DimensionID, date_times.timestamp::text as FactTime, {{table}}.during::text as DimensionRange from facts | ||
inner join {{table}} on facts.{{foreign_key}} = {{table}}.id | ||
inner join date_times on facts.date_time_id = date_times.id | ||
where false = {{table}}.during @> date_times.timestamp | ||
` | ||
|
||
// InconsistentField represents an inconsistent field. | ||
type InconsistentField struct { | ||
Table string | ||
|
||
DimensionID string | ||
|
||
FactTime string | ||
DimensionRange string | ||
} | ||
|
||
// Inconsistent checks for facts with inconsistent time ranges. | ||
// Those are facts that reference a dimension with a time range that does not include the fact's timestamp. | ||
func Inconsistent(ctx context.Context, tx sqlx.QueryerContext) ([]InconsistentField, error) { | ||
var inconsistent []InconsistentField | ||
var errors []error | ||
for _, m := range dimensionsWithTimeranges { | ||
var ic []InconsistentField | ||
q := strings.NewReplacer("{{table}}", m.TableName(), "{{foreign_key}}", m.ForeignKeyName()).Replace(inconsistentFactsQuery) | ||
err := sqlx.SelectContext(ctx, tx, &ic, fmt.Sprintf(`WITH inconsistent AS (%s) SELECT * FROM inconsistent ORDER BY "table",FactTime`, q)) | ||
errors = append(errors, err) | ||
inconsistent = append(inconsistent, ic...) | ||
} | ||
|
||
return inconsistent, multierr.Combine(errors...) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
package check_test | ||
|
||
import ( | ||
"context" | ||
"fmt" | ||
"testing" | ||
"time" | ||
|
||
"github.com/jackc/pgtype" | ||
"github.com/jmoiron/sqlx" | ||
"github.com/stretchr/testify/require" | ||
"github.com/stretchr/testify/suite" | ||
|
||
"github.com/appuio/appuio-cloud-reporting/pkg/check" | ||
"github.com/appuio/appuio-cloud-reporting/pkg/db" | ||
"github.com/appuio/appuio-cloud-reporting/pkg/db/dbtest" | ||
) | ||
|
||
type InconsistentTestSuite struct { | ||
dbtest.Suite | ||
} | ||
|
||
func (s *InconsistentTestSuite) TestInconsistentFields() { | ||
t := s.T() | ||
tx := s.Begin() | ||
defer tx.Rollback() | ||
require.NoError(t, func() error { _, err := tx.Exec("set timezone to 'UTC';"); return err }()) | ||
|
||
m, err := check.Inconsistent(context.Background(), tx) | ||
require.NoError(t, err) | ||
require.Len(t, m, 0) | ||
|
||
expectedInconsistent := s.requireInconsistentTestEntries(t, tx) | ||
|
||
m, err = check.Inconsistent(context.Background(), tx) | ||
require.NoError(t, err) | ||
require.Equal(t, expectedInconsistent, m) | ||
} | ||
|
||
func (s *InconsistentTestSuite) requireInconsistentTestEntries(t *testing.T, tdb *sqlx.Tx) []check.InconsistentField { | ||
var category db.Category | ||
require.NoError(t, | ||
db.GetNamed(tdb, &category, | ||
"INSERT INTO categories (source,target) VALUES (:source,:target) RETURNING *", db.Category{ | ||
Source: "af-south-1:uroboros-research", | ||
})) | ||
|
||
at := time.Date(2023, time.January, 2, 3, 0, 0, 0, time.UTC) | ||
var dateTime db.DateTime | ||
require.NoError(t, | ||
db.GetNamed(tdb, &dateTime, | ||
"INSERT INTO date_times (timestamp, year, month, day, hour) VALUES (:timestamp, :year, :month, :day, :hour) RETURNING *", | ||
db.BuildDateTime(at), | ||
)) | ||
|
||
discountOutsideRange, err := db.CreateDiscount(tdb, db.Discount{ | ||
Source: "test_memory:us-rac-2", | ||
During: rangeOutsideDateTimes(), | ||
}) | ||
require.NoError(t, err) | ||
|
||
var tenantOutsideRange db.Tenant | ||
require.NoError(t, | ||
db.GetNamed(tdb, &tenantOutsideRange, | ||
"INSERT INTO tenants (source,during) VALUES (:source,:during) RETURNING *", db.Tenant{ | ||
Source: "tricell", | ||
During: rangeOutsideDateTimes(), | ||
})) | ||
|
||
var tenantInsideRange db.Tenant | ||
require.NoError(t, | ||
db.GetNamed(tdb, &tenantInsideRange, | ||
"INSERT INTO tenants (source,during) VALUES (:source,:during) RETURNING *", db.Tenant{ | ||
Source: "tricell", | ||
During: db.Timerange(db.MustTimestamp(at), db.MustTimestamp(at.Add(time.Hour))), | ||
})) | ||
|
||
var productOutsideRange db.Product | ||
require.NoError(t, | ||
db.GetNamed(tdb, &productOutsideRange, | ||
"INSERT INTO products (source,target,amount,unit,during) VALUES (:source,:target,:amount,:unit,:during) RETURNING *", db.Product{ | ||
Source: "test_memory:us-rac-2", | ||
During: rangeOutsideDateTimes(), | ||
})) | ||
|
||
var queryOutsideRange db.Query | ||
require.NoError(t, | ||
db.GetNamed(tdb, &queryOutsideRange, | ||
"INSERT INTO queries (name,query,unit,during) VALUES (:name,:query,:unit,:during) RETURNING *", db.Query{ | ||
Name: "test_memory", | ||
Query: "test_memory", | ||
Unit: "GiB", | ||
During: rangeOutsideDateTimes(), | ||
})) | ||
|
||
testFact := db.Fact{ | ||
DateTimeId: dateTime.Id, | ||
QueryId: queryOutsideRange.Id, | ||
TenantId: tenantOutsideRange.Id, | ||
CategoryId: category.Id, | ||
ProductId: productOutsideRange.Id, | ||
DiscountId: discountOutsideRange.Id, | ||
Quantity: 1, | ||
} | ||
createFact(t, tdb, testFact) | ||
testFact.TenantId = tenantInsideRange.Id | ||
createFact(t, tdb, testFact) | ||
|
||
formattedAt := at.Format(db.PGTimestampFormat) | ||
formattedRange := fmt.Sprintf("[\"%s\",\"%s\")", | ||
rangeOutsideDateTimes().Lower.Time.Format(db.PGTimestampFormat), | ||
rangeOutsideDateTimes().Upper.Time.Format(db.PGTimestampFormat), | ||
) | ||
return []check.InconsistentField{ | ||
{Table: "discounts", DimensionID: discountOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, | ||
{Table: "products", DimensionID: productOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, | ||
{Table: "queries", DimensionID: queryOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, | ||
{Table: "tenants", DimensionID: tenantOutsideRange.Id, FactTime: formattedAt, DimensionRange: formattedRange}, | ||
} | ||
} | ||
|
||
func rangeOutsideDateTimes() pgtype.Tstzrange { | ||
return db.Timerange( | ||
db.MustTimestamp(time.Date(2023, time.January, 2, 10, 0, 0, 0, time.UTC)), | ||
db.MustTimestamp(time.Date(2023, time.January, 2, 11, 0, 0, 0, time.UTC)), | ||
) | ||
} | ||
|
||
func createFact(t *testing.T, tx *sqlx.Tx, fact db.Fact) (rf db.Fact) { | ||
require.NoError(t, | ||
db.GetNamed(tx, &rf, | ||
"INSERT INTO facts (date_time_id,query_id,tenant_id,category_id,product_id,discount_id,quantity) VALUES (:date_time_id,:query_id,:tenant_id,:category_id,:product_id,:discount_id,:quantity) RETURNING *", fact)) | ||
return | ||
} | ||
|
||
func TestInconsistentTestSuite(t *testing.T) { | ||
suite.Run(t, new(InconsistentTestSuite)) | ||
} |
8 changes: 8 additions & 0 deletions
8
pkg/db/migrations/0014_enforce_date_times_timestamp_consistency.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
-- Timestamp duplicates the (year, month, day, hour) fields, but is more convenient to use. | ||
-- I'd delete the fields but that would be a pretty breaking change. | ||
-- So we just enforce consistency between the two fields. | ||
|
||
ALTER TABLE date_times | ||
ADD CONSTRAINT date_times_timestamp_check_consistency CHECK ( | ||
(date_times.year || '-' || date_times.month || '-' || date_times.day || ' ' || date_times.hour || ':00:00+00')::timestamptz = date_times.timestamp | ||
); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.