Skip to content

Commit

Permalink
feat: Add optimization feature for composite cursors (#1)
Browse files Browse the repository at this point in the history
Current implementation of pagination on composite cursors is rather
inefficient on large tables even in the presence of index. Changes in
this commit add an optimization option that causes paginator to emit a
bit different WHERE condition that query optimizators have an easier
time optimizing.

We kept changes backward-compatible. We need to opt-in to get the new
feature enabled.
  • Loading branch information
tadeboro authored May 21, 2024
1 parent 5bdb970 commit ad1b943
Show file tree
Hide file tree
Showing 5 changed files with 178 additions and 25 deletions.
48 changes: 48 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,54 @@ Default options used by paginator when not specified:
- `Order`: `paginator.DESC`
- `TupleCmp`: `paginator.DISABLED`
When cursor uses more than one key/rule, paginator instances by default generate SQL that is compatible with almost all database management systems. But this query can be very inefficient and can result in a lot of database scans even when proper indices are in place. By enabling the `TupleCmp` option, paginator will emit a slightly different SQL query when all cursor keys are ordered in the same way.
For example, let us assume we have the following code:
```go
paginator.New(
paginator.WithKeys([]string{"CreatedAt", "ID"}),
paginate.WithAfter(after),
paginate.WithLimit(3),
).Paginate(db, &result)
```
The query that hits our database in this case would look something like this:
```sql
SELECT *
FROM orders
WHERE orders.created_at > $1
OR orders.created_at = $2 AND orders.id > $3
ORDER BY orders.created_at ASC, orders.id ASC
LIMIT 4
```
Even if we index our table on `(created_at, id)` columns, some database engines will still perform at least full index scan to get to the items we need. And this is the primary use case for tuple comparison optimization. If we enable optimization, our code would look something like this:
```go
paginator.New(
paginator.WithKeys([]string{"CreatedAt", "ID"}),
paginate.WithAfter(after),
paginate.WithLimit(3),
paginate.WithTupleCmp(paginate.ENABLED),
).Paginate(db, &result)
```
The query that hits our database now looks something like this:
```sql
SELECT *
FROM orders
WHERE (orders.created_at, orders.id) > ($1, $2)
ORDER BY orders.created_at ASC, orders.id ASC
LIMIT 4
```
In this case, if we have index on `(created_at, id)` columns, most DB angines will know how to optimize this query into a simple initial index lookup + scan, making cursor overhead neglible.
### paginator.Rule
- `Key`: Field name in target model struct.
Expand Down
5 changes: 4 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ version: "3"
services:
postgres:
image: postgres:11-alpine
command:
- -c
- log_statement=all
ports:
- 8765:5432
environment:
POSTGRES_DB: test
POSTGRES_USER: test
POSTGRES_PASSWORD: test
POSTGRES_PASSWORD: test
37 changes: 28 additions & 9 deletions paginator/option.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
package paginator

type Flag string

const (
ENABLED Flag = "ENABLED"
DISABLED Flag = "DISABLED"
)

var defaultConfig = Config{
Keys: []string{"ID"},
Limit: 10,
Order: DESC,
Keys: []string{"ID"},
Limit: 10,
Order: DESC,
TupleCmp: DISABLED,
}

// Option for paginator
Expand All @@ -13,12 +21,13 @@ type Option interface {

// Config for paginator
type Config struct {
Rules []Rule
Keys []string
Limit int
Order Order
After string
Before string
Rules []Rule
Keys []string
Limit int
Order Order
After string
Before string
TupleCmp Flag
}

// Apply applies config to paginator
Expand All @@ -42,6 +51,9 @@ func (c *Config) Apply(p *Paginator) {
if c.Before != "" {
p.SetBeforeCursor(c.Before)
}
if c.TupleCmp != "" {
p.SetAllowTupleCmp(c.TupleCmp == ENABLED)
}
}

// WithRules configures rules for paginator
Expand Down Expand Up @@ -85,3 +97,10 @@ func WithBefore(c string) Option {
Before: c,
}
}

// WithAllowTupleCmp enables tuple comparison optimization
func WithTupleCmp(flag Flag) Option {
return &Config{
TupleCmp: flag,
}
}
75 changes: 60 additions & 15 deletions paginator/paginator.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,11 @@ func New(opts ...Option) *Paginator {

// Paginator a builder doing pagination
type Paginator struct {
cursor Cursor
rules []Rule
limit int
order Order
cursor Cursor
rules []Rule
limit int
order Order
allowTupleCmp bool
}

// SetRules sets paging rules
Expand Down Expand Up @@ -65,6 +66,11 @@ func (p *Paginator) SetBeforeCursor(beforeCursor string) {
p.cursor.Before = &beforeCursor
}

// SetAllowTupleCmp enables or disables tuple comparison optimization
func (p *Paginator) SetAllowTupleCmp(allow bool) {
p.allowTupleCmp = allow
}

// Paginate paginates data
func (p *Paginator) Paginate(db *gorm.DB, dest interface{}) (result *gorm.DB, c Cursor, err error) {
if err = p.validate(db, dest); err != nil {
Expand Down Expand Up @@ -221,13 +227,19 @@ func (p *Paginator) appendPagingQuery(db *gorm.DB, fields []interface{}) *gorm.D
stmt := db
stmt = stmt.Limit(p.limit + 1)
stmt = stmt.Order(p.buildOrderSQL())
if len(fields) > 0 {
stmt = stmt.Where(
p.buildCursorSQLQuery(),
p.buildCursorSQLQueryArgs(fields)...,
)

if len(fields) == 0 {
return stmt
}
return stmt

if p.allowTupleCmp && p.canOptimizePagingQuery() {
return stmt.Where(p.buildOptimizedCursorSQLQuery(), fields)
}

return stmt.Where(
p.buildCursorSQLQuery(),
p.buildCursorSQLQueryArgs(fields)...,
)
}

func (p *Paginator) buildOrderSQL() string {
Expand All @@ -246,11 +258,7 @@ func (p *Paginator) buildCursorSQLQuery() string {
queries := make([]string, len(p.rules))
query := ""
for i, rule := range p.rules {
operator := "<"
if (p.isForward() && rule.Order == ASC) ||
(p.isBackward() && rule.Order == DESC) {
operator = ">"
}
operator := p.getCmpOperator(rule.Order)
queries[i] = fmt.Sprintf("%s%s %s ?", query, rule.SQLRepr, operator)
query = fmt.Sprintf("%s%s = ? AND ", query, rule.SQLRepr)
}
Expand All @@ -259,6 +267,43 @@ func (p *Paginator) buildCursorSQLQuery() string {
return strings.Join(queries, " OR ")
}

// We can only optimize paging query if sorting orders are consistent across
// all columns used in cursor. This is a prerequisite for tuple comparison that
// optimized queries use.
func (p *Paginator) canOptimizePagingQuery() bool {
order := p.rules[0].Order

for _, rule := range p.rules {
if order != rule.Order {
return false
}
}

return true
}

func (p *Paginator) getCmpOperator(order Order) string {
if (p.isForward() && order == ASC) || (p.isBackward() && order == DESC) {
return ">"
}

return "<"
}

func (p *Paginator) buildOptimizedCursorSQLQuery() string {
names := make([]string, len(p.rules))

for i, rule := range p.rules {
names[i] = rule.SQLRepr
}

return fmt.Sprintf(
"(%s) %s ?",
strings.Join(names, ", "),
p.getCmpOperator(p.rules[0].Order),
)
}

func (p *Paginator) buildCursorSQLQueryArgs(fields []interface{}) (args []interface{}) {
for i := 1; i <= len(fields); i++ {
args = append(args, fields[:i]...)
Expand Down
38 changes: 38 additions & 0 deletions paginator/paginator_paginate_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ func (s *paginatorSuite) TestPaginateDefaultOptions() {
// * Key: ID
// * Limit: 10
// * Order: DESC
// * TupleCmp: DISABLED

var p1 []order
_, c, _ := New().Paginate(s.db, &p1)
Expand Down Expand Up @@ -215,6 +216,43 @@ func (s *paginatorSuite) TestPaginateMultipleKeys() {
s.assertForwardOnly(c)
}

func (s *paginatorSuite) TestPaginateMultipleKeysTupleCmp() {
now := time.Now()
// ordered by (CreatedAt desc, ID desc) -> 2, 3, 1
s.givenOrders([]order{
{ID: 1, CreatedAt: now},
{ID: 2, CreatedAt: now.Add(1 * time.Hour)},
{ID: 3, CreatedAt: now},
})

cfg := Config{
Keys: []string{"CreatedAt", "ID"},
Limit: 2,
TupleCmp: ENABLED,
}

var p1 []order
_, c, _ := New(&cfg).Paginate(s.db, &p1)
s.assertIDs(p1, 2, 3)
s.assertForwardOnly(c)

var p2 []order
_, c, _ = New(
&cfg,
WithAfter(*c.After),
).Paginate(s.db, &p2)
s.assertIDs(p2, 1)
s.assertBackwardOnly(c)

var p3 []order
_, c, _ = New(
&cfg,
WithBefore(*c.Before),
).Paginate(s.db, &p3)
s.assertIDs(p3, 2, 3)
s.assertForwardOnly(c)
}

func (s *paginatorSuite) TestPaginatePointerKey() {
s.givenOrders([]order{
{ID: 1, Remark: ptrStr("3")},
Expand Down

0 comments on commit ad1b943

Please sign in to comment.