Skip to content

Commit

Permalink
commitlog: add compaction cleaner (#120)
Browse files Browse the repository at this point in the history
  • Loading branch information
travisjeffery authored Apr 26, 2018
1 parent 6031a75 commit e2a8d10
Show file tree
Hide file tree
Showing 10 changed files with 308 additions and 68 deletions.
Binary file added cmd/jocko/jocko
Binary file not shown.
4 changes: 3 additions & 1 deletion commitlog/commitlog.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ type CommitLog struct {
}

type Options struct {
Path string
Path string
// MaxSegmentBytes is the maximum number of bytes a segment can contain; once the
// limit is hit, a new segment is split off.
MaxSegmentBytes int64
MaxLogBytes int64
}
Expand Down
3 changes: 1 addition & 2 deletions commitlog/commitlog_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,7 @@ var (
commitlog.NewMessageSet(0, msgs...),
commitlog.NewMessageSet(1, msgs...),
}
maxBytes = msgSets[0].Size()
path = filepath.Join(os.TempDir(), fmt.Sprintf("commitlogtest%d", rand.Int63()))
path = filepath.Join(os.TempDir(), fmt.Sprintf("commitlogtest%d", rand.Int63()))
)

func TestNewCommitLog(t *testing.T) {
Expand Down
80 changes: 80 additions & 0 deletions commitlog/compact_cleaner.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package commitlog

import (
"github.com/cespare/xxhash"
)

// CompactCleaner compacts segments by message key. Its Clean method keeps a
// message set only if the offset recorded here for at least one of the set's
// keys is not greater than the set's own offset.
type CompactCleaner struct {
// m maps the hash of a message key (see Hash) to the offset of the last
// scanned message set that contained that key. It is filled by Clean and is
// not reset between calls.
m map[uint64]int64
}

// NewCompactCleaner returns a CompactCleaner whose key-hash-to-offset map is
// allocated and empty, ready for a call to Clean.
func NewCompactCleaner() *CompactCleaner {
	cleaner := new(CompactCleaner)
	cleaner.m = map[uint64]int64{}
	return cleaner
}

// Clean rewrites each of the given segments, dropping message sets whose keys
// have all been superseded, and returns the rewritten segments in input order.
//
// It works in two passes:
//
//  1. Every segment is scanned and, for each message, the hash of its key is
//     mapped to the offset of the message set it was found in. Later sets
//     overwrite earlier ones, so the map ends up holding the last offset seen
//     for each key hash.
//  2. For every segment a new segment is created with cleanedSuffix. A message
//     set is written to it when at least one of its messages has a recorded
//     offset that is not greater than the set's own offset. Replace is then
//     called on the new segment with the original as its argument.
//
// An empty input is returned as is. If NewSegment, Write or Replace fails,
// Clean returns nil together with that error.
//
// NOTE(review): both scan loops stop on the first error returned by Scan
// without inspecting it, so a read failure looks the same as reaching the end
// of a segment. Confirm Scan's end-of-data error and surface anything else.
func (c *CompactCleaner) Clean(segments []*Segment) (cleaned []*Segment, err error) {
	if len(segments) == 0 {
		return segments, nil
	}

	// Pass 1: remember the last offset at which each key hash was seen.
	for _, seg := range segments {
		scanner := NewSegmentScanner(seg)
		for {
			set, scanErr := scanner.Scan()
			if scanErr != nil {
				break
			}
			at := set.Offset()
			for _, msg := range set.Messages() {
				c.m[Hash(msg.Key())] = at
			}
		}
	}

	// Pass 2: copy the surviving message sets into a fresh segment per input.
	// TODO: handle joining segments when they're smaller than max segment size
	for _, old := range segments {
		scanner := NewSegmentScanner(old)

		fresh, openErr := NewSegment(old.path, old.BaseOffset, old.maxBytes, cleanedSuffix)
		if openErr != nil {
			return nil, openErr
		}

		for {
			set, scanErr := scanner.Scan()
			if scanErr != nil {
				break
			}

			// Every message is checked (no early exit) so each key is hashed
			// exactly as often as before.
			at := set.Offset()
			keep := false
			for _, msg := range set.Messages() {
				if latest := c.m[Hash(msg.Key())]; latest <= at {
					keep = true
				}
			}
			if !keep {
				continue
			}

			if _, writeErr := fresh.Write(set); writeErr != nil {
				return nil, writeErr
			}
		}

		if replaceErr := fresh.Replace(old); replaceErr != nil {
			return nil, replaceErr
		}

		cleaned = append(cleaned, fresh)
	}

	return cleaned, nil
}

// Hash returns the 64-bit xxHash digest of b.
//
// It uses the one-shot xxhash.Sum64 rather than building a streaming digest:
// the result is the same, no digest object is allocated per call, and there is
// no error path to panic on.
func Hash(b []byte) uint64 {
	return xxhash.Sum64(b)
}
117 changes: 117 additions & 0 deletions commitlog/compact_cleaner_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
package commitlog_test

import (
	"io/ioutil"
	"os"
	"testing"
	"time"

	"github.com/stretchr/testify/require"
	"github.com/travisjeffery/jocko/commitlog"
	"github.com/travisjeffery/jocko/protocol"
)

// TestCompactCleaner appends four single-message sets to a log whose segment
// size is the combined length of the first two sets, runs CompactCleaner.Clean
// over the resulting segments, and checks what each cleaned segment contains.
func TestCompactCleaner(t *testing.T) {
	req := require.New(t)

	var msgSets []commitlog.MessageSet
	msgSets = append(msgSets, newMessageSet(0, &protocol.Message{
		Key:       []byte("travisjeffery"),
		Value:     []byte("one tj"),
		MagicByte: 2,
		Timestamp: time.Now(),
	}))

	msgSets = append(msgSets, newMessageSet(1, &protocol.Message{
		Key:       []byte("another"),
		Value:     []byte("one another"),
		MagicByte: 2,
		Timestamp: time.Now(),
	}))

	// Same key as offset 0: this is the set that supersedes it.
	msgSets = append(msgSets, newMessageSet(2, &protocol.Message{
		Key:       []byte("travisjeffery"),
		Value:     []byte("two tj"),
		MagicByte: 2,
		Timestamp: time.Now(),
	}))

	msgSets = append(msgSets, newMessageSet(3, &protocol.Message{
		Key:       []byte("again another"),
		Value:     []byte("again another"),
		MagicByte: 2,
		Timestamp: time.Now(),
	}))

	// Work in a private directory. Using os.TempDir() directly would make the
	// deferred RemoveAll delete the shared system temp directory and would run
	// the log among unrelated files.
	path, err := ioutil.TempDir("", "compactcleanertest")
	req.NoError(err)
	defer os.RemoveAll(path)

	opts := commitlog.Options{
		Path:            path,
		MaxSegmentBytes: int64(len(msgSets[0]) + len(msgSets[1])),
		MaxLogBytes:     1000,
	}
	l, err := commitlog.New(opts)
	req.NoError(err)

	for _, msgSet := range msgSets {
		_, err = l.Append(msgSet)
		req.NoError(err)
	}

	segments := l.Segments()
	req.Equal(2, len(segments))
	segment := segments[0]

	// Before cleaning, the first segment starts with the first message set.
	scanner := commitlog.NewSegmentScanner(segment)
	ms, err := scanner.Scan()
	req.NoError(err)
	req.Equal(msgSets[0], ms)

	cc := commitlog.NewCompactCleaner()
	cleaned, err := cc.Clean(segments)
	req.NoError(err)
	req.Equal(2, len(cleaned))

	// First cleaned segment: only the "another" set is expected to remain.
	scanner = commitlog.NewSegmentScanner(cleaned[0])

	var count int
	for {
		ms, err = scanner.Scan()
		if err != nil {
			break
		}
		req.Equal(1, len(ms.Messages()))
		req.Equal([]byte("another"), ms.Messages()[0].Key())
		req.Equal([]byte("one another"), ms.Messages()[0].Value())
		count++
	}
	req.Equal(1, count)

	// Second cleaned segment: the newer "travisjeffery" set is expected.
	scanner = commitlog.NewSegmentScanner(cleaned[1])
	count = 0
	for {
		ms, err = scanner.Scan()
		if err != nil {
			break
		}
		req.Equal(1, len(ms.Messages()))
		req.Equal([]byte("travisjeffery"), ms.Messages()[0].Key())
		req.Equal([]byte("two tj"), ms.Messages()[0].Value())
		count++
	}
	req.Equal(1, count)
}

// newMessageSet encodes each protocol message with protocol.Encode and wraps
// the encoded messages in a commitlog.MessageSet at the given offset. It panics
// if a message cannot be encoded.
func newMessageSet(offset uint64, pmsgs ...*protocol.Message) commitlog.MessageSet {
	var encoded []commitlog.Message
	for i := range pmsgs {
		raw, err := protocol.Encode(pmsgs[i])
		if err != nil {
			panic(err)
		}
		encoded = append(encoded, raw)
	}
	return commitlog.NewMessageSet(offset, encoded...)
}
4 changes: 2 additions & 2 deletions commitlog/index_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import (
)

func TestIndex(t *testing.T) {
path := filepath.Join(os.TempDir(), fmt.Sprintf(indexNameFormat, rand.Int63()))
path := filepath.Join(os.TempDir(), fmt.Sprintf(fileFormat, rand.Int63(), indexSuffix))
totalEntries := rand.Intn(10) + 10
//case for roundDown
bytes := int64(totalEntries*entryWidth + 1)
Expand Down Expand Up @@ -72,7 +72,7 @@ func TestIndex(t *testing.T) {
}

func TestIndexScanner(t *testing.T) {
path := filepath.Join(os.TempDir(), fmt.Sprintf(indexNameFormat, rand.Int63()))
path := filepath.Join(os.TempDir(), fmt.Sprintf(fileFormat, rand.Int63(), indexSuffix))
totalEntries := rand.Intn(10) + 10
//case for roundDown
bytes := int64(totalEntries*entryWidth + 1)
Expand Down
12 changes: 4 additions & 8 deletions commitlog/message.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func (m Message) Value() []byte {

func (m Message) Size() int32 {
var size int32 = 4 + 1 + 1
if m.MagicByte() == 1 {
if m.MagicByte() > 0 {
size += 8
}
size += 4
Expand Down Expand Up @@ -71,13 +71,9 @@ func (m Message) keyOffsets() (start, end, size int32) {
}

func (m Message) valueOffsets() (start, end, size int32) {
keyStart, keyEnd, keySize := m.keyOffsets()
if keySize == -1 {
start = keyStart + 4
} else {
start = keyEnd + 1
}
_, keyEnd, _ := m.keyOffsets()
start = keyEnd
size = int32(Encoding.Uint32(m[start:]))
end = start + size
end = start + 4 + size
return
}
Loading

0 comments on commit e2a8d10

Please sign in to comment.