
Commit

remove cache dependency and fixup comments
chirst committed Jun 26, 2024
1 parent 8955e19 commit e813d15
Showing 5 changed files with 126 additions and 59 deletions.
2 changes: 0 additions & 2 deletions go.mod
@@ -1,5 +1,3 @@
module github.com/chirst/cdb

go 1.22

require github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da
2 changes: 0 additions & 2 deletions go.sum

This file was deleted.

70 changes: 70 additions & 0 deletions pager/cache/cache.go
@@ -0,0 +1,70 @@
package cache

import "slices"

// lruPageCache implements pageCache
type lruPageCache struct {
cache map[int][]byte
// evictList maintains an ordered list of the keys currently in the cache,
// with the least recently used key at index 0.
evictList []int
maxSize int
}

// NewLRU creates an LRU (least recently used) cache. maxSize determines how
// many items can be cached. When adding an item would exceed maxSize, the
// least recently used item is evicted.
func NewLRU(maxSize int) *lruPageCache {
return &lruPageCache{
cache: map[int][]byte{},
evictList: []int{},
maxSize: maxSize,
}
}

// Get returns the value for the key and a bool indicating whether the key was
// found.
func (c *lruPageCache) Get(key int) (value []byte, hit bool) {
v, ok := c.cache[key]
if !ok {
return nil, false
}
c.prioritize(key)
return v, true
}

// Add adds the key and value to the cache and prioritizes the key. If the key
// already exists, its value is updated and the key is prioritized.
func (c *lruPageCache) Add(key int, value []byte) {
if _, ok := c.cache[key]; ok {
c.prioritize(key)
c.cache[key] = value
return
}
if c.maxSize == len(c.cache) {
c.evict()
}
c.cache[key] = value
c.evictList = append(c.evictList, key)
}

// Remove removes the key from the cache. If the key is not found it will be
// ignored.
func (c *lruPageCache) Remove(key int) {
if _, ok := c.cache[key]; ok {
delete(c.cache, key)
i := slices.Index(c.evictList, key)
c.evictList = slices.Delete(c.evictList, i, i+1)
}
}

// prioritize moves the key to the most recently used end of the eviction
// list.
func (c *lruPageCache) prioritize(key int) {
i := slices.Index(c.evictList, key)
c.evictList = append(slices.Delete(c.evictList, i, i+1), key)
}

// evict removes the least recently used key and its value from the cache.
func (c *lruPageCache) evict() {
evictKey := c.evictList[0]
c.evictList = c.evictList[1:]
delete(c.cache, evictKey)
}
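For orientation, here is a minimal usage sketch of the cache above (the import path is taken from this diff; the main package is purely illustrative and not part of the commit). With a two-entry cache, reading a key via Get reprioritizes it, so the untouched key is the one evicted when a third key is added.

```go
package main

import (
	"fmt"

	"github.com/chirst/cdb/pager/cache" // import path as introduced in this diff
)

func main() {
	c := cache.NewLRU(2)   // at most two pages cached
	c.Add(1, []byte{0x01}) // evictList: [1]
	c.Add(2, []byte{0x02}) // evictList: [1, 2]
	c.Get(1)               // reprioritize key 1; evictList: [2, 1]
	c.Add(3, []byte{0x03}) // cache is full; key 2, the least recently used, is evicted

	_, hit := c.Get(2)
	fmt.Println(hit) // false: key 2 was evicted
	_, hit = c.Get(1)
	fmt.Println(hit) // true: key 1 survived because Get reprioritized it
}
```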
38 changes: 38 additions & 0 deletions pager/cache/cache_test.go
@@ -0,0 +1,38 @@
package cache

import "testing"

func TestCache(t *testing.T) {
c := NewLRU(5)
c.Add(5, []byte{5})
c.Add(8, []byte{8})
c.Add(12, []byte{12})
c.Add(21, []byte{21})
c.Add(240, []byte{240})

c.Get(5)
c.Get(12)
c.Get(8)
c.Get(240)

c.Add(241, []byte{241})

if cl := len(c.cache); cl != 5 {
t.Fatalf("expected cache size 5 got %d", cl)
}
if _, ok := c.cache[5]; !ok {
t.Fatal("expected cache[5] to be ok")
}
if _, ok := c.cache[12]; !ok {
t.Fatal("expected cache[12] to be ok")
}
if _, ok := c.cache[8]; !ok {
t.Fatal("expected cache[8] to be ok")
}
if _, ok := c.cache[240]; !ok {
t.Fatal("expected cache[240] to be ok")
}
if _, ok := c.cache[241]; !ok {
t.Fatal("expected cache[241] to be ok")
}
}
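The test verifies that the five most recently used keys remain after 241 is inserted. Since key 21 is the only key that was added but never read, it should be the entry that gets evicted; an extra assertion along these lines (a sketch, not part of this commit) would pin that down:

```go
// Sketch of an additional assertion for TestCache (not part of this commit):
// key 21 was added but never read, so it should be the least recently used
// entry and therefore evicted when 241 is added.
if _, ok := c.cache[21]; ok {
	t.Fatal("expected cache[21] to be evicted")
}
```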
73 changes: 18 additions & 55 deletions pager/pager.go
@@ -3,31 +3,20 @@
// memory. It also handles locking.
package pager

// TODO probably make this its own package or better define the public API
// TODO pageCache should have different implementations that can be swapped.
// Should also have a customizable cache size. Could also have a command to
// clear the cache.
// TODO think about the similarities and differences between pageCache and
// dirtyPages. Think about why the pageCache stores raw bytes of a page and the
// dirtyPages stores a pointer to a page. Think about how caching should be
// handled during a write. For instance, newPage hits dirtyPages, but not
// pageCache.
// TODO the pager should have a catalogue in memory. When a write transaction
// is started, the opcode should set a bit indicating whether or not the
// transaction will update the catalogue. Right before a write transaction
// that flipped the bit is closed, the schema cache will be repopulated. On
// startup the schema cache will also need to be hydrated soon after any
// pending journals are dealt with. This should mean not having to build a
// bunch of machinery to create tables right away.
// TODO try to remove specific integer types in favor of just int.

import (
"bytes"
"encoding/binary"
"log"
"sort"
"sync"

"github.com/golang/groupcache/lru"
"github.com/chirst/cdb/pager/cache"
)

const (
@@ -52,6 +41,16 @@ const (
EMPTY_PARENT_PAGE_NUMBER = 0
)

// pageCache defines the page caching interface.
type pageCache interface {
Get(pageNumber int) ([]byte, bool)
Add(key int, value []byte)
Remove(key int)
}

// Pager is an abstraction of the database file. Pager handles accessing the
// file efficiently in a thread-safe manner and writing to the file
// atomically.
type Pager struct {
// store implements storage and is typically a file
store storage
@@ -89,16 +88,17 @@ func New(useMemory bool) (*Pager, error) {
cmpb := make([]byte, FREE_PAGE_COUNTER_SIZE)
s.ReadAt(cmpb, FREE_PAGE_COUNTER_OFFSET)
cmpi := binary.LittleEndian.Uint16(cmpb)
// If the max page is the reserved page number, the free page counter has not
// been set yet, so the max page should default to 1.
if cmpi == EMPTY_PARENT_PAGE_NUMBER {
// The max page cannot be the reserved page number
cmpi = 1
}
p := &Pager{
store: s,
currentMaxPage: cmpi,
fileLock: sync.RWMutex{},
dirtyPages: []*Page{},
pageCache: newLruPageCache(PAGE_CACHE_SIZE),
pageCache: cache.NewLRU(PAGE_CACHE_SIZE),
}
p.GetPage(1)
return p, nil
@@ -130,7 +130,7 @@ func (p *Pager) EndWrite() error {
}
for _, fp := range p.dirtyPages {
p.WritePage(fp)
p.pageCache.remove(fp.GetNumber())
p.pageCache.Remove(int(fp.GetNumber()))
}
p.dirtyPages = []*Page{}
p.writeMaxPageNumber()
@@ -143,7 +143,7 @@
}

func (p *Pager) GetPage(pageNumber uint16) *Page {
if v, hit := p.pageCache.get(pageNumber); hit {
if v, hit := p.pageCache.Get(int(pageNumber)); hit {
ap := p.allocatePage(pageNumber, v)
if p.isWriting {
p.dirtyPages = append(p.dirtyPages, ap)
@@ -157,7 +157,7 @@ func (p *Pager) GetPage(pageNumber uint16) *Page {
if p.isWriting {
p.dirtyPages = append(p.dirtyPages, ap)
}
p.pageCache.add(pageNumber, page)
p.pageCache.Add(int(pageNumber), page)
return ap
}

@@ -410,40 +410,3 @@ func (p *Page) GetValue(key []byte) ([]byte, bool) {
}
return []byte{}, false
}

type pageCache interface {
get(pageNumber uint16) ([]byte, bool)
add(key uint16, value []byte)
remove(key uint16)
}

// lruPageCache implements pageCache
type lruPageCache struct {
cache *lru.Cache
}

func (c *lruPageCache) get(key uint16) (value []byte, hit bool) {
v, ok := c.cache.Get(key)
if !ok {
return nil, false
}
vb, ok := v.([]byte)
if !ok {
log.Fatal("lru cache is not byte array")
}
return vb, true
}

func (c *lruPageCache) add(key uint16, value []byte) {
c.cache.Add(key, value)
}

func (c *lruPageCache) remove(key uint16) {
c.cache.Remove(key)
}

func newLruPageCache(maxSize int) *lruPageCache {
return &lruPageCache{
cache: lru.New(maxSize),
}
}
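Since the pager now depends only on its own unexported pageCache interface, a compile-time assertion inside the pager package could catch any future drift between that interface and the exported API of the cache package. The following is only a sketch, assuming it lives in pager.go alongside the interface; it is not part of this commit:

```go
// Compile-time check (sketch, not part of this commit): the build fails if
// the LRU cache from pager/cache ever stops satisfying the pager's pageCache
// interface. The size argument is irrelevant here; the value is discarded.
var _ pageCache = cache.NewLRU(0)
```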
