Skip to content

Commit

Permalink
Restructuring the cache file/directory layout for better performance.
Browse files Browse the repository at this point in the history
  • Loading branch information
ola-rozenfeld committed Jan 31, 2024
1 parent 50846d6 commit efa688d
Showing 1 changed file with 29 additions and 15 deletions.
44 changes: 29 additions & 15 deletions go/pkg/diskcache/diskcache.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ const maxConcurrentRequests = 1000
// DiskCache is a local disk LRU CAS and Action Cache cache.
type DiskCache struct {
root string // path to the root directory of the disk cache.
prefixes sync.Map // set of existing 2-character digest prefixes.
maxCapacityBytes uint64 // if disk size exceeds this, old items will be evicted as needed.
mu sync.Mutex // protects the queue.
store sync.Map // map of keys to qitems.
Expand Down Expand Up @@ -125,15 +126,18 @@ func New(ctx context.Context, root string, maxCapacityBytes uint64) *DiskCache {
if d.IsDir() {
return nil
}
fname := d.Name()
k, err := res.getKeyFromFileName(fname)
// We use Git's directory/file naming structure as inspiration:
// https://git-scm.com/book/en/v2/Git-Internals-Git-Objects#:~:text=The%20subdirectory%20is%20named%20with%20the%20first%202%20characters%20of%20the%20SHA%2D1%2C%20and%20the%20filename%20is%20the%20remaining%2038%20characters.
subdir := filepath.Base(filepath.Dir(path))
res.prefixes.Store(subdir, true)
k, err := res.getKeyFromFileName(subdir + d.Name())
if err != nil {
log.Errorf("Error parsing cached file name %s: %v", fname, err)
log.Errorf("Error parsing cached file name %s: %v", path, err)
return nil
}
atime, err := GetLastAccessTime(filepath.Join(root, fname))
atime, err := GetLastAccessTime(path)
if err != nil {
log.Errorf("Error getting last accessed time of %s: %v", fname, err)
log.Errorf("Error getting last accessed time of %s: %v", path, err)
return nil
}
it := &qitem{
Expand All @@ -142,7 +146,7 @@ func New(ctx context.Context, root string, maxCapacityBytes uint64) *DiskCache {
}
size, err := res.getItemSize(k)
if err != nil {
log.Errorf("Error getting file size of %s: %v", fname, err)
log.Errorf("Error getting file size of %s: %v", path, err)
return nil
}
res.store.Store(k, it)
Expand Down Expand Up @@ -175,25 +179,25 @@ func (d *DiskCache) TotalSizeBytes() uint64 {
return uint64(atomic.LoadInt64(&d.sizeBytes))
}

// CutSuffix returns s without the provided trailing suffix string and reports
// whether it found the suffix. If s doesn't end with suffix, CutSuffix returns
// s, false. If suffix is the empty string, CutSuffix returns s, true.
//
// This function is defined in https://pkg.go.dev/strings#CutSuffix
// It is copy/pasted here as a hack, because upgrading the *Reclient* repo to
// the latest Go 1.20.7 failed.
func CutSuffix(s, suffix string) (before string, found bool) {
	if !strings.HasSuffix(s, suffix) {
		return s, false
	}
	// Drop exactly len(suffix) trailing bytes; HasSuffix guarantees they match.
	return s[:len(s)-len(suffix)], true
}

func (d *DiskCache) getKeyFromFileName(fname string) (key, error) {
pair := strings.Split(fname, ".")
if len(pair) != 2 {
return key{}, fmt.Errorf("Expected file name in the form [ac_]hash/size, got %s", fname)
return key{}, fmt.Errorf("expected file name in the form [ac_]hash/size, got %s", fname)
}
size, err := strconv.ParseInt(pair[1], 10, 64)
if err != nil {
return key{}, fmt.Errorf("invalid size in digest %s: %s", fname, err)
}
hash, isAc := CutPrefix(pair[0], "ac_")
hash, isAc := CutSuffix(pair[0], "ac_")
dg, err := digest.New(hash, size)
if err != nil {
return key{}, fmt.Errorf("invalid digest from file name %s: %v", fname, err)
Expand All @@ -202,11 +206,18 @@ func (d *DiskCache) getKeyFromFileName(fname string) (key, error) {
}

// getPath returns the on-disk location of the cache entry identified by k.
//
// The layout follows Git's object store: the first 2 characters of the digest
// hash name a subdirectory of d.root, and the file inside it is named with the
// remaining hash characters, an "_ac" suffix for non-CAS (Action Cache)
// entries, then "." and the digest size.
func (d *DiskCache) getPath(k key) string {
	suffix := ""
	if !k.isCas {
		suffix = "_ac"
	}
	return filepath.Join(d.root, k.digest.Hash[:2], fmt.Sprintf("%s%s.%d", k.digest.Hash[2:], suffix, k.digest.Size))
}

func (d *DiskCache) maybeCreatePrefixDir(k key) {
prefix := k.digest.Hash[:2]
if _, loaded := d.prefixes.LoadOrStore(prefix, true); !loaded {
os.MkdirAll(filepath.Join(d.root, prefix), os.ModePerm)

Check failure on line 219 in go/pkg/diskcache/diskcache.go

View workflow job for this annotation

GitHub Actions / lint

Error return value of `os.MkdirAll` is not checked (errcheck)
}
return filepath.Join(d.root, fmt.Sprintf("%s%s.%d", prefix, k.digest.Hash, k.digest.Size))
}

func (d *DiskCache) StoreCas(dg digest.Digest, path string) error {
Expand All @@ -226,6 +237,7 @@ func (d *DiskCache) StoreCas(dg digest.Digest, path string) error {
d.mu.Lock()
heap.Push(d.queue, it)
d.mu.Unlock()
d.maybeCreatePrefixDir(it.key)
if err := copyFile(path, d.getPath(it.key), dg.Size); err != nil {
return err
}
Expand Down Expand Up @@ -259,6 +271,7 @@ func (d *DiskCache) StoreActionCache(dg digest.Digest, ar *repb.ActionResult) er
d.mu.Lock()
heap.Push(d.queue, it)
d.mu.Unlock()
d.maybeCreatePrefixDir(it.key)
if err := os.WriteFile(d.getPath(it.key), bytes, 0644); err != nil {
return err

Check failure on line 276 in go/pkg/diskcache/diskcache.go

View workflow job for this annotation

GitHub Actions / lint

error returned from external package is unwrapped: sig: func os.WriteFile(name string, data []byte, perm io/fs.FileMode) error (wrapcheck)
}
Expand Down Expand Up @@ -292,6 +305,7 @@ func (d *DiskCache) gc() {
}
atomic.AddInt64(&d.sizeBytes, -size)
it.mu.Lock()
// We only delete the files, and not the prefix directories, because the prefixes are not worth worrying about.
if err := os.Remove(d.getPath(it.key)); err != nil {
log.Errorf("Error removing file: %v", err)
}
Expand Down

0 comments on commit efa688d

Please sign in to comment.