Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hg: Add a cache for mercurial repositories. #372

Merged
merged 12 commits into from
Jun 12, 2024
2 changes: 1 addition & 1 deletion pkg/vendir/directory/directory.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func (d *Directory) Sync(syncOpts SyncOpts) (ctlconf.LockDirectory, error) {
lockDirContents.Git = &lock

case contents.Hg != nil:
hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher)
hgSync := ctlhg.NewSync(*contents.Hg, NewInfoLog(d.ui), syncOpts.RefFetcher, syncOpts.Cache)

d.ui.PrintLinef("Fetching: %s + %s (hg from %s)", d.opts.Path, contents.Path, hgSync.Desc())

Expand Down
118 changes: 83 additions & 35 deletions pkg/vendir/fetch/hg/hg.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ package hg

import (
"bytes"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"net/url"
Expand All @@ -21,12 +23,28 @@ type Hg struct {
opts ctlconf.DirectoryContentsHg
infoLog io.Writer
refFetcher ctlfetch.RefFetcher
authDir string
env []string
cacheID string
}

func NewHg(opts ctlconf.DirectoryContentsHg,
infoLog io.Writer, refFetcher ctlfetch.RefFetcher) *Hg {
infoLog io.Writer, refFetcher ctlfetch.RefFetcher,
tempArea ctlfetch.TempArea,
) (*Hg, error) {
t := Hg{opts, infoLog, refFetcher, "", nil, ""}
if err := t.setup(tempArea); err != nil {
return nil, err
}
return &t, nil
}

return &Hg{opts, infoLog, refFetcher}
// getCacheID returns the identifier under which this repository's clone is
// stored in the cache. The value is computed once by setup as the hex-encoded
// SHA-256 of the cache key material.
// The ref is deliberately not part of the identifier, so a cached clone can be
// reused when only the ref changes.
// NOTE(review): the key material is meant to combine all data used to write
// the hgrc file (starting from the repository URL) — confirm against setup.
func (t *Hg) getCacheID() string {
return t.cacheID
}

//nolint:revive
Expand All @@ -35,27 +53,50 @@ type HgInfo struct {
ChangeSetTitle string
}

func (t *Hg) Retrieve(dstPath string, tempArea ctlfetch.TempArea) (HgInfo, error) {
if len(t.opts.URL) == 0 {
return HgInfo{}, fmt.Errorf("Expected non-empty URL")
// cloneHasTargetRef reports whether the clone located at dstPath already
// contains the configured ref AND that ref is a changeset id (as opposed to a
// tag or a branch name).
//
// It asks hg for the changeset id the ref resolves to ("hg id --id -r <ref>").
// If the command fails the ref is considered missing. Otherwise the ref counts
// as a changeset id only when the id printed by hg is a prefix of the ref
// itself; a tag or branch name resolves to an id that does not match its name.
func (t *Hg) cloneHasTargetRef(dstPath string) bool {
	stdout, _, err := t.run([]string{"id", "--id", "-r", t.opts.Ref}, dstPath)
	if err != nil {
		return false
	}
	resolvedID := strings.TrimSpace(stdout)
	return strings.HasPrefix(t.opts.Ref, resolvedID)
}

err := t.fetch(dstPath, tempArea)
if err != nil {
// clone creates a new repository at dstPath and fills it from the remote:
// initClone prepares the repository, then syncClone pulls into it.
// The first error encountered is returned and the remaining step is skipped.
func (t *Hg) clone(dstPath string) error {
	err := t.initClone(dstPath)
	if err == nil {
		err = t.syncClone(dstPath)
	}
	return err
}

// syncClone runs "hg pull" inside the repository at dstPath and returns the
// error reported by the command, or nil on success.
func (t *Hg) syncClone(dstPath string) error {
	_, _, err := t.run([]string{"pull"}, dstPath)
	return err
}

func (t *Hg) checkout(dstPath string) (HgInfo, error) {
if _, _, err := t.run([]string{"checkout", t.opts.Ref}, dstPath); err != nil {
return HgInfo{}, err
}

info := HgInfo{}

// use hg log to retrieve full cset sha
out, _, err := t.run([]string{"log", "-r", ".", "-T", "{node}"}, nil, dstPath)
out, _, err := t.run([]string{"log", "-r", ".", "-T", "{node}"}, dstPath)
if err != nil {
return HgInfo{}, err
}

info.SHA = strings.TrimSpace(out)

out, _, err = t.run([]string{"log", "-l", "1", "-T", "{desc|firstline|strip}", "-r", info.SHA}, nil, dstPath)
out, _, err = t.run([]string{"log", "-l", "1", "-T", "{desc|firstline|strip}", "-r", info.SHA}, dstPath)
if err != nil {
return HgInfo{}, err
}
Expand All @@ -65,7 +106,20 @@ func (t *Hg) Retrieve(dstPath string, tempArea ctlfetch.TempArea) (HgInfo, error
return info, nil
}

func (t *Hg) fetch(dstPath string, tempArea ctlfetch.TempArea) error {
// Close removes the temporary auth directory recorded in authDir, if one is
// set, and clears the field so that calling Close again does nothing.
func (t *Hg) Close() {
	if t.authDir == "" {
		return
	}
	// Best-effort cleanup: a removal failure is intentionally not reported.
	_ = os.RemoveAll(t.authDir)
	t.authDir = ""
}

func (t *Hg) setup(tempArea ctlfetch.TempArea) error {
if len(t.opts.URL) == 0 {
return fmt.Errorf("Expected non-empty URL")
}

cacheID := t.opts.URL

authOpts, err := t.getAuthOpts()
if err != nil {
return err
Expand All @@ -76,17 +130,12 @@ func (t *Hg) fetch(dstPath string, tempArea ctlfetch.TempArea) error {
return err
}

defer os.RemoveAll(authDir)
t.authDir = authDir

env := os.Environ()
t.env = os.Environ()

hgURL := t.opts.URL

_, _, err = t.run([]string{"init"}, env, dstPath)
if err != nil {
return err
}

var hgRc string

if t.opts.Evolve {
Expand Down Expand Up @@ -147,39 +196,38 @@ hgauth.password = %s
if err != nil {
return fmt.Errorf("Writing %s: %s", hgRcPath, err)
}
env = append(env, "HGRCPATH="+hgRcPath)
t.env = append(t.env, "HGRCPATH="+hgRcPath)
}

sha := sha256.Sum256([]byte(cacheID))
t.cacheID = hex.EncodeToString(sha[:])

return nil
}

func (t *Hg) initClone(dstPath string) error {
hgURL := t.opts.URL

if _, _, err := t.run([]string{"init"}, dstPath); err != nil {
return err
}

repoHgRcPath := filepath.Join(dstPath, ".hg", "hgrc")

repoHgRc := fmt.Sprintf("[paths]\ndefault = %s\n", hgURL)

err = os.WriteFile(repoHgRcPath, []byte(repoHgRc), 0600)
if err != nil {
if err := os.WriteFile(repoHgRcPath, []byte(repoHgRc), 0600); err != nil {
return fmt.Errorf("Writing %s: %s", repoHgRcPath, err)
}

return t.runMultiple([][]string{
{"pull"},
{"checkout", t.opts.Ref},
}, env, dstPath)
}

func (t *Hg) runMultiple(argss [][]string, env []string, dstPath string) error {
for _, args := range argss {
_, _, err := t.run(args, env, dstPath)
if err != nil {
return err
}
}
return nil
}

func (t *Hg) run(args []string, env []string, dstPath string) (string, string, error) {
func (t *Hg) run(args []string, dstPath string) (string, string, error) {
var stdoutBs, stderrBs bytes.Buffer

cmd := exec.Command("hg", args...)
cmd.Env = env
cmd.Env = t.env
cmd.Dir = dstPath
cmd.Stdout = io.MultiWriter(t.infoLog, &stdoutBs)
cmd.Stderr = io.MultiWriter(t.infoLog, &stderrBs)
Expand Down
41 changes: 36 additions & 5 deletions pkg/vendir/fetch/hg/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,20 @@ import (

ctlconf "carvel.dev/vendir/pkg/vendir/config"
ctlfetch "carvel.dev/vendir/pkg/vendir/fetch"
ctlcache "carvel.dev/vendir/pkg/vendir/fetch/cache"
)

// Sync fetches directory contents from a Mercurial (hg) repository, using
// cache to reuse previously fetched clones.
type Sync struct {
// opts is the hg contents configuration handed to NewHg.
opts ctlconf.DirectoryContentsHg
// log is the writer handed to NewHg as its info log.
log io.Writer
// refFetcher is handed to NewHg.
refFetcher ctlfetch.RefFetcher
// cache stores hg clones under the "hg" type, keyed by the Hg cache id.
cache ctlcache.Cache
}

func NewSync(opts ctlconf.DirectoryContentsHg,
log io.Writer, refFetcher ctlfetch.RefFetcher) Sync {
log io.Writer, refFetcher ctlfetch.RefFetcher, cache ctlcache.Cache) Sync {

return Sync{opts, log, refFetcher}
return Sync{opts, log, refFetcher, cache}
}

func (d Sync) Desc() string {
Expand All @@ -44,11 +46,40 @@ func (d Sync) Sync(dstPath string, tempArea ctlfetch.TempArea) (ctlconf.LockDire

defer os.RemoveAll(incomingTmpPath)

hg := NewHg(d.opts, d.log, d.refFetcher)
hg, err := NewHg(d.opts, d.log, d.refFetcher, tempArea)
if err != nil {
return hgLockConf, fmt.Errorf("Setting up hg: %w", err)
}
defer hg.Close()

if cachePath, ok := d.cache.Has("hg", hg.getCacheID()); ok {
// fetch from cachedDir
if err := d.cache.CopyFrom("hg", hg.getCacheID(), incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Extracting cached hg clone: %w", err)
}
// Sync if needed
if !hg.cloneHasTargetRef(cachePath) {
if err := hg.syncClone(incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Syncing hg repository: %w", err)
}
if err := d.cache.Save("hg", hg.getCacheID(), incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Saving hg repository to cache: %w", err)
}
}
} else {
// fetch in the target directory
if err := hg.clone(incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Cloning hg repository: %w", err)
}
if err := d.cache.Save("hg", hg.getCacheID(), incomingTmpPath); err != nil {
return hgLockConf, fmt.Errorf("Saving hg repository to cache: %w", err)
}
}

info, err := hg.Retrieve(incomingTmpPath, tempArea)
// now checkout the wanted revision
info, err := hg.checkout(incomingTmpPath)
if err != nil {
return hgLockConf, fmt.Errorf("Fetching hg repository: %s", err)
return hgLockConf, fmt.Errorf("Checking out hg repository: %s", err)
}

hgLockConf.SHA = info.SHA
Expand Down
6 changes: 6 additions & 0 deletions test/e2e/assets/hg-repos/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
``build.sh`` assembles a Mercurial repository and supporting files, suitable for testing the hg
fetcher cache feature:

- A base repository (repo)
- A bundle with an extra changeset
- A JSON file with the changeset ids we need in the test, and the bundle filename
Binary file added test/e2e/assets/hg-repos/asset.tgz
Binary file not shown.
44 changes: 44 additions & 0 deletions test/e2e/assets/hg-repos/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#!/bin/bash
#
# Builds asset.tgz, the fixture used to test the hg fetcher cache:
#   - build/repo:  a base Mercurial repository
#   - a bundle holding one extra changeset that is not in the base repository
#   - info.json:   the changeset ids and the bundle filename needed by the test
#
# Run it from the directory that contains this script; all paths are relative
# to the current working directory. `hg topic` is provided by the topic
# extension and `hg strip` by the strip extension, so both must be enabled.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline. Without this, a failed mkdir/cd/hg step would
# be ignored and the remaining commands would run in the wrong directory or
# package a broken repository.
set -euo pipefail

rm -rf build

mkdir build
cd build

mkdir repo
cd repo

hg init .

echo "content1" > file1.txt

hg add file1.txt
hg commit -m "Added file1"
CSET1_ID=$(hg id --id)

hg tag first-tag
# Mark everything committed so far as public.
hg phase -p

# Create the extra changeset on a topic, and remember its id.
hg topic "wip"
echo "content2" > file1.txt
hg commit -m "extra cset"
CSETX_ID=$(hg id --id)

# Remove the extra changeset from the repository; hg leaves a backup bundle
# of it in .hg/strip-backup, which becomes the "extra" bundle of the asset.
hg strip -r .

BUNDLE=$(basename .hg/strip-backup/*-backup.hg)
mv ".hg/strip-backup/$BUNDLE" ..

# NOTE(review): presumably updates to the null revision so the archived
# repository has an empty working copy — confirm.
hg checkout 00000

cd ..

cat > info.json <<EOF
{
"initial-changeset": "$CSET1_ID",
"extra-bundle": "$BUNDLE",
"extra-changeset": "$CSETX_ID"
}
EOF

# -a picks the compression from the archive suffix (.tgz).
tar caf ../asset.tgz .
Loading
Loading