From 64f1ad80eef6ee27b748089e321ca5f28556ab7f Mon Sep 17 00:00:00 2001 From: Stefan Hengl Date: Fri, 27 Sep 2024 09:38:35 +0200 Subject: [PATCH] archive: use ModTime as proxy for LatestCommitDate (#836) This is motivated by #832. We use the archive index in our e2e tests. In order to test our latest improvements to ranking, the archive index needs to set the latest commit date. Test plan: - new unit test - I checked that the tar files downloaded from github have the correct mod time. --- internal/archive/archive.go | 8 +++- internal/archive/e2e_test.go | 78 ++++++++++++++++++++++++++++++++++-- internal/archive/index.go | 17 ++++++-- 3 files changed, 93 insertions(+), 10 deletions(-) diff --git a/internal/archive/archive.go b/internal/archive/archive.go index 2c8cb64fa..2048a25fb 100644 --- a/internal/archive/archive.go +++ b/internal/archive/archive.go @@ -11,6 +11,7 @@ import ( "net/url" "os" "strings" + "time" ) type Archive interface { @@ -20,8 +21,9 @@ type Archive interface { type File struct { io.ReadCloser - Name string - Size int64 + Name string + Size int64 + ModTime time.Time } type tarArchive struct { @@ -45,6 +47,7 @@ func (a *tarArchive) Next() (*File, error) { ReadCloser: io.NopCloser(a.tr), Name: hdr.Name, Size: hdr.Size, + ModTime: hdr.ModTime, }, nil } } @@ -71,6 +74,7 @@ func (a *zipArchive) Next() (*File, error) { ReadCloser: r, Name: f.Name, Size: int64(f.UncompressedSize64), + ModTime: f.Modified, }, nil } diff --git a/internal/archive/e2e_test.go b/internal/archive/e2e_test.go index 79b3ba538..b861eb09a 100644 --- a/internal/archive/e2e_test.go +++ b/internal/archive/e2e_test.go @@ -11,8 +11,12 @@ import ( "io" "log" "os" + "path/filepath" "strings" "testing" + "time" + + "github.com/stretchr/testify/require" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/build" @@ -28,11 +32,18 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +var modTime = time.Date(2024, 9, 26, 0, 0, 0, 0, time.UTC) + func writeArchive(w io.Writer, format string, 
files map[string]string) (err error) { if format == "zip" { zw := zip.NewWriter(w) for name, body := range files { - f, err := zw.Create(name) + header := &zip.FileHeader{ + Name: name, + Method: zip.Deflate, + Modified: modTime, + } + f, err := zw.CreateHeader(header) if err != nil { return err } @@ -63,9 +74,10 @@ func writeArchive(w io.Writer, format string, files map[string]string) (err erro for name, body := range files { hdr := &tar.Header{ - Name: name, - Mode: 0o600, - Size: int64(len(body)), + Name: name, + Mode: 0o600, + Size: int64(len(body)), + ModTime: modTime, } if err := tw.WriteHeader(hdr); err != nil { return err @@ -189,3 +201,61 @@ func testIndexIncrementally(t *testing.T, format string) { } } } + +// TestLatestCommitDate tests that the latest commit date is set correctly if +// the mod time of the files has been set during the archive creation. +func TestLatestCommitDate(t *testing.T) { + for _, format := range []string{"tar", "tgz", "zip"} { + t.Run(format, func(t *testing.T) { + testLatestCommitDate(t, format) + }) + } +} + +func testLatestCommitDate(t *testing.T, format string) { + // Create an archive + archive, err := os.CreateTemp("", "TestLatestCommitDate") + require.NoError(t, err) + defer os.Remove(archive.Name()) + + fileSize := 10 + files := map[string]string{} + for i := 0; i < 4; i++ { + s := fmt.Sprintf("%d", i) + files["F"+s] = strings.Repeat("a", fileSize) + files["!F"+s] = strings.Repeat("a", fileSize) + } + + err = writeArchive(archive, format, files) + if err != nil { + t.Fatalf("unable to create archive %v", err) + } + archive.Close() + + // Index + indexDir := t.TempDir() + bopts := build.Options{ + IndexDir: indexDir, + } + opts := Options{ + Archive: archive.Name(), + Name: "repo", + Branch: "master", + Commit: "cccccccccccccccccccccccccccccccccccccccc", + } + + err = Index(opts, bopts) + require.NoError(t, err) + + // Read the metadata of the index we just created and check the latest commit date. 
+ f, err := os.Open(indexDir) + require.NoError(t, err) + + indexFiles, err := f.Readdirnames(1) + require.Len(t, indexFiles, 1) + + repos, _, err := zoekt.ReadMetadataPath(filepath.Join(indexDir, indexFiles[0])) + require.NoError(t, err) + require.Len(t, repos, 1) + require.True(t, repos[0].LatestCommitDate.Equal(modTime)) +} diff --git a/internal/archive/index.go b/internal/archive/index.go index 2262c9521..c8836768f 100644 --- a/internal/archive/index.go +++ b/internal/archive/index.go @@ -7,6 +7,7 @@ import ( "io" "net/url" "strings" + "sync" "github.com/sourcegraph/zoekt" "github.com/sourcegraph/zoekt/build" @@ -113,14 +114,22 @@ func Index(opts Options, bopts build.Options) error { defer a.Close() bopts.RepositoryDescription.Source = opts.Archive - builder, err := build.NewBuilder(bopts) - if err != nil { - return err - } + var builder *build.Builder + once := sync.Once{} + var onceErr error add := func(f *File) error { defer f.Close() + once.Do(func() { + // We use the ModTime of the first file as a proxy for the latest commit date. + bopts.RepositoryDescription.LatestCommitDate = f.ModTime + builder, onceErr = build.NewBuilder(bopts) + }) + if onceErr != nil { + return onceErr + } + contents, err := io.ReadAll(f) if err != nil { return err