From a6c2b8046c0d3fe72add0a63e6e94a36bfd0c16c Mon Sep 17 00:00:00 2001 From: Reilly Brogan Date: Thu, 13 Jun 2024 18:37:10 -0500 Subject: [PATCH] git: Fix submodule fetch, rewrite logic The logic for this was quite broken and had several failure cases due to that. This commit re-writes that to be saner. Changes: - Individual exec commands are all split out to separate functions so that they can be more easily composed. No function does more than a single command - Control flow now happens entirely in the Fetch() function - Before there were several failure states around submodules due to the `git switch` command happening immediately after the `git fetch` if the repo was already cloned. This is fixed by moving to a unified control flow. - `Fetch()` will now run either the initial clone OR update refs before moving onto further steps - `git clone` no longer attempts to initialize submodules. Several features from `git submodules` aren't yet implemented by `git clone` and so submodule initialization is moved entirely into a `git submodule` command - `git switch` now happens after the clone/fetch but before `git submodule` - `git switch` now no longer runs with any submodule-related flags. Like `git clone` the features we need are just not implemented yet and as far as I can tell there are subtle behavioral differences that were causing issues. - `git submodule` will now use `--filter=blob:none` to specify that submodule checkouts should themselves be blobless when possible. As far as I can tell this works correctly with `--recursive` and recursive submodules. - Because we are now checking out submodules with blobless clones when possible the `fixPermissions()` logic has been updated to account for promisor files that could be in submodule `.git` directories --- builder/source/git.go | 87 +++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 45 deletions(-) diff --git a/builder/source/git.go b/builder/source/git.go index 9c6d5cd..3ca5022 100644 --- a/builder/source/git.go +++ b/builder/source/git.go @@ -65,71 +65,62 @@ func NewGit(uri, ref string) (*GitSource, error) { return g, nil } -// submodules will handle setup of the git submodules after a -// reset has taken place. -func (g *GitSource) submodules() error { - cmd := exec.Command("git", "submodule", "update", "--init", "--recursive") +// clone shallow clones an upstream git repository to the local disk. +func (g *GitSource) clone() error { + // Create a blobless clone without checking out a ref + cmd := exec.Command("git", "clone", "--filter=blob:none", "--no-checkout", g.URI, g.ClonePath) - cmd.Dir = g.ClonePath cmd.Stdout = os.Stdout cmd.Stderr = os.Stdout return cmd.Run() } -// For some reason git blobless clones create pack files with 600 permissions. These break future operations -// as those files cannot be read by non-root users. Fix those permissions so things work as they should. -func (g *GitSource) fixPermissions() error { - cmd := exec.Command("bash", "-c", "chmod +r .git/objects/pack/*.promisor") +// updateRefs checks the upstream for new refs and tags in case we need them for future git commands. +func (g *GitSource) updateRefs() error { + // --tags: Update git tags as well + // --force: Force overwrite any refs locally (such as when upstream moves a tag) + cmd := exec.Command("git", "fetch", "--tags", "--force", "origin") cmd.Dir = g.ClonePath + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stdout return cmd.Run() } -// clone shallow clones an upstream git repository to the local disk. -func clone(uri, path, ref string) error { - var cmd *exec.Cmd +// switch will switch to the given ref. +func (g *GitSource) switchRef() error { + cmd := exec.Command("git", "switch", "--discard-changes", "--detach", g.Ref) - // Create a blobless clone without checking out a ref - initCmd := exec.Command("git", "clone", "--filter=blob:none", "--no-checkout", uri, path) - initCmd.Stdout = os.Stdout - initCmd.Stderr = os.Stdout + cmd.Dir = g.ClonePath + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stdout - if err := initCmd.Run(); err != nil { - return err - } + return cmd.Run() +} - // Checkout the ref we want - cmd = exec.Command("git", "switch", "--discard-changes", "--recurse-submodules", "--detach", ref) - cmd.Dir = path +// submodules will handle setup of the git submodules after a +// reset has taken place. +func (g *GitSource) submodules() error { + // --init initializes the submodule if it hasn't been initialized alredy + cmd := exec.Command("git", "submodule", "update", "--init", "--filter=blob:none", "--recursive") + cmd.Dir = g.ClonePath cmd.Stdout = os.Stdout cmd.Stderr = os.Stdout return cmd.Run() } -// reset fetches the new git reference (a tag or commit SHA1 hash) and -// hard resets the repository on that reference. -func reset(path, ref string) error { - fetchArgs := []string{ - "fetch", - "--tags", - "origin", - } - - fetchCmd := exec.Command("git", fetchArgs...) - fetchCmd.Dir = path - - if err := fetchCmd.Run(); err != nil { - return err - } +// For some reason git blobless clones create pack files with 600 permissions. These break future operations +// as those files cannot be read by non-root users. Fix those permissions so things work as they should. +func (g *GitSource) fixPermissions() error { + cmd := exec.Command("bash", "-c", `find . -name "*.promisor" -type f -exec chmod +r "{}" \;`) - resetCmd := exec.Command("git", "switch", "--discard-changes", "--recurse-submodules", "--detach", ref) - resetCmd.Dir = path + cmd.Dir = g.ClonePath - return resetCmd.Run() + return cmd.Run() } // Fetch will attempt to download the git tree locally. If it already exists @@ -137,18 +128,24 @@ func reset(path, ref string) error { func (g *GitSource) Fetch() error { // First things first, make sure we have a destination if !PathExists(g.ClonePath) { - if err := clone(g.URI, g.ClonePath, g.Ref); err != nil { + if err := g.clone(); err != nil { return err } } else { - // Repo already exists locally, try to reset to the new reference - if err := reset(g.ClonePath, g.Ref); err != nil { + // Repo already exists locally, get the latest refs from origin + if err := g.updateRefs(); err != nil { return err } } - // Check out submodules - err := g.submodules() + // Checkout the ref we want + err := g.switchRef() + if err != nil { + return err + } + + // Update or checkout submodules + err = g.submodules() if err != nil { return err }