Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduced sha256 support for git-sizer #109

Closed
wants to merge 14 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion git-sizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func mainImplementation(ctx context.Context, stdout, stderr io.Writer, args []st

// Try to open the repository, but it's not an error yet if this
// fails, because the user might only be asking for `--help`.
repo, repoErr := git.NewRepository(".")
repo, repoErr := git.NewRepositoryFromPath(".")

flags := pflag.NewFlagSet("git-sizer", pflag.ContinueOnError)
flags.Usage = func() {
Expand Down
124 changes: 99 additions & 25 deletions git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"bytes"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"path/filepath"
Expand All @@ -15,26 +16,32 @@ type ObjectType string

// Repository represents a Git repository on disk.
type Repository struct {
path string
// gitDir is the path to the `GIT_DIR` for this repository. It
// might be absolute or it might be relative to the current
// directory.
gitDir string

// gitBin is the path of the `git` executable that should be used
// when running commands in this repository.
gitBin string
gitBin string
hashAlgo HashAlgo
}

// smartJoin returns the path that can be described as `relPath`
// relative to `path`, given that `path` is either absolute or is
// relative to the current directory.
// smartJoin returns `relPath` if it is an absolute path. If not, it
// assumes that `relPath` is relative to `path`, so it joins them
// together and returns the result. In that case, if `path` itself is
// relative, then the return value is also relative.
func smartJoin(path, relPath string) string {
	// Only a relative `relPath` needs to be anchored at `path`; the
	// joined result is relative exactly when `path` is.
	if !filepath.IsAbs(relPath) {
		return filepath.Join(path, relPath)
	}

	// An absolute `relPath` already names its target on its own.
	return relPath
}

// NewRepository creates a new repository object that can be used for
// running `git` commands within that repository.
func NewRepository(path string) (*Repository, error) {
// NewRepositoryFromGitDir creates a new `Repository` object that can
// be used for running `git` commands, given the value of `GIT_DIR`
// for the repository.
func NewRepositoryFromGitDir(gitDir string) (*Repository, error) {
// Find the `git` executable to be used:
gitBin, err := findGitBin()
if err != nil {
Expand All @@ -43,6 +50,44 @@ func NewRepository(path string) (*Repository, error) {
)
}

hashAlgo := HashSHA1
//nolint:gosec // `gitDir` is the path we need Git to access.
cmd := exec.Command(gitBin, "--git-dir", gitDir, "rev-parse", "--show-object-format")
if out, err := cmd.Output(); err == nil {
if string(bytes.TrimSpace(out)) == "sha256" {
hashAlgo = HashSHA256
}
}

repo := Repository{
gitDir: gitDir,
gitBin: gitBin,
hashAlgo: hashAlgo,
}

full, err := repo.IsFull()
if err != nil {
return nil, fmt.Errorf("determining whether the repository is a full clone: %w", err)
}
if !full {
return nil, errors.New("this appears to be a shallow clone; full clone required")
}

return &repo, nil
}

// NewRepositoryFromPath creates a new `Repository` object that can be
// used for running `git` commands within `path`. It does so by asking
// `git` what `GIT_DIR` to use. Git, in turn, bases its decision on
// the path and the environment.
func NewRepositoryFromPath(path string) (*Repository, error) {
gitBin, err := findGitBin()
if err != nil {
return nil, fmt.Errorf(
"could not find 'git' executable (is it in your PATH?): %w", err,
)
}

//nolint:gosec // `gitBin` is chosen carefully, and `path` is the
// path to the repository.
cmd := exec.Command(gitBin, "-C", path, "rev-parse", "--git-dir")
Expand All @@ -63,25 +108,28 @@ func NewRepository(path string) (*Repository, error) {
}
gitDir := smartJoin(path, string(bytes.TrimSpace(out)))

//nolint:gosec // `gitBin` is chosen carefully.
cmd = exec.Command(gitBin, "rev-parse", "--git-path", "shallow")
cmd.Dir = gitDir
out, err = cmd.Output()
return NewRepositoryFromGitDir(gitDir)
}

// IsFull returns `true` iff `repo` appears to be a full clone.
func (repo *Repository) IsFull() (bool, error) {
shallow, err := repo.GitPath("shallow")
if err != nil {
return nil, fmt.Errorf(
"could not run 'git rev-parse --git-path shallow': %w", err,
)
return false, err
}
shallow := smartJoin(gitDir, string(bytes.TrimSpace(out)))

_, err = os.Lstat(shallow)
if err == nil {
return nil, errors.New("this appears to be a shallow clone; full clone required")
return false, nil
}

if !errors.Is(err, fs.ErrNotExist) {
return false, err
}

return &Repository{
path: gitDir,
gitBin: gitBin,
}, nil
// The `shallow` file is absent, which is what we expect
// for a full clone.
return true, nil
}

func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd {
Expand All @@ -103,15 +151,41 @@ func (repo *Repository) GitCommand(callerArgs ...string) *exec.Cmd {

cmd.Env = append(
os.Environ(),
"GIT_DIR="+repo.path,
"GIT_DIR="+repo.gitDir,
// Disable grafts when running our commands:
"GIT_GRAFT_FILE="+os.DevNull,
)

return cmd
}

// Path returns the path to `repo`.
func (repo *Repository) Path() string {
return repo.path
// GitDir returns the path to `repo`'s `GIT_DIR`. It might be absolute
// or it might be relative to the current directory.
func (repo *Repository) GitDir() string {
return repo.gitDir
}

// GitPath asks Git where the file `relPath` lives inside this
// repository by running `git rev-parse --git-path $relPath` through
// `repo.GitCommand`. On success it returns Git's output with the
// surrounding whitespace trimmed; if the command fails, it returns
// an empty string and an error that wraps the command's error.
func (repo *Repository) GitPath(relPath string) (string, error) {
	out, err := repo.GitCommand("rev-parse", "--git-path", relPath).Output()
	if err != nil {
		return "", fmt.Errorf(
			"running 'git rev-parse --git-path %s': %w", relPath, err,
		)
	}

	// Git documents the output of `rev-parse --git-path` as being
	// relative to the current directory. The command is run without
	// changing the current directory, so the answer needs no further
	// adjustment beyond trimming the trailing newline.
	gitPath := string(bytes.TrimSpace(out))
	return gitPath, nil
}

// HashAlgo returns the hash algorithm stored in `repo`'s `hashAlgo`
// field.
func (repo *Repository) HashAlgo() HashAlgo {
return repo.hashAlgo
}

// HashSize returns the result of `HashSize()` for `repo`'s hash
// algorithm; i.e., the size in bytes of a binary object ID, or 0 if
// the algorithm is not a recognized one.
func (repo *Repository) HashSize() int {
return repo.hashAlgo.HashSize()
}
33 changes: 24 additions & 9 deletions git/git_bin.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,41 @@ package git

import (
"path/filepath"
"sync"

"github.com/cli/safeexec"
)

// This variable will be used to memoize the result of `findGitBin()`,
// since its return value only depends on the environment.
var gitBinMemo struct {
// once guards the lookup so that it is performed at most one time.
once sync.Once

// gitBin is the memoized path of the `git` executable. It is only
// set if the lookup succeeded.
gitBin string
// err is the memoized error from the lookup, if it failed.
err error
}

// findGitBin finds the `git` binary in PATH that should be used by
// the rest of `git-sizer`. It uses `safeexec` to find the executable,
// because on Windows, `exec.Cmd` looks not only in PATH, but also in
// the current directory. This is a potential risk if the repository
// being scanned is hostile and non-bare because it might possibly
// contain an executable file named `git`.
func findGitBin() (string, error) {
gitBin, err := safeexec.LookPath("git")
if err != nil {
return "", err
}
gitBinMemo.once.Do(func() {
p, err := safeexec.LookPath("git")
if err != nil {
gitBinMemo.err = err
return
}

gitBin, err = filepath.Abs(gitBin)
if err != nil {
return "", err
}
p, err = filepath.Abs(p)
if err != nil {
gitBinMemo.err = err
return
}

return gitBin, nil
gitBinMemo.gitBin = p
})
return gitBinMemo.gitBin, gitBinMemo.err
}
8 changes: 4 additions & 4 deletions git/gitconfig.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,10 @@ func (config *Config) FullKey(key string) string {
// a component boundary (i.e., at a '.'). If yes, it returns `true`
// and the part of the key after the prefix; e.g.:
//
// configKeyMatchesPrefix("foo.bar", "foo") → true, "bar"
// configKeyMatchesPrefix("foo.bar", "foo.") → true, "bar"
// configKeyMatchesPrefix("foo.bar", "foo.bar") → true, ""
// configKeyMatchesPrefix("foo.bar", "foo.bar.") → false, ""
// configKeyMatchesPrefix("foo.bar", "foo") → true, "bar"
// configKeyMatchesPrefix("foo.bar", "foo.") → true, "bar"
// configKeyMatchesPrefix("foo.bar", "foo.bar") → true, ""
// configKeyMatchesPrefix("foo.bar", "foo.bar.") → false, ""
func configKeyMatchesPrefix(key, prefix string) (bool, string) {
if prefix == "" {
return true, key
Expand Down
6 changes: 3 additions & 3 deletions git/obj_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error)
errCh: make(chan error),
headerCh: make(chan BatchHeader),
}

hashHexSize := repo.HashSize() * 2
iter.p.Add(
// Read OIDs from `iter.oidCh` and write them to `git
// rev-list`:
Expand Down Expand Up @@ -68,10 +68,10 @@ func (repo *Repository) NewObjectIter(ctx context.Context) (*ObjectIter, error)
pipe.LinewiseFunction(
"copy-oids",
func(_ context.Context, _ pipe.Env, line []byte, stdout *bufio.Writer) error {
if len(line) < 40 {
if len(line) < hashHexSize {
return fmt.Errorf("line too short: '%s'", line)
}
if _, err := stdout.Write(line[:40]); err != nil {
if _, err := stdout.Write(line[:hashHexSize]); err != nil {
return fmt.Errorf("writing OID to 'git cat-file': %w", err)
}
if err := stdout.WriteByte('\n'); err != nil {
Expand Down
4 changes: 2 additions & 2 deletions git/obj_resolver.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ func (repo *Repository) ResolveObject(name string) (OID, error) {
cmd := repo.GitCommand("rev-parse", "--verify", "--end-of-options", name)
output, err := cmd.Output()
if err != nil {
return NullOID, fmt.Errorf("resolving object %q: %w", name, err)
return repo.HashAlgo().NullOID(), fmt.Errorf("resolving object %q: %w", name, err)
}
oidString := string(bytes.TrimSpace(output))
oid, err := NewOID(oidString)
if err != nil {
return NullOID, fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err)
return repo.HashAlgo().NullOID(), fmt.Errorf("parsing output %q from 'rev-parse': %w", oidString, err)
}
return oid, nil
}
63 changes: 54 additions & 9 deletions git/oid.go
Original file line number Diff line number Diff line change
@@ -1,31 +1,76 @@
package git

import (
"bytes"
//nolint:gosec // Git indeed does use SHA-1, still
"crypto/sha1"
"crypto/sha256"
"encoding/hex"
"errors"
)

// Sizes, in bytes, of the binary object IDs for each supported hash
// algorithm.
const (
// HashSizeSHA256 is the size of a SHA-256 checksum.
HashSizeSHA256 = sha256.Size
// HashSizeSHA1 is the size of a SHA-1 checksum.
HashSizeSHA1 = sha1.Size
// HashSizeMax is the larger of the supported sizes.
HashSizeMax = HashSizeSHA256
)

// HashAlgo identifies the hash algorithm that is used to compute
// object IDs.
type HashAlgo int

const (
// HashUnknown is the zero value of `HashAlgo`; it does not name
// any particular algorithm.
HashUnknown HashAlgo = iota
// HashSHA1 denotes SHA-1 object IDs.
HashSHA1
// HashSHA256 denotes SHA-256 object IDs.
HashSHA256
)

// OID represents the object ID (SHA-1 or SHA-256) of a Git object, in
// binary format.
type OID struct {
v [20]byte
v [HashSizeMax]byte
hashSize int
}

// NullOID is the null object ID; i.e., all zeros.
var NullOID OID
// NullOID returns an all-zeros object ID whose recorded hash size
// matches hash algorithm `h`. For any algorithm other than
// `HashSHA1` or `HashSHA256`, the zero-value `OID` is returned.
func (h HashAlgo) NullOID() OID {
	// Start from the zero value; only the recorded size depends on
	// the algorithm.
	var null OID
	switch h {
	case HashSHA1:
		null.hashSize = HashSizeSHA1
	case HashSHA256:
		null.hashSize = HashSizeSHA256
	}
	return null
}

// HashSize returns the size, in bytes, of a binary object ID
// computed with hash algorithm `h`. It returns 0 if `h` is neither
// `HashSHA1` nor `HashSHA256`.
func (h HashAlgo) HashSize() int {
	if h == HashSHA1 {
		return HashSizeSHA1
	}
	if h == HashSHA256 {
		return HashSizeSHA256
	}

	// Not an algorithm that we know the size for.
	return 0
}

// defaultNullOID is the null object ID; i.e., all zeros.
var defaultNullOID OID

// IsNullOID reports whether the bytes stored in `o` are equal to
// those stored in `defaultNullOID`. The whole backing array is
// compared, and the hash size recorded in `o` is not consulted.
func IsNullOID(o OID) bool {
	candidate, null := o.v[:], defaultNullOID.v[:]
	return bytes.Equal(candidate, null)
}

// OIDFromBytes converts a byte slice containing an object ID in
// binary format into an `OID`.
func OIDFromBytes(oidBytes []byte) (OID, error) {
var oid OID
if len(oidBytes) != len(oid.v) {
oidSize := len(oidBytes)
if oidSize != HashSizeSHA1 && oidSize != HashSizeSHA256 {
return OID{}, errors.New("bytes oid has the wrong length")
}
copy(oid.v[0:20], oidBytes)
oid.hashSize = oidSize
copy(oid.v[0:oidSize], oidBytes)
return oid, nil
}

// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40}`)
// NewOID converts an object ID in hex format (i.e., `[0-9a-f]{40,64}`)
// into an `OID`.
func NewOID(s string) (OID, error) {
oidBytes, err := hex.DecodeString(s)
Expand All @@ -37,18 +82,18 @@ func NewOID(s string) (OID, error) {

// String formats `oid` as a string in hex format.
func (oid OID) String() string {
return hex.EncodeToString(oid.v[:])
return hex.EncodeToString(oid.v[:oid.hashSize])
}

// Bytes returns a byte slice view of `oid`, in binary format.
func (oid OID) Bytes() []byte {
return oid.v[:]
return oid.v[:oid.hashSize]
}

// MarshalJSON expresses `oid` as a JSON string with its enclosing
// quotation marks.
func (oid OID) MarshalJSON() ([]byte, error) {
src := oid.v[:]
src := oid.v[:oid.hashSize]
dst := make([]byte, hex.EncodedLen(len(src))+2)
dst[0] = '"'
dst[len(dst)-1] = '"'
Expand Down
Loading
Loading