Skip to content

Commit

Permalink
Merge pull request #1896 from buildpacks/bugfix/jjbustamante/issue-1286
Browse files Browse the repository at this point in the history
Hardlinks are dereferenced in generated archives
  • Loading branch information
jjbustamante authored Dec 20, 2023
2 parents d9fddda + 4738eb2 commit 7b5164c
Show file tree
Hide file tree
Showing 13 changed files with 247 additions and 8 deletions.
36 changes: 35 additions & 1 deletion pkg/archive/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ func WriteDirToTar(tw TarWriter, srcDir, basePath string, uid, gid int, mode int
}
}

hardLinkFiles := map[uint64]string{}
return filepath.Walk(srcDir, func(file string, fi os.FileInfo, err error) error {
var relPath string
if fileFilter != nil {
Expand Down Expand Up @@ -218,12 +219,16 @@ func WriteDirToTar(tw TarWriter, srcDir, basePath string, uid, gid int, mode int
}

header.Name = getHeaderNameFromBaseAndRelPath(basePath, relPath)
if err = processHardLinks(file, fi, hardLinkFiles, header); err != nil {
return err
}

err = writeHeader(header, uid, gid, mode, normalizeModTime, tw)
if err != nil {
return err
}

if hasRegularMode(fi) {
if hasRegularMode(fi) && header.Size > 0 {
f, err := os.Open(filepath.Clean(file))
if err != nil {
return err
Expand All @@ -239,6 +244,35 @@ func WriteDirToTar(tw TarWriter, srcDir, basePath string, uid, gid int, mode int
})
}

// processHardLinks rewrites header as a hard-link entry when file shares an inode with an entry that was
// processed earlier. hardLinkFiles maps every inode seen so far to the header name recorded for it: when the
// inode is already present, header becomes a tar.TypeLink entry that points at the recorded name and has a
// size of zero; otherwise the inode is stored under header.Name. Headers of files that have no additional
// hard links are left untouched. Errors from hasHardlinks and getInodeFromStat are returned as-is.
func processHardLinks(file string, fi os.FileInfo, hardLinkFiles map[uint64]string, header *tar.Header) error {
	linked, err := hasHardlinks(fi, file)
	if err != nil {
		return err
	}
	if !linked {
		return nil
	}

	ino, err := getInodeFromStat(fi.Sys(), file)
	if err != nil {
		return err
	}

	firstName, seen := hardLinkFiles[ino]
	if !seen {
		// First entry for this inode: remember its name so later links can refer to it.
		hardLinkFiles[ino] = header.Name
		return nil
	}

	// The inode was recorded before, so describe this entry as a link to the earlier name.
	header.Typeflag = tar.TypeLink
	header.Linkname = firstName
	header.Size = 0
	return nil
}

// WriteZipToTar writes the contents of a zip file to a tar writer.
func WriteZipToTar(tw TarWriter, srcZip, basePath string, uid, gid int, mode int64, normalizeModTime bool, fileFilter func(string) bool) error {
zipReader, err := zip.OpenReader(srcZip)
Expand Down
36 changes: 35 additions & 1 deletion pkg/archive/archive_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ func testArchive(t *testing.T, when spec.G, it spec.S) {

it.After(func() {
if err := os.RemoveAll(tmpDir); err != nil {
t.Fatalf("failed to clean up tmp dir %s: %s", tmpDir, err)
if runtime.GOOS != "windows" {
// skip "The process cannot access the file because it is being used by another process" on windows
t.Fatalf("failed to clean up tmp dir %s: %s", tmpDir, err)
}
}
})

Expand Down Expand Up @@ -442,6 +445,37 @@ func testArchive(t *testing.T, when spec.G, it spec.S) {
})
})
})

when("hard link files are present", func() {
it.Before(func() {
src = filepath.Join("testdata", "dir-to-tar-with-hardlink")
// create a hard link
err := os.Link(filepath.Join(src, "original-file"), filepath.Join(src, "original-file-2"))
h.AssertNil(t, err)
})

it.After(func() {
os.RemoveAll(filepath.Join(src, "original-file-2"))
})

it("tar file file doesn't include duplicated data", func() {
outputFilename := filepath.Join(tmpDir, "file-with-hard-links.tar")
fh, err := os.Create(outputFilename)
h.AssertNil(t, err)

tw := tar.NewWriter(fh)
err = archive.WriteDirToTar(tw, src, "/nested/dir", 1234, 2345, 0777, true, false, nil)

h.AssertNil(t, err)
h.AssertNil(t, tw.Close())
h.AssertNil(t, fh.Close())
h.AssertOnTarEntries(t, outputFilename,
"/nested/dir/original-file",
"/nested/dir/original-file-2",
h.AreEquivalentHardLinks(),
)
})
})
})

when("#WriteZipToTar", func() {
Expand Down
22 changes: 22 additions & 0 deletions pkg/archive/archive_unix.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//go:build linux || darwin

package archive

import (
"os"
"syscall"
)

// hasHardlinks reports whether the file described by fi has more than one hard link, based on the Nlink
// field of the *syscall.Stat_t returned by fi.Sys().
//
// When fi.Sys() does not hold a *syscall.Stat_t (for example a FileInfo that was not produced by a stat
// call and returns nil) the link count is unknown, so the file is reported as having no hard links instead
// of panicking on the type assertion. The returned error is always nil on this platform and path is unused;
// both are part of the signature shared with the Windows implementation.
func hasHardlinks(fi os.FileInfo, path string) (bool, error) {
	stat, ok := fi.Sys().(*syscall.Stat_t)
	if !ok || stat == nil {
		return false, nil
	}
	return stat.Nlink > 1, nil
}

// getInodeFromStat returns the inode (index node) number stored in stat, which is expected to be the
// *syscall.Stat_t obtained from os.FileInfo.Sys().
//
// If stat is not a *syscall.Stat_t an *os.PathError wrapping os.ErrInvalid is returned for path. Returning
// inode 0 with a nil error in that case would make every such file look like a hard link to the same inode
// to callers that group files by the returned value.
func getInodeFromStat(stat interface{}, path string) (inode uint64, err error) {
	s, ok := stat.(*syscall.Stat_t)
	if !ok || s == nil {
		return 0, &os.PathError{Op: "getinode", Path: path, Err: os.ErrInvalid}
	}
	return s.Ino, nil
}
68 changes: 68 additions & 0 deletions pkg/archive/archive_windows.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//go:build windows

package archive

import (
"os"
"syscall"

"golang.org/x/sys/windows"
)

// hasHardlinks reports whether the given file has more than one link pointing at it. The link count is
// taken from fi.Sys() when that already is a *syscall.ByHandleFileInformation; otherwise the file at path is
// queried through open, and any error from that query is returned.
func hasHardlinks(fi os.FileInfo, path string) (bool, error) {
	if info, ok := fi.Sys().(*syscall.ByHandleFileInformation); ok {
		return info.NumberOfLinks > 1, nil
	}

	// fi does not carry a ByHandleFileInformation, so fetch one to read NumberOfLinks.
	info, err := open(path)
	if err != nil {
		return false, err
	}
	return info.NumberOfLinks > 1, nil
}

// getInodeFromStat returns a Windows stand-in for the unix inode number: the 64-bit value formed with
// FileIndexHigh as the upper 32 bits and FileIndexLow as the lower 32 bits. The values are read from stat
// when it is a *syscall.ByHandleFileInformation; otherwise the file at path is queried through open. If that
// query fails, the error is returned together with a zero inode.
func getInodeFromStat(stat interface{}, path string) (inode uint64, err error) {
	info, isHandleInfo := stat.(*syscall.ByHandleFileInformation)
	if !isHandleInfo {
		if info, err = open(path); err != nil {
			return 0, err
		}
	}
	return uint64(info.FileIndexHigh)<<32 | uint64(info.FileIndexLow), nil
}

// open retrieves the ByHandleFileInformation of the file at path. It opens a handle with CreateFile,
// reads the information with GetFileInformationByHandle and closes the handle before returning.
// Any error from converting the path, opening the handle or reading the information is returned.
func open(path string) (*syscall.ByHandleFileInformation, error) {
	pathPtr, err := syscall.UTF16PtrFromString(path)
	if err != nil {
		return nil, err
	}

	// Share the file for reading, writing and deletion while the handle is open.
	const shareAll = syscall.FILE_SHARE_READ | syscall.FILE_SHARE_WRITE | syscall.FILE_SHARE_DELETE

	// Only attribute access is requested on an existing file. FILE_FLAG_BACKUP_SEMANTICS is, per the
	// Win32 CreateFile documentation, what allows a handle to a directory to be obtained.
	h, err := syscall.CreateFile(
		pathPtr,
		windows.FILE_READ_ATTRIBUTES,
		shareAll,
		nil,
		syscall.OPEN_EXISTING,
		syscall.FILE_FLAG_BACKUP_SEMANTICS,
		0,
	)
	if err != nil {
		return nil, err
	}
	defer syscall.CloseHandle(h)

	info := new(syscall.ByHandleFileInformation)
	if err := syscall.GetFileInformationByHandle(h, info); err != nil {
		return nil, err
	}
	return info, nil
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
4 changes: 4 additions & 0 deletions pkg/buildpack/buildpack.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ func toDistTar(tw archive.TarWriter, descriptor Descriptor, blob Blob) error {

header.Mode = calcFileMode(header)
header.Name = path.Join(baseTarDir, header.Name)

if header.Typeflag == tar.TypeLink {
header.Linkname = path.Join(baseTarDir, path.Clean(header.Linkname))
}
err = tw.WriteHeader(header)
if err != nil {
return errors.Wrapf(err, "failed to write header for '%s'", header.Name)
Expand Down
7 changes: 1 addition & 6 deletions pkg/buildpack/buildpack_tar_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,12 +107,7 @@ func (b *BuildModuleWriter) writeBuildModuleToTar(tw archive.TarWriter, module B
return errors.Wrapf(err, "failed to write header for '%s'", header.Name)
}

buf, err := io.ReadAll(tr)
if err != nil {
return errors.Wrapf(err, "failed to read contents of '%s'", header.Name)
}

_, err = tw.Write(buf)
_, err = io.Copy(tw, tr)
if err != nil {
return errors.Wrapf(err, "failed to write contents to '%s'", header.Name)
}
Expand Down
33 changes: 33 additions & 0 deletions pkg/buildpack/buildpack_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/sclevine/spec/report"

"github.com/buildpacks/pack/pkg/archive"
"github.com/buildpacks/pack/pkg/blob"
"github.com/buildpacks/pack/pkg/buildpack"
"github.com/buildpacks/pack/pkg/dist"
h "github.com/buildpacks/pack/testhelpers"
Expand Down Expand Up @@ -511,6 +512,38 @@ version = "1.2.3"
h.AssertNil(t, err)
})
})

when("hardlink is present", func() {
var bpRootFolder string

it.Before(func() {
bpRootFolder = filepath.Join("testdata", "buildpack-with-hardlink")
// create a hard link
err := os.Link(filepath.Join(bpRootFolder, "original-file"), filepath.Join(bpRootFolder, "original-file-2"))
h.AssertNil(t, err)
})

it.After(func() {
os.RemoveAll(filepath.Join(bpRootFolder, "original-file-2"))
})

it("hardlink is preserved in the output tar file", func() {
bp, err := buildpack.FromBuildpackRootBlob(
blob.NewBlob(bpRootFolder),
archive.DefaultTarWriterFactory(),
)
h.AssertNil(t, err)

tarPath := writeBlobToFile(bp)
defer os.Remove(tarPath)

h.AssertOnTarEntries(t, tarPath,
"/cnb/buildpacks/bp.one/1.2.3/original-file",
"/cnb/buildpacks/bp.one/1.2.3/original-file-2",
h.AreEquivalentHardLinks(),
)
})
})
})

when("#Match", func() {
Expand Down
1 change: 1 addition & 0 deletions pkg/buildpack/testdata/buildpack-with-hardlink/bin/build
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
build-contents
Empty file.
10 changes: 10 additions & 0 deletions pkg/buildpack/testdata/buildpack-with-hardlink/buildpack.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
api = "0.3"

[buildpack]
id = "bp.one"
version = "1.2.3"
homepage = "http://one.buildpack"

[[stacks]]
id = "some.stack.id"
mixins = ["mixinX", "build:mixinY", "run:mixinZ"]
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
36 changes: 36 additions & 0 deletions testhelpers/tar_assertions.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ var gzipMagicHeader = []byte{'\x1f', '\x8b'}

// TarEntryAssertion is an assertion callback that receives the header and data of a single tar entry.
type TarEntryAssertion func(t *testing.T, header *tar.Header, data []byte)

// TarEntriesAssertion is an assertion callback that receives the headers and data of two tar entries,
// so that the entries can be checked against each other.
type TarEntriesAssertion func(t *testing.T, header1 *tar.Header, data1 []byte, header2 *tar.Header, data2 []byte)

func AssertOnTarEntry(t *testing.T, tarPath, entryPath string, assertFns ...TarEntryAssertion) {
t.Helper()

Expand Down Expand Up @@ -48,6 +50,27 @@ func AssertOnNestedTar(nestedEntryPath string, assertions ...TarEntryAssertion)
}
}

// AssertOnTarEntries opens the tar file at tarPath, reads the entries entryPath1 and entryPath2 from it
// and then calls every function in assertFns with the header and data of both entries, in that order.
// Errors from opening the file, rewinding it or reading an entry are handed to AssertNil.
func AssertOnTarEntries(t *testing.T, tarPath string, entryPath1, entryPath2 string, assertFns ...TarEntriesAssertion) {
	t.Helper()

	f, err := os.Open(filepath.Clean(tarPath))
	AssertNil(t, err)
	defer f.Close()

	firstHeader, firstData, err := readTarFileEntry(f, entryPath1)
	AssertNil(t, err)

	// Rewind so the second lookup starts from the beginning of the file again.
	_, err = f.Seek(0, io.SeekStart)
	AssertNil(t, err)

	secondHeader, secondData, err := readTarFileEntry(f, entryPath2)
	AssertNil(t, err)

	for i := range assertFns {
		assertFns[i](t, firstHeader, firstData, secondHeader, secondData)
	}
}

func readTarFileEntry(reader io.Reader, entryPath string) (*tar.Header, []byte, error) {
var (
gzipReader *gzip.Reader
Expand Down Expand Up @@ -113,6 +136,19 @@ func SymlinksTo(expectedTarget string) TarEntryAssertion {
}
}

// AreEquivalentHardLinks returns an assertion over two tar entries that fails the test when neither
// entry has the tar.TypeLink type flag, or when neither entry's Linkname equals the other entry's Name.
// The entry data is ignored.
//
// NOTE(review): the link-target check is independent of the type-flag check, so it does not require that
// the entry whose Linkname matches is the one flagged as tar.TypeLink.
func AreEquivalentHardLinks() TarEntriesAssertion {
	return func(t *testing.T, header1 *tar.Header, _ []byte, header2 *tar.Header, _ []byte) {
		t.Helper()

		firstIsLink := header1.Typeflag == tar.TypeLink
		secondIsLink := header2.Typeflag == tar.TypeLink
		if !(firstIsLink || secondIsLink) {
			t.Fatalf("path '%s' and '%s' are not hardlinks, type flags are '%c' and '%c'", header1.Name, header2.Name, header1.Typeflag, header2.Typeflag)
		}

		firstPointsToSecond := header1.Linkname == header2.Name
		secondPointsToFirst := header2.Linkname == header1.Name
		if !firstPointsToSecond && !secondPointsToFirst {
			t.Fatalf("'%s' and '%s' are not the same file", header1.Name, header2.Name)
		}
	}
}

func HasOwnerAndGroup(expectedUID int, expectedGID int) TarEntryAssertion {
return func(t *testing.T, header *tar.Header, _ []byte) {
t.Helper()
Expand Down

0 comments on commit 7b5164c

Please sign in to comment.