Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Layer Scanning] Add symlinks support for the FileRequirer logic in image.FromTarball; changed the Layer.Uncompressed method to return a new ReaderCloser every time the method is called. #398

Merged
merged 1 commit into from
Feb 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 90 additions & 38 deletions artifact/image/layerscanning/image/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ type Config struct {
func DefaultConfig() *Config {
return &Config{
MaxFileBytes: DefaultMaxFileBytes,
Requirer: &require.FileRequirerAll{},
// All files are required by default.
Requirer: &require.FileRequirerAll{},
}
}

Expand Down Expand Up @@ -193,44 +194,65 @@ func FromV1Image(v1Image v1.Image, config *Config) (*Image, error) {
}
}

// Reverse loop through the layers to start from the latest layer first. This allows us to skip
// all files already seen.
for i := len(chainLayers) - 1; i >= 0; i-- {
chainLayer := chainLayers[i]
requiredTargets := make(map[string]bool)
for range DefaultMaxSymlinkDepth {
// Reverse loop through the layers to start from the latest layer first. This allows us to skip
// all files already seen.
for i := len(chainLayers) - 1; i >= 0; i-- {
chainLayer := chainLayers[i]

// If the layer is empty, then there is nothing to do.
if chainLayer.latestLayer.IsEmpty() {
continue
}
// If the layer is empty, then there is nothing to do.
if chainLayer.latestLayer.IsEmpty() {
continue
}

originLayerID := chainLayer.latestLayer.DiffID().Encoded()
originLayerID := chainLayer.latestLayer.DiffID().Encoded()

// Create the chain layer directory if it doesn't exist.
// Use filepath here as it is a path that will be written to disk.
dirPath := filepath.Join(tempPath, originLayerID)
if err := os.Mkdir(dirPath, dirPermission); err != nil && !errors.Is(err, fs.ErrExist) {
return &outputImage, fmt.Errorf("failed to create chain layer directory: %w", err)
}
// Create the chain layer directory if it doesn't exist.
// Use filepath here as it is a path that will be written to disk.
dirPath := filepath.Join(tempPath, originLayerID)
if err := os.Mkdir(dirPath, dirPermission); err != nil && !errors.Is(err, fs.ErrExist) {
return &outputImage, fmt.Errorf("failed to create chain layer directory: %w", err)
}

chainLayersToFill := chainLayers[i:]
layerReader, err := chainLayer.latestLayer.Uncompressed()
if err != nil {
return &outputImage, err
chainLayersToFill := chainLayers[i:]
layerReader, err := chainLayer.latestLayer.Uncompressed()
if err != nil {
return &outputImage, err
}

err = func() error {
// Manually close at the end of the for loop.
defer layerReader.Close()

tarReader := tar.NewReader(layerReader)
requiredTargets, err = fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill, config.Requirer, requiredTargets)
if err != nil {
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
}
return nil
}()

if err != nil {
return &outputImage, err
}
}

err = func() error {
// Manually close at the end of the for loop.
defer layerReader.Close()
// If there are no more required targets from symlinks, then there is no need to continue.
if len(requiredTargets) == 0 {
break
}

tarReader := tar.NewReader(layerReader)
if err := fillChainLayerWithFilesFromTar(&outputImage, tarReader, originLayerID, dirPath, chainLayersToFill); err != nil {
return fmt.Errorf("failed to fill chain layer with v1 layer tar: %w", err)
stillHaveRequiredTargets := false
for _, isRequired := range requiredTargets {
if isRequired {
stillHaveRequiredTargets = true
break
}
return nil
}()
}

if err != nil {
return &outputImage, err
if !stillHaveRequiredTargets {
break
}
}
return &outputImage, nil
Expand Down Expand Up @@ -302,14 +324,16 @@ func initializeChainLayers(v1Layers []v1.Layer, configFile *v1.ConfigFile) ([]*c
// fillChainLayerWithFilesFromTar fills the chain layers with the files found in the tar. The
// chainLayersToFill are the chain layers that will be filled with the files via the virtual
// filesystem.
func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer) error {
func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLayerID string, dirPath string, chainLayersToFill []*chainLayer, requirer require.FileRequirer, requiredTargets map[string]bool) (map[string]bool, error) {
currentChainLayer := chainLayersToFill[0]

for {
header, err := tarReader.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return fmt.Errorf("could not read tar: %w", err)
return nil, fmt.Errorf("could not read tar: %w", err)
}
// Some tools prepend everything with "./", so if we don't path.Clean the name, we may have
// duplicate entries, which angers tar-split. Using path instead of filepath to keep `/` and
Expand Down Expand Up @@ -345,6 +369,7 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
continue
}

// Check if the file is a whiteout.
isWhiteout := whiteout.IsWhiteout(basename)
// TODO: b/379094217 - Handle Opaque Whiteouts
if isWhiteout {
Expand All @@ -363,8 +388,33 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
// any forward slashes to the appropriate OS specific path separator.
realFilePath := filepath.Join(dirPath, filepath.FromSlash(cleanedFilePath))

// If the file is not required, then skip it.
if !img.config.Requirer.FileRequired(virtualPath, header.FileInfo()) {
// If the file already exists in the current chain layer, then skip it. This is done because
// the tar file could be read multiple times to handle required symlinks.
if currentChainLayer.fileNodeTree.Get(virtualPath) != nil {
continue
}

// Skip files that are not required by extractors and are not targets of required symlinks.
// Try multiple paths variations
// (with parent dir, without leading slash, with leading slash). For example:
// - `realFilePath`: `tmp/12345/etc/os-release`. This is used when actually writing the file to disk.
// - `cleanedFilePath`: `etc/os-release`. This is used when checking if the file is required.
// - `virtualPath`: `/etc/os-release`. This is used when checking if the file is required.
required := false
for _, p := range []string{realFilePath, cleanedFilePath, virtualPath} {
if requirer.FileRequired(p, header.FileInfo()) {
required = true
break
}
if _, ok := requiredTargets[p]; ok {
required = true

// The required target has been checked, so it can be marked as not required.
requiredTargets[p] = false
break
}
}
if !required {
continue
}

Expand All @@ -375,7 +425,7 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
case tar.TypeReg:
newNode, err = img.handleFile(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
case tar.TypeSymlink, tar.TypeLink:
newNode, err = img.handleSymlink(realFilePath, virtualPath, originLayerID, tarReader, header, isWhiteout)
newNode, err = img.handleSymlink(virtualPath, originLayerID, tarReader, header, isWhiteout, requiredTargets)
default:
log.Warnf("unsupported file type: %v, path: %s", header.Typeflag, header.Name)
continue
Expand All @@ -386,20 +436,20 @@ func fillChainLayerWithFilesFromTar(img *Image, tarReader *tar.Reader, originLay
log.Warnf("failed to handle tar entry with path %s: %w", virtualPath, err)
continue
}
return fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err)
return nil, fmt.Errorf("failed to handle tar entry with path %s: %w", virtualPath, err)
}

// In each outer loop, a layer is added to each relevant output chainLayer slice. Because the
// outer loop is looping backwards (latest layer first), we ignore any files that are already in
// each chainLayer, as they would have been overwritten.
fillChainLayersWithFileNode(chainLayersToFill, newNode)
}
return nil
return requiredTargets, nil
}

// handleSymlink returns the symlink header mode. Symlinks are handled by creating a fileNode with
// the symlink mode with additional metadata.
func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool) (*fileNode, error) {
func (img *Image) handleSymlink(virtualPath, originLayerID string, tarReader *tar.Reader, header *tar.Header, isWhiteout bool, requiredTargets map[string]bool) (*fileNode, error) {
targetPath := filepath.ToSlash(header.Linkname)
if targetPath == "" {
return nil, fmt.Errorf("symlink header has no target path")
Expand All @@ -415,6 +465,8 @@ func (img *Image) handleSymlink(realFilePath, virtualPath, originLayerID string,
targetPath = path.Clean(path.Join(path.Dir(virtualPath), targetPath))
}

requiredTargets[targetPath] = true

return &fileNode{
extractDir: img.ExtractDir,
originLayerID: originLayerID,
Expand Down
54 changes: 54 additions & 0 deletions artifact/image/layerscanning/image/image_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,60 @@ func TestFromTarball(t *testing.T) {
},
},
},
{
name: "image with required symlink but non-required target path",
tarPath: filepath.Join(testdataDir, "symlink-basic.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
// dir1/sample.txt is not explicitly required, but should be unpacked because it is the
// target of a required symlink.
Requirer: require.NewFileRequirerPaths([]string{
"/dir1/absolute-symlink.txt",
}),
},
wantChainLayerEntries: []chainLayerEntries{
{
filepathContentPairs: []filepathContentPair{
{
filepath: "dir1/sample.txt",
content: "sample text\n",
},
{
filepath: "dir1/absolute-symlink.txt",
content: "sample text\n",
},
},
},
},
},
{
name: "image with symlink chain but non-required target path",
tarPath: filepath.Join(testdataDir, "symlink-basic.tar"),
config: &Config{
MaxFileBytes: DefaultMaxFileBytes,
Requirer: require.NewFileRequirerPaths([]string{
"/dir1/chain-symlink.txt",
}),
},
wantChainLayerEntries: []chainLayerEntries{
{
filepathContentPairs: []filepathContentPair{
{
filepath: "dir1/sample.txt",
content: "sample text\n",
},
{
filepath: "dir1/absolute-symlink.txt",
content: "sample text\n",
},
{
filepath: "dir1/chain-symlink.txt",
content: "sample text\n",
},
},
},
},
},
{
name: "image with symlink cycle",
tarPath: filepath.Join(testdataDir, "symlink-cycle.tar"),
Expand Down
19 changes: 9 additions & 10 deletions artifact/image/layerscanning/image/layer.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,10 @@ var (

// Layer implements the Layer interface.
type Layer struct {
v1Layer v1.Layer
diffID digest.Digest
buildCommand string
isEmpty bool
uncompressed io.ReadCloser
}

// FS returns a scalibr compliant file system.
Expand All @@ -75,10 +75,15 @@ func (layer *Layer) Command() string {
return layer.buildCommand
}

// Uncompressed gets the uncompressed ReadCloser which holds all files in the layer.
// Uncompressed returns a new uncompressed ReadCloser from the v1 layer which holds all files in the
// layer.
// TODO: b/378938357 - Figure out a better way to get the uncompressed ReadCloser.
func (layer *Layer) Uncompressed() (io.ReadCloser, error) {
return layer.uncompressed, nil
uncompressed, err := layer.v1Layer.Uncompressed()
if err != nil {
return nil, fmt.Errorf("%w: %w", ErrUncompressedReaderMissingFromLayer, err)
}
return uncompressed, nil
}

// convertV1Layer converts a v1.Layer to a scalibr Layer. This involves getting the diffID and
Expand All @@ -89,16 +94,11 @@ func convertV1Layer(v1Layer v1.Layer, command string, isEmpty bool) (*Layer, err
return nil, fmt.Errorf("%w: %w", ErrDiffIDMissingFromLayer, err)
}

uncompressed, err := v1Layer.Uncompressed()
if err != nil {
return nil, fmt.Errorf("%w: %w", ErrUncompressedReaderMissingFromLayer, err)
}

return &Layer{
v1Layer: v1Layer,
diffID: digest.Digest(diffID.String()),
buildCommand: command,
isEmpty: isEmpty,
uncompressed: uncompressed,
}, nil
}

Expand All @@ -115,7 +115,6 @@ type chainLayer struct {

// FS returns a scalibrfs.FS that can be used to scan for inventory.
func (chainLayer *chainLayer) FS() scalibrfs.FS {
// root should be "/" given we are dealing with file paths.
return &FS{
tree: chainLayer.fileNodeTree,
maxSymlinkDepth: DefaultMaxSymlinkDepth,
Expand Down
4 changes: 2 additions & 2 deletions artifact/image/layerscanning/image/layer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@ func TestConvertV1Layer(t *testing.T) {
command: "ADD file",
isEmpty: false,
wantLayer: &Layer{
v1Layer: fakev1layer.New("abc123", "ADD file", false, reader),
diffID: "sha256:abc123",
buildCommand: "ADD file",
isEmpty: false,
uncompressed: reader,
},
},
{
Expand All @@ -77,7 +77,7 @@ func TestConvertV1Layer(t *testing.T) {
if tc.wantError != nil && gotError == tc.wantError {
t.Errorf("convertV1Layer(%v, %v, %v) returned error: %v, want error: %v", tc.v1Layer, tc.command, tc.isEmpty, gotError, tc.wantError)
}
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{})); tc.wantLayer != nil && diff != "" {
if diff := cmp.Diff(gotLayer, tc.wantLayer, cmp.AllowUnexported(Layer{}, fakev1layer.FakeV1Layer{})); tc.wantLayer != nil && diff != "" {
t.Errorf("convertV1Layer(%v, %v, %v) returned layer: %v, want layer: %v", tc.v1Layer, tc.command, tc.isEmpty, gotLayer, tc.wantLayer)
}
})
Expand Down
27 changes: 27 additions & 0 deletions artifact/image/testfixtures/symlinks-across-layers/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Use Alpine as the builder since the final image is built on scratch
# which doesn't contain the `ln` command to generate symlinks.
FROM alpine:latest as builder

RUN mkdir dir1
RUN mkdir dir2
RUN mkdir


RUN echo "sample text" > dir1/sample.txt
RUN ln -s /dir1/sample.txt /dir2/absolute-symlink.txt
RUN ln -s /dir2/absolute-symlink.txt /dir3/chain-symlink.txt


# - root
# - dir1
# - sample.txt
# - dir2
# - absolute-symlink.txt -> /dir1/sample.txt
# - dir3
# - chain-symlink.txt -> /dir2absolute-symlink.txt
FROM scratch

# Must copy over the entire directory to preserve the symlinks.
COPY --from=builder /dir3/ /dir3/
COPY --from=builder /dir2/ /dir2/
COPY --from=builder /dir1/ /dir1/