Skip to content

Commit

Permalink
Added logic to guess where a base image starts given the history of c…
Browse files Browse the repository at this point in the history
…ommands used to build an application image.

PiperOrigin-RevId: 696527545
  • Loading branch information
Mario Leyva authored and copybara-github committed Nov 14, 2024
1 parent fa233c9 commit 464c973
Show file tree
Hide file tree
Showing 2 changed files with 328 additions and 0 deletions.
120 changes: 120 additions & 0 deletions artifact/image/layerscanning/find_base_image.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package image provides functionality to scan a container image by layers for software
// inventory.
package image

import (
"errors"
"strings"

v1 "github.com/google/go-containerregistry/pkg/v1"
)

// History "created by" prefixes that findBaseImageIndex matches (case-sensitively, via
// strings.HasPrefix) to recognise a CMD or ENTRYPOINT instruction.
const (
// cmdPrefix is the prefix of a CMD instruction recorded in the "/bin/sh -c #(nop)" form of the
// history's CreatedBy field.
cmdPrefix = "/bin/sh -c #(nop) CMD"
// cmdBuildKitPrefix is the bare prefix of a CMD instruction in the CreatedBy field
// (presumably the form written by BuildKit builds, going by the name — TODO confirm).
cmdBuildKitPrefix = "CMD"
// entrypointPrefix is the prefix of an ENTRYPOINT instruction recorded in the
// "/bin/sh -c #(nop)" form of the history's CreatedBy field.
entrypointPrefix = "/bin/sh -c #(nop) ENTRYPOINT"
// entrypointBuildKitPrefix is the bare prefix of an ENTRYPOINT instruction in the CreatedBy
// field (presumably the form written by BuildKit builds, going by the name — TODO confirm).
entrypointBuildKitPrefix = "ENTRYPOINT"
)

// ErrBaseImageNotFound is returned by findBaseImageIndex when the history contains no CMD or
// ENTRYPOINT entry that could mark where the base image ends. Callers should compare against this
// sentinel value (e.g. with errors.Is) rather than matching on the message text.
var ErrBaseImageNotFound = errors.New("unable to find base image")

// findBaseImageIndex guesses which history entry marks the end of the base image, using only the
// build-command history of the final image.
//
// For example, consider an application image built from this Dockerfile:
//
//	FROM nginx:latest
//	COPY custom-binary /custom-binary
//	CMD ["buildcmd"]
//
// where the base image (nginx:latest) itself has the following history:
//
//	ADD rootfs.tar.xz /
//	CMD ["bash"]
//	ENV NGINX_VERSION=1.27.2
//	RUN /bin/sh -c set -x
//	COPY file1 /file1
//	ENTRYPOINT [\"/docker-entrypoint.sh\"]
//	EXPOSE map[80/tcp:{}]
//	STOPSIGNAL SIGQUIT
//	CMD [\"nginx\" \"-g\" \"daemon off;\"]
//
// The history of the application image is then the concatenation of both:
//
//	ADD rootfs.tar.xz /
//	CMD ["bash"]
//	ENV NGINX_VERSION=1.27.2
//	RUN /bin/sh -c set -x
//	COPY file1 /file1
//	ENTRYPOINT [\"/docker-entrypoint.sh\"]
//	EXPOSE map[80/tcp:{}]
//	STOPSIGNAL SIGQUIT
//	CMD [\"nginx\" \"-g\" \"daemon off;\"]    // second to last CMD: its index is returned
//	COPY custom-binary /custom-binary
//	CMD ["buildcmd"]                          // last CMD: skipped
//
// The function determines that the base image ends at the second to last CMD command. The history
// is walked from the final entry backwards:
//  1. Trailing empty layers (ENTRYPOINT, EXPOSE, STOPSIGNAL, CMD, etc.) are skipped until a
//     populated layer is reached.
//  2. From that populated layer on, populated layers are skipped and the first empty layer whose
//     CreatedBy starts with a CMD or ENTRYPOINT prefix is reported.
//  3. If the walk finishes without such a match, 0 and ErrBaseImageNotFound are returned.
func findBaseImageIndex(histories []v1.History) (int, error) {
	// Phase 1: step back over the trailing empty layers. A populated layer is one that adds,
	// removes, or modifies files / directories in the container image; idx ends up on the last
	// populated layer, or at -1 when every entry is empty (or there are no entries at all).
	idx := len(histories) - 1
	for idx >= 0 && histories[idx].EmptyLayer {
		idx--
	}

	// Phase 2: keep walking backwards, looking for the closest empty layer that was created by a
	// CMD or ENTRYPOINT instruction.
	for ; idx >= 0; idx-- {
		entry := histories[idx]

		// Populated layers (including the one phase 1 stopped on) can never be the answer.
		if !entry.EmptyLayer {
			continue
		}

		createdBy := entry.CreatedBy
		switch {
		case strings.HasPrefix(createdBy, cmdPrefix),
			strings.HasPrefix(createdBy, cmdBuildKitPrefix),
			strings.HasPrefix(createdBy, entrypointPrefix),
			strings.HasPrefix(createdBy, entrypointBuildKitPrefix):
			return idx, nil
		}
	}

	return 0, ErrBaseImageNotFound
}
208 changes: 208 additions & 0 deletions artifact/image/layerscanning/find_base_image_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
// Copyright 2024 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package image

import (
"testing"

v1 "github.com/google/go-containerregistry/pkg/v1"
)

// TestFindBaseImageIndex runs findBaseImageIndex against a table of image histories and checks
// both the returned index and the returned error for each case.
func TestFindBaseImageIndex(t *testing.T) {
	tests := []struct {
		name      string
		histories []v1.History
		wantIndex int
		wantError error
	}{
		{
			name:      "empty history",
			histories: []v1.History{},
			wantError: ErrBaseImageNotFound,
		},
		{
			name: "single empty history",
			histories: []v1.History{
				{
					CreatedBy:  "Empty Layer",
					EmptyLayer: true,
				},
			},
			wantError: ErrBaseImageNotFound,
		},
		{
			name: "single non empty history",
			histories: []v1.History{
				{
					CreatedBy:  "Non Empty Layer",
					EmptyLayer: false,
				},
			},
			wantError: ErrBaseImageNotFound,
		},
		{
			// The only CMD comes after the last populated layer, so it is skipped as a trailing
			// empty layer and nothing is left to match.
			name: "single CMD command in history",
			histories: []v1.History{
				{
					CreatedBy:  "Non Empty Layer",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "CMD[\"somecmd\"]",
					EmptyLayer: true,
				},
			},
			wantError: ErrBaseImageNotFound,
		},
		{
			name: "two CMD commands in history",
			histories: []v1.History{
				{
					CreatedBy:  "ADD file:123 in /",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "CMD[\"/bin/sh\"]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "RUN apt-get update",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "COPY dir /dir",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "ENTRYPOINT [\"entrypoint.sh\"]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "CMD [\"buildcmd\"]",
					EmptyLayer: true,
				},
			},
			wantIndex: 1,
		},
		{
			name: "nginx image with multiple base images in history",
			histories: []v1.History{
				{
					CreatedBy:  "ADD rootfs.tar.xz /",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "CMD[\"bash\"]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "ENV NGINX_VERSION=1.27.2",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "RUN /bin/sh -c set -x",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "COPY file1 /file1",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "Entrypoint [\"/docker-entrypoint.sh\"]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "EXPOSE map[80/tcp:{}]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "STOPSIGNAL SIGQUIT",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "CMD [\"nginx\" \"-g\" \"daemon off;\"]",
					EmptyLayer: true,
				},
			},
			// Want to return the index of the following command: CMD ["bash"]
			wantIndex: 1,
		},
		{
			name: "custom nginx image with multiple base images in history",
			histories: []v1.History{
				{
					CreatedBy:  "ADD rootfs.tar.xz /",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "CMD[\"bash\"]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "ENV NGINX_VERSION=1.27.2",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "RUN /bin/sh -c set -x",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "COPY file1 /file1",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "Entrypoint [\"/docker-entrypoint.sh\"]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "EXPOSE map[80/tcp:{}]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "STOPSIGNAL SIGQUIT",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "CMD [\"nginx\" \"-g\" \"daemon off;\"]",
					EmptyLayer: true,
				},
				{
					CreatedBy:  "/bin/sh -c #(nop) COPY custom-binary /custom-binary",
					EmptyLayer: false,
				},
				{
					CreatedBy:  "/bin/sh -c #(nop) CMD [\"buildcmd\"]",
					EmptyLayer: true,
				},
			},
			// Want to return the index of the following command: CMD ["nginx" "-g" "daemon off;"]
			wantIndex: 8,
		},
	}

	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			gotIndex, gotErr := findBaseImageIndex(test.histories)
			if test.wantError != gotErr {
				t.Errorf("findBaseImageIndex(%v) returned error: %v, want error: %v", test.histories, gotErr, test.wantError)
				// The index is meaningless when the error expectation is not met.
				return
			}

			if gotIndex != test.wantIndex {
				t.Errorf("findBaseImageIndex(%v) = %v, want: %v", test.histories, gotIndex, test.wantIndex)
			}
		})
	}
}

0 comments on commit 464c973

Please sign in to comment.