Skip to content

Commit

Permalink
*: introduce pidfd-socket flag
Browse files Browse the repository at this point in the history
A container manager such as containerd-shim can't use the cgroup.kill feature
or freeze all the processes in the cgroup to terminate an exec init process.
Calling kill(2) is unsafe because the pid can be recycled. Instead, runc can
provide the pidfd of the init process through a pidfd-socket, similar to the
console-socket. With the pidfd, a container manager such as containerd-shim
can safely send a signal to the target process.

For the standard init process, the pidfd also makes it possible to poll for
the exit event instead of blocking on wait4.

Signed-off-by: Wei Fu <[email protected]>
  • Loading branch information
fuweid committed Nov 21, 2023
1 parent 27eb67a commit 94505a0
Show file tree
Hide file tree
Showing 14 changed files with 371 additions and 5 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ vendor/pkg
/contrib/cmd/seccompagent/seccompagent
/contrib/cmd/fs-idmap/fs-idmap
/contrib/cmd/memfd-bind/memfd-bind
/contrib/cmd/pidfd-kill/pidfd-kill
man/man8
release
Vagrantfile
Expand Down
7 changes: 4 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,10 @@ runc-bin: runc-dmz
$(GO_BUILD) -o runc .

.PHONY: all
all: runc recvtty sd-helper seccompagent fs-idmap memfd-bind
all: runc recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill

.PHONY: recvtty sd-helper seccompagent fs-idmap memfd-bind
recvtty sd-helper seccompagent fs-idmap memfd-bind:
.PHONY: recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill
recvtty sd-helper seccompagent fs-idmap memfd-bind pidfd-kill:
$(GO_BUILD) -o contrib/cmd/$@/$@ ./contrib/cmd/$@

.PHONY: static
Expand Down Expand Up @@ -194,6 +194,7 @@ clean:
rm -f contrib/cmd/sd-helper/sd-helper
rm -f contrib/cmd/seccompagent/seccompagent
rm -f contrib/cmd/memfd-bind/memfd-bind
rm -f contrib/cmd/pidfd-kill/pidfd-kill
sudo rm -rf release
rm -rf man/man8

Expand Down
114 changes: 114 additions & 0 deletions contrib/cmd/pidfd-kill/pidfd-kill.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
package main

import (
	"errors"
	"fmt"
	"net"
	"os"
	"os/signal"
	"strings"

	"github.com/urfave/cli"
	"golang.org/x/sys/unix"

	"github.com/opencontainers/runc/libcontainer/utils"
)

const (
// usage is the help text assigned to app.Usage in main.
usage = `Open Container Initiative contrib/cmd/pidfd-kill
pidfd-kill is an implementation of a consumer of runC's --pidfd-socket API.
After received SIGTERM, pidfd-kill sends the given signal to init process by
pidfd received from --pidfd-socket.
To use pidfd-kill, just specify a socket path at which you want to receive
pidfd:
$ pidfd-kill [--signal KILL] socket.sock
`
)

func main() {
app := cli.NewApp()
app.Name = "pidfd-kill"
app.Usage = usage

app.Flags = []cli.Flag{
cli.StringFlag{
Name: "signal",
Value: "SIGKILL",
Usage: "Signal to send to the init process",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Usage: "Path to write the pidfd-kill process ID to",
},
}

app.Action = func(ctx *cli.Context) error {
args := ctx.Args()
if len(args) != 1 {
return errors.New("required a single socket path")
}

socketFile := ctx.Args()[0]

pidFile := ctx.String("pid-file")
if pidFile != "" {
pid := fmt.Sprintf("%d\n", os.Getpid())
if err := os.WriteFile(pidFile, []byte(pid), 0o644); err != nil {
return err
}
defer os.Remove(pidFile)
}

sigStr := ctx.String("signal")
if sigStr == "" {
sigStr = "SIGKILL"
}
sig := unix.SignalNum(sigStr)

pidfdFile, err := recvPidfd(socketFile)
if err != nil {
return err
}
defer pidfdFile.Close()

signalCh := make(chan os.Signal, 16)
signal.Notify(signalCh, unix.SIGTERM)
<-signalCh

return unix.PidfdSendSignal(int(pidfdFile.Fd()), sig, nil, 0)
}
if err := app.Run(os.Args); err != nil {
fmt.Fprintln(os.Stderr, "fatal error:", err)
os.Exit(1)
}
}

// recvPidfd listens on the unix socket at socketFile, accepts one connection
// and returns whatever utils.RecvFile reads from it. The listener, the
// accepted connection and the *os.File obtained from the connection are all
// closed before the function returns.
func recvPidfd(socketFile string) (*os.File, error) {
	listener, err := net.Listen("unix", socketFile)
	if err != nil {
		return nil, err
	}
	defer listener.Close()

	accepted, err := listener.Accept()
	if err != nil {
		return nil, err
	}
	defer accepted.Close()

	var sockFile *os.File
	switch c := accepted.(type) {
	case *net.UnixConn:
		if sockFile, err = c.File(); err != nil {
			return nil, err
		}
	default:
		return nil, errors.New("failed to cast to unixconn")
	}
	defer sockFile.Close()

	return utils.RecvFile(sockFile)
}
4 changes: 4 additions & 0 deletions create.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Value: "",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pidfd-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
},
cli.StringFlag{
Name: "pid-file",
Value: "",
Expand Down
5 changes: 5 additions & 0 deletions exec.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ following will output a list of processes running in the container:
Name: "console-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pidfd-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the exec process",
},
cli.StringFlag{
Name: "cwd",
Usage: "current working directory in the container",
Expand Down Expand Up @@ -181,6 +185,7 @@ func execProcess(context *cli.Context) (int, error) {
shouldDestroy: false,
container: container,
consoleSocket: context.String("console-socket"),
pidfdSocket: context.String("pidfd-socket"),
detach: context.Bool("detach"),
pidFile: context.String("pid-file"),
action: CT_ACT_RUN,
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,13 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
cmd.Env = append(cmd.Env, "_LIBCONTAINER_LOGLEVEL="+p.LogLevel)
}

if p.PidfdSocket != nil {
cmd.ExtraFiles = append(cmd.ExtraFiles, p.PidfdSocket)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_PIDFD_SOCK="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
}

if safeExe != nil {
// Due to a Go stdlib bug, we need to add safeExe to the set of
// ExtraFiles otherwise it is possible for the stdlib to clobber the fd
Expand Down
33 changes: 31 additions & 2 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,16 @@ func startInitialization() (retErr error) {
defer consoleSocket.Close()
}

var pidfdSocket *os.File
if envSockFd := os.Getenv("_LIBCONTAINER_PIDFD_SOCK"); envSockFd != "" {
sockFd, err := strconv.Atoi(envSockFd)
if err != nil {
return fmt.Errorf("unable to convert _LIBCONTAINER_PIDFD_SOCK: %w", err)
}
pidfdSocket = os.NewFile(uintptr(sockFd), "pidfd-socket")
defer pidfdSocket.Close()
}

// Get mount files (O_PATH).
mountSrcFds, err := parseFdsFromEnv("_LIBCONTAINER_MOUNT_FDS")
if err != nil {
Expand Down Expand Up @@ -222,10 +232,10 @@ func startInitialization() (retErr error) {
}

// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, &config, syncPipe, consoleSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifofd, logFD, dmzExe, mountFds{sourceFds: mountSrcFds, idmapFds: idmapFds})
}

func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSocket, pidfdSocket *os.File, fifoFd, logFd int, dmzExe *os.File, mountFds mountFds) error {
if err := populateProcessEnvironment(config.Env); err != nil {
return err
}
Expand All @@ -240,6 +250,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
i := &linuxSetnsInit{
pipe: pipe,
consoleSocket: consoleSocket,
pidfdSocket: pidfdSocket,
config: config,
logFd: logFd,
dmzExe: dmzExe,
Expand All @@ -249,6 +260,7 @@ func containerInit(t initType, config *initConfig, pipe *syncSocket, consoleSock
i := &linuxStandardInit{
pipe: pipe,
consoleSocket: consoleSocket,
pidfdSocket: pidfdSocket,
parentPid: unix.Getppid(),
config: config,
fifoFd: fifoFd,
Expand Down Expand Up @@ -690,3 +702,20 @@ func signalAllProcesses(m cgroups.Manager, s unix.Signal) error {

return nil
}

// setupPidfd opens a pidfd for the calling process with pidfd_open and hands
// it to utils.SendRawFd together with socket and initType. The socket is
// closed when the function returns, and the local pidfd is closed whether or
// not the send succeeded. A send failure takes precedence over a close
// failure in the returned error.
func setupPidfd(socket *os.File, initType string) error {
	defer socket.Close()

	selfFd, err := unix.PidfdOpen(os.Getpid(), 0)
	if err != nil {
		return fmt.Errorf("failed to pidfd_open: %w", err)
	}

	sendErr := utils.SendRawFd(socket, initType, uintptr(selfFd))
	closeErr := unix.Close(selfFd)
	if sendErr != nil {
		return fmt.Errorf("failed to send pidfd on socket: %w", sendErr)
	}
	return closeErr
}
3 changes: 3 additions & 0 deletions libcontainer/process.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ type Process struct {
// ConsoleSocket provides the masterfd console.
ConsoleSocket *os.File

// PidfdSocket is the socket over which the process sends its own pidfd.
PidfdSocket *os.File

// Init specifies whether the process is the first process in the container.
Init bool

Expand Down
6 changes: 6 additions & 0 deletions libcontainer/setns_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
type linuxSetnsInit struct {
pipe *syncSocket
consoleSocket *os.File
pidfdSocket *os.File
config *initConfig
logFd int
dmzExe *os.File
Expand Down Expand Up @@ -56,6 +57,11 @@ func (l *linuxSetnsInit) Init() error {
return err
}
}
if l.pidfdSocket != nil {
if err := setupPidfd(l.pidfdSocket, "setns"); err != nil {
return fmt.Errorf("failed to setup pidfd: %w", err)
}
}
if l.config.NoNewPrivileges {
if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil {
return err
Expand Down
7 changes: 7 additions & 0 deletions libcontainer/standard_init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
type linuxStandardInit struct {
pipe *syncSocket
consoleSocket *os.File
pidfdSocket *os.File
parentPid int
fifoFd int
logFd int
Expand Down Expand Up @@ -114,6 +115,12 @@ func (l *linuxStandardInit) Init() error {
}
}

if l.pidfdSocket != nil {
if err := setupPidfd(l.pidfdSocket, "standard"); err != nil {
return fmt.Errorf("failed to setup pidfd: %w", err)
}
}

// Finish the rootfs setup.
if l.config.Config.Namespaces.Contains(configs.NEWNS) {
if err := finalizeRootfs(l.config.Config); err != nil {
Expand Down
4 changes: 4 additions & 0 deletions run.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ command(s) that get executed on start, edit the args parameter of the spec. See
Value: "",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the master end of the console's pseudoterminal",
},
cli.StringFlag{
Name: "pidfd-socket",
Usage: "path to an AF_UNIX socket which will receive a file descriptor referencing the init process",
},
cli.BoolFlag{
Name: "detach, d",
Usage: "detach from the container's process",
Expand Down
42 changes: 42 additions & 0 deletions tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ RECVTTY="${INTEGRATION_ROOT}/../../contrib/cmd/recvtty/recvtty"
SD_HELPER="${INTEGRATION_ROOT}/../../contrib/cmd/sd-helper/sd-helper"
SECCOMP_AGENT="${INTEGRATION_ROOT}/../../contrib/cmd/seccompagent/seccompagent"
FS_IDMAP="${INTEGRATION_ROOT}/../../contrib/cmd/fs-idmap/fs-idmap"
PIDFD_KILL="${INTEGRATION_ROOT}/../../contrib/cmd/pidfd-kill/pidfd-kill"

# Some variables may not always be set. Set those to empty value,
# if unset, to avoid "unbound variable" error.
Expand Down Expand Up @@ -697,3 +698,44 @@ function requires_idmap_fs() {
esac
# If we have another error, the integration test will fail and report it.
}

# setup_pidfd_kill starts a detached pidfd-kill process in the background.
# It creates ${ROOT}/pidfd, exports PIDFD_SOCKET as the socket path inside it,
# and asks pidfd-kill to write its PID to ${ROOT}/pidfd/pid. $1 is passed to
# pidfd-kill as --signal. Returns 1 when ROOT is not set; otherwise returns
# the status of waiting for the socket to appear.
function setup_pidfd_kill() {
	local signal=$1

	if [ ! -v ROOT ]; then
		return 1
	fi

	local dir="${ROOT}/pidfd"
	mkdir "${dir}"

	PIDFD_SOCKET="${dir}/sock"
	export PIDFD_SOCKET

	("${PIDFD_KILL}" --pid-file "${dir}/pid" --signal "${signal}" "${PIDFD_SOCKET}" &) &

	# Wait until pidfd-kill has created its listening socket.
	retry 10 1 stat "${PIDFD_SOCKET}"
}

# teardown_pidfd_kill cleans up everything created by setup_pidfd_kill: it
# sends SIGKILL to the PID recorded in ${ROOT}/pidfd/pid (when that file
# exists) and removes the ${ROOT}/pidfd directory. It is a no-op when ROOT is
# not set.
function teardown_pidfd_kill() {
	if [ ! -v ROOT ]; then
		return 0
	fi

	local dir="${ROOT}/pidfd"
	local pid_file="${dir}/pid"

	if [ -f "${pid_file}" ]; then
		kill -9 "$(cat "${pid_file}")"
	fi

	rm -rf "${dir}"
}

# pidfd_kill sends the default kill signal (SIGTERM) to the PID recorded in
# ${ROOT}/pidfd/pid, if that file exists. It does nothing when ROOT is not
# set or the pid file is missing.
function pidfd_kill() {
	if [ ! -v ROOT ]; then
		return 0
	fi

	local dir="${ROOT}/pidfd"

	[ -f "${dir}/pid" ] || return 0
	kill "$(cat "${dir}/pid")"
}
Loading

0 comments on commit 94505a0

Please sign in to comment.