Skip to content

Commit

Permalink
refactor: move some c code to go
Browse files Browse the repository at this point in the history
Signed-off-by: lifubang <[email protected]>
  • Loading branch information
lifubang committed Jun 5, 2024
1 parent 7cce7e2 commit bbab957
Show file tree
Hide file tree
Showing 11 changed files with 411 additions and 824 deletions.
5 changes: 5 additions & 0 deletions libcontainer/configs/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,11 @@ type IDMap struct {
Size int64 `json:"size"`
}

// ToString serializes the IDMap into a single line holding the container ID,
// the host ID and the size, in that order, written in decimal and separated
// by single spaces.
func (i IDMap) ToString() string {
	containerID, hostID, size := i.ContainerID, i.HostID, i.Size
	return fmt.Sprintf("%d %d %d", containerID, hostID, size)
}

// Seccomp represents syscall restrictions
// By default, only the native architecture of the kernel is allowed to be used
// for syscalls. Additional architectures can be added by specifying them in
Expand Down
110 changes: 24 additions & 86 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/execabs"
"golang.org/x/sys/unix"

"github.com/opencontainers/runc/libcontainer/cgroups"
Expand Down Expand Up @@ -580,6 +579,10 @@ func (c *Container) newParentProcess(p *Process) (parentProcess, error) {
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.stage1SockChild)
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_STAGE1PIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
)
cmd.ExtraFiles = append(cmd.ExtraFiles, comm.syncSockChild.File())
cmd.Env = append(cmd.Env,
"_LIBCONTAINER_SYNCPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1),
Expand Down Expand Up @@ -653,14 +656,16 @@ func (c *Container) newInitProcess(p *Process, cmd *exec.Cmd, comm *processComm)
}

init := &initProcess{
cmd: cmd,
comm: comm,
manager: c.cgroupManager,
containerProcess: containerProcess{
cmd: cmd,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
container: c,
},
intelRdtManager: c.intelRdtManager,
config: c.newInitConfig(p),
container: c,
process: p,
bootstrapData: data,
}
c.initProcess = init
return init, nil
Expand All @@ -679,15 +684,18 @@ func (c *Container) newSetnsProcess(p *Process, cmd *exec.Cmd, comm *processComm
return nil, err
}
proc := &setnsProcess{
cmd: cmd,
containerProcess: containerProcess{
cmd: cmd,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
container: c,
},
cgroupPaths: state.CgroupPaths,
rootlessCgroups: c.config.RootlessCgroups,
intelRdtPath: state.IntelRdtPath,
comm: comm,
manager: c.cgroupManager,
config: c.newInitConfig(p),
process: p,
bootstrapData: data,
initProcessPid: state.InitProcessPid,
}
if len(p.SubCgroupPaths) > 0 {
Expand Down Expand Up @@ -1041,17 +1049,6 @@ func (c *Container) orderNamespacePaths(namespaces map[configs.NamespaceType]str
return paths, nil
}

// encodeIDMapping renders each entry of idMap as one line of the form
// "<container-id> <host-id> <size>\n" and returns the lines concatenated in
// slice order. Any error reported while writing into the in-memory buffer is
// returned together with a nil slice.
func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) {
	var out bytes.Buffer
	for idx := range idMap {
		entry := &idMap[idx]
		if _, err := fmt.Fprintf(&out, "%d %d %d\n", entry.ContainerID, entry.HostID, entry.Size); err != nil {
			return nil, err
		}
	}
	return out.Bytes(), nil
}

// netlinkError is an error wrapper type for use by custom netlink message
// types. Panics with errors are wrapped in netlinkError so that the recover
// in bootstrapData can distinguish intentional panics.
Expand Down Expand Up @@ -1098,59 +1095,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
})
}

// write namespace paths only when we are not joining an existing user ns
_, joinExistingUser := nsMaps[configs.NEWUSER]
if !joinExistingUser {
// write uid mappings
if len(c.config.UIDMappings) > 0 {
if c.config.RootlessEUID {
// We resolve the paths for new{u,g}idmap from
// the context of runc to avoid doing a path
// lookup in the nsexec context.
if path, err := execabs.LookPath("newuidmap"); err == nil {
r.AddData(&Bytemsg{
Type: UidmapPathAttr,
Value: []byte(path),
})
}
}
b, err := encodeIDMapping(c.config.UIDMappings)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: UidmapAttr,
Value: b,
})
}

// write gid mappings
if len(c.config.GIDMappings) > 0 {
b, err := encodeIDMapping(c.config.GIDMappings)
if err != nil {
return nil, err
}
r.AddData(&Bytemsg{
Type: GidmapAttr,
Value: b,
})
if c.config.RootlessEUID {
if path, err := execabs.LookPath("newgidmap"); err == nil {
r.AddData(&Bytemsg{
Type: GidmapPathAttr,
Value: []byte(path),
})
}
}
if requiresRootOrMappingTool(c.config) {
r.AddData(&Boolmsg{
Type: SetgroupAttr,
Value: true,
})
}
}
}

if c.config.OomScoreAdj != nil {
// write oom_score_adj
r.AddData(&Bytemsg{
Expand All @@ -1159,12 +1103,6 @@ func (c *Container) bootstrapData(cloneFlags uintptr, nsMaps map[configs.Namespa
})
}

// write rootless
r.AddData(&Boolmsg{
Type: RootlessEUIDAttr,
Value: c.config.RootlessEUID,
})

// write boottime and monotonic time ns offsets.
if c.config.TimeOffsets != nil {
var offsetSpec bytes.Buffer
Expand Down Expand Up @@ -1205,9 +1143,9 @@ func ignoreTerminateErrors(err error) error {
return err
}

// requiresRootOrMappingTool reports whether the GID mappings under
// inspection are anything other than the single entry that maps container
// GID 0 to the effective GID of the current process with a size of 1.
// The comparison is done with reflect.DeepEqual against that one-element
// slice.
func requiresRootOrMappingTool(c *configs.Config) bool {
func requiresRootOrMappingTool(gidMappings []configs.IDMap) bool {
gidMap := []configs.IDMap{
{ContainerID: 0, HostID: int64(os.Getegid()), Size: 1},
}
return !reflect.DeepEqual(c.GIDMappings, gidMap)
return !reflect.DeepEqual(gidMappings, gidMap)
}
149 changes: 149 additions & 0 deletions libcontainer/container_setup.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
package libcontainer

import (
"encoding/json"
"fmt"
"io"
"os"

"github.com/opencontainers/runc/libcontainer/configs"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/sirupsen/logrus"
"github.com/vishvananda/netlink/nl"
"golang.org/x/sys/execabs"
"golang.org/x/sys/unix"
)

// NsExecSyncMsg is used for communication between the parent and child during
// container setup.
// Each message is a single uint32 value; parseNsExecSync and ackSyncMsg
// read and write it in native byte order.
type NsExecSyncMsg uint32

// Synchronisation messages exchanged with the child. The values start at
// 0x40 and increase by one per constant. Each "Pls" request received by
// helpDoingNsExec is answered with the matching "Ack" once it was handled.
// NOTE(review): the numeric values presumably have to match the ones used
// by the nsexec side — confirm.
const (
// syncUsermapPls asks the parent to set up the user namespace mappings.
syncUsermapPls NsExecSyncMsg = iota + 0x40
// syncUsermapAck acknowledges syncUsermapPls.
syncUsermapAck
// syncRecvPidPls announces that a pid is being reported; it is also the
// message that ends the loop in parseNsExecSync.
syncRecvPidPls
// syncRecvPidAck acknowledges syncRecvPidPls.
syncRecvPidAck
// syncTimeOffsetsPls asks the parent to configure the time namespace
// offsets.
syncTimeOffsetsPls
// syncTimeOffsetsAck acknowledges syncTimeOffsetsPls.
syncTimeOffsetsAck
)

// NsExecSetup performs the parent side of the synchronisation with the
// child for a single container process.
type NsExecSetup struct {
// process is the container process whose socket, command and
// configuration are used while handling the child's requests.
process *containerProcess
}

// bufSize is the size in bytes of one synchronisation message on the wire
// (one uint32).
const bufSize int = 4

// parseNsExecSync reads fixed-size synchronisation messages from r, decodes
// each one as a native-endian uint32 and passes it to fn.
//
// The loop ends successfully right after fn has handled syncRecvPidPls.
// A read error (including a short read) or an error returned by fn stops
// the loop immediately and is returned unchanged.
func parseNsExecSync(r io.Reader, fn func(NsExecSyncMsg) error) error {
	logrus.Debugf("start to communicate with the nsexec\n")
	order := nl.NativeEndian()
	raw := make([]byte, bufSize)

	for done := false; !done; {
		// Every message occupies exactly bufSize bytes on the wire.
		if _, err := io.ReadFull(r, raw); err != nil {
			return err
		}
		received := NsExecSyncMsg(order.Uint32(raw))
		if err := fn(received); err != nil {
			return err
		}
		done = received == syncRecvPidPls
	}

	logrus.Debugf("finished communicating with the nsexec\n")
	return nil
}

// ackSyncMsg sends msg to the child by writing it to f as a native-endian
// uint32 with a single write on the file's descriptor. A write error is
// logged at debug level and returned to the caller.
func ackSyncMsg(f *os.File, msg NsExecSyncMsg) error {
	payload := make([]byte, bufSize)
	nl.NativeEndian().PutUint32(payload, uint32(msg))

	_, err := unix.Write(int(f.Fd()), payload)
	if err != nil {
		logrus.Debugf("failed to write message to nsexec: %v", err)
	}
	return err
}

// helpDoingNsExec serves the requests the child sends over the stage-1
// socket until the child has reported its pid.
//
// For every request the matching action is run and, if it succeeded, the
// corresponding acknowledgement is written back:
//   - syncUsermapPls: set up the user mappings via setupUsermap.
//   - syncRecvPidPls: decode the JSON value sent by the child into the
//     process' childPid.
//   - syncTimeOffsetsPls: apply the configured time namespace offsets.
//
// Any other message, or a failing action, aborts the exchange with an error.
func (s *NsExecSetup) helpDoingNsExec() error {
	proc := s.process
	sock := proc.comm.stage1SockParent

	return parseNsExecSync(sock, func(msg NsExecSyncMsg) error {
		var (
			reply NsExecSyncMsg
			err   error
		)

		switch msg {
		case syncUsermapPls:
			logrus.Debugf("stage-1 requested userns mappings")
			reply, err = syncUsermapAck, s.setupUsermap()
		case syncRecvPidPls:
			logrus.Debugf("stage-1 reports pid")
			reply, err = syncRecvPidAck, json.NewDecoder(sock).Decode(&proc.childPid)
		case syncTimeOffsetsPls:
			logrus.Debugf("stage-1 requested timens offsets to be configured")
			reply, err = syncTimeOffsetsAck, system.UpdateTimeNsOffsets(proc.cmd.Process.Pid, proc.container.config.TimeOffsets)
		default:
			return fmt.Errorf("unexpected message %d", msg)
		}

		if err != nil {
			return err
		}
		return ackSyncMsg(sock, reply)
	})
}

// setupUsermap installs the UID and GID mappings of the container
// configuration for the process started by cmd.
//
// It returns the first error reported while updating the UID map, otherwise
// the result of updating the GID map.
func (s *NsExecSetup) setupUsermap() error {
	proc := s.process

	// With a rootless EUID and GID mappings that consist solely of the
	// single-entry mapping accepted by requiresRootOrMappingTool,
	// setgroups is explicitly denied for the child.
	// NOTE(review): the result is discarded as in the original code;
	// presumably this is best-effort — confirm.
	if proc.config.RootlessEUID && !requiresRootOrMappingTool(proc.config.Config.GIDMappings) {
		_ = system.UpdateSetgroups(proc.cmd.Process.Pid, system.SetgroupsDeny)
	}

	cfg := proc.container.config

	// A user namespace entry that carries a path means an existing user
	// namespace is joined.
	joinsUserNS := false
	for _, ns := range cfg.Namespaces {
		if ns.Type == configs.NEWUSER && ns.Path != "" {
			joinsUserNS = true
			break
		}
	}

	// The mapping helper binaries are only looked up for rootless
	// containers that do not join an existing user namespace and that
	// actually have mappings of the respective kind. A failed lookup
	// leaves the path empty.
	var uidMapPath, gidMapPath string
	if !joinsUserNS && cfg.RootlessEUID {
		if len(cfg.UIDMappings) > 0 {
			if tool, err := execabs.LookPath("newuidmap"); err == nil {
				uidMapPath = tool
			}
		}
		if len(cfg.GIDMappings) > 0 {
			if tool, err := execabs.LookPath("newgidmap"); err == nil {
				gidMapPath = tool
			}
		}
	}

	// Set up the mappings, UID first.
	if err := system.UpdateUidmap(uidMapPath, proc.cmd.Process.Pid, cfg.UIDMappings); err != nil {
		return err
	}
	return system.UpdateGidmap(gidMapPath, proc.cmd.Process.Pid, cfg.GIDMappings)
}
29 changes: 24 additions & 5 deletions libcontainer/init_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"strconv"
"strings"
"syscall"
"unsafe"

"github.com/containerd/console"
"github.com/moby/sys/user"
Expand All @@ -35,11 +36,6 @@ const (
initStandard initType = "standard"
)

// pid holds two process IDs that are exchanged as JSON under the keys
// "stage2_pid" and "stage1_pid".
type pid struct {
// Pid is encoded under the JSON key "stage2_pid".
Pid int `json:"stage2_pid"`
// PidFirstChild is encoded under the JSON key "stage1_pid".
PidFirstChild int `json:"stage1_pid"`
}

// network is an internal struct used to setup container networks.
type network struct {
configs.Network
Expand Down Expand Up @@ -151,6 +147,11 @@ func startInitialization() (retErr error) {

logrus.SetOutput(logPipe)
logrus.SetFormatter(new(logrus.JSONFormatter))

// Rename the process; the name is only used for debugging. PR_SET_NAME
// expects a pointer to a NUL-terminated byte string. Taking the address of
// a Go string variable would hand the kernel the string header (data
// pointer and length) instead of the characters, so the name is converted
// with unix.BytePtrFromString first. Errors are ignored on purpose because
// a missing process name must not fail the initialization.
procName := "runc:[2:INIT]"
if namePtr, err := unix.BytePtrFromString(procName); err == nil {
	_ = unix.Prctl(unix.PR_SET_NAME, uintptr(unsafe.Pointer(namePtr)), 0, 0, 0)
}

logrus.Debug("child process in init()")

// Only init processes have FIFOFD.
Expand Down Expand Up @@ -215,6 +216,24 @@ func startInitialization() (retErr error) {
return err
}

if _, err := unix.Setsid(); err != nil {
return fmt.Errorf("setsid failed: %w", err)
}

if err := unix.Setuid(0); err != nil {
return fmt.Errorf("setuid failed %w", err)
}

if err := unix.Setgid(0); err != nil {
return fmt.Errorf("setgid failed %w", err)
}

if !config.RootlessEUID && requiresRootOrMappingTool(config.Config.GIDMappings) {
if err := unix.Setgroups([]int{0}); err != nil {
return fmt.Errorf("setgroups failed %w", err)
}
}

// If init succeeds, it will not return, hence none of the defers will be called.
return containerInit(it, &config, syncPipe, consoleSocket, pidfdSocket, fifoFile, logPipe, dmzExe)
}
Expand Down
Loading

0 comments on commit bbab957

Please sign in to comment.