Merge pull request #123060 from dims/update-to-runc-v1.1.12

Update to runc 1.1.12
This commit is contained in:
Kubernetes Prow Robot 2024-02-01 16:22:47 -08:00 committed by GitHub
commit 6a4e93e776
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 172 additions and 28 deletions

2
go.mod
View File

@ -49,7 +49,7 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822
github.com/onsi/ginkgo/v2 v2.15.0 github.com/onsi/ginkgo/v2 v2.15.0
github.com/onsi/gomega v1.31.0 github.com/onsi/gomega v1.31.0
github.com/opencontainers/runc v1.1.11 github.com/opencontainers/runc v1.1.12
github.com/opencontainers/selinux v1.11.0 github.com/opencontainers/selinux v1.11.0
github.com/pkg/errors v0.9.1 github.com/pkg/errors v0.9.1
github.com/pmezard/go-difflib v1.0.0 github.com/pmezard/go-difflib v1.0.0

4
go.sum
View File

@ -628,8 +628,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM= github.com/opencontainers/image-spec v1.0.2 h1:9yCKha/T5XdGtO0q9Q9a6T5NUCsTn/DrBg0D7ufOcFM=
github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0= github.com/opencontainers/image-spec v1.0.2/go.mod h1:BtxoFyWECRxE4U/7sNtV5W15zMzWCbyJoFRP3s7yZA0=
github.com/opencontainers/runc v1.1.11 h1:9LjxyVlE0BPMRP2wuQDRlHV4941Jp9rc3F0+YKimopA= github.com/opencontainers/runc v1.1.12 h1:BOIssBaW1La0/qbNZHXOOa71dZfZEQOzW7dqQf3phss=
github.com/opencontainers/runc v1.1.11/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8= github.com/opencontainers/runc v1.1.12/go.mod h1:S+lQwSfncpBha7XTy/5lBwWgm5+y5Ma/O44Ekby9FK8=
github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-spec v1.0.3-0.20200929063507-e6143ca7d51d/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0=
github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 h1:R5M2qXZiK/mWPMT4VldCOiSL9HIAMuxQZWdG0CSM5+4= github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 h1:R5M2qXZiK/mWPMT4VldCOiSL9HIAMuxQZWdG0CSM5+4=

View File

@ -77,16 +77,16 @@ var (
// TestMode is set to true by unit tests that need "fake" cgroupfs. // TestMode is set to true by unit tests that need "fake" cgroupfs.
TestMode bool TestMode bool
cgroupFd int = -1 cgroupRootHandle *os.File
prepOnce sync.Once prepOnce sync.Once
prepErr error prepErr error
resolveFlags uint64 resolveFlags uint64
) )
func prepareOpenat2() error { func prepareOpenat2() error {
prepOnce.Do(func() { prepOnce.Do(func() {
fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{ fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{
Flags: unix.O_DIRECTORY | unix.O_PATH, Flags: unix.O_DIRECTORY | unix.O_PATH | unix.O_CLOEXEC,
}) })
if err != nil { if err != nil {
prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err}
@ -97,15 +97,16 @@ func prepareOpenat2() error {
} }
return return
} }
file := os.NewFile(uintptr(fd), cgroupfsDir)
var st unix.Statfs_t var st unix.Statfs_t
if err = unix.Fstatfs(fd, &st); err != nil { if err := unix.Fstatfs(int(file.Fd()), &st); err != nil {
prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err} prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err}
logrus.Warnf("falling back to securejoin: %s", prepErr) logrus.Warnf("falling back to securejoin: %s", prepErr)
return return
} }
cgroupFd = fd cgroupRootHandle = file
resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS
if st.Type == unix.CGROUP2_SUPER_MAGIC { if st.Type == unix.CGROUP2_SUPER_MAGIC {
// cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks
@ -132,7 +133,7 @@ func openFile(dir, file string, flags int) (*os.File, error) {
return openFallback(path, flags, mode) return openFallback(path, flags, mode)
} }
fd, err := unix.Openat2(cgroupFd, relPath, fd, err := unix.Openat2(int(cgroupRootHandle.Fd()), relPath,
&unix.OpenHow{ &unix.OpenHow{
Resolve: resolveFlags, Resolve: resolveFlags,
Flags: uint64(flags) | unix.O_CLOEXEC, Flags: uint64(flags) | unix.O_CLOEXEC,
@ -140,20 +141,20 @@ func openFile(dir, file string, flags int) (*os.File, error) {
}) })
if err != nil { if err != nil {
err = &os.PathError{Op: "openat2", Path: path, Err: err} err = &os.PathError{Op: "openat2", Path: path, Err: err}
// Check if cgroupFd is still opened to cgroupfsDir // Check if cgroupRootHandle is still opened to cgroupfsDir
// (happens when this package is incorrectly used // (happens when this package is incorrectly used
// across the chroot/pivot_root/mntns boundary, or // across the chroot/pivot_root/mntns boundary, or
// when /sys/fs/cgroup is remounted). // when /sys/fs/cgroup is remounted).
// //
// TODO: if such usage will ever be common, amend this // TODO: if such usage will ever be common, amend this
// to reopen cgroupFd and retry openat2. // to reopen cgroupRootHandle and retry openat2.
fdStr := strconv.Itoa(cgroupFd) fdStr := strconv.Itoa(int(cgroupRootHandle.Fd()))
fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr) fdDest, _ := os.Readlink("/proc/self/fd/" + fdStr)
if fdDest != cgroupfsDir { if fdDest != cgroupfsDir {
// Wrap the error so it is clear that cgroupFd // Wrap the error so it is clear that cgroupRootHandle
// is opened to an unexpected/wrong directory. // is opened to an unexpected/wrong directory.
err = fmt.Errorf("cgroupFd %s unexpectedly opened to %s != %s: %w", err = fmt.Errorf("cgroupRootHandle %d unexpectedly opened to %s != %s: %w",
fdStr, fdDest, cgroupfsDir, err) cgroupRootHandle.Fd(), fdDest, cgroupfsDir, err)
} }
return nil, err return nil, err
} }

View File

@ -83,6 +83,7 @@ func tryDefaultCgroupRoot() string {
if err != nil { if err != nil {
return "" return ""
} }
defer dir.Close()
names, err := dir.Readdirnames(1) names, err := dir.Readdirnames(1)
if err != nil { if err != nil {
return "" return ""

View File

@ -353,6 +353,15 @@ func (c *linuxContainer) start(process *Process) (retErr error) {
}() }()
} }
// Before starting "runc init", mark all non-stdio open files as O_CLOEXEC
// to make sure we don't leak any files into "runc init". Any files to be
// passed to "runc init" through ExtraFiles will get dup2'd by the Go
// runtime and thus their O_CLOEXEC flag will be cleared. This is some
// additional protection against attacks like CVE-2024-21626, by making
// sure we never leak files to "runc init" we didn't intend to.
if err := utils.CloseExecFrom(3); err != nil {
return fmt.Errorf("unable to mark non-stdio fds as cloexec: %w", err)
}
if err := parent.start(); err != nil { if err := parent.start(); err != nil {
return fmt.Errorf("unable to start container process: %w", err) return fmt.Errorf("unable to start container process: %w", err)
} }

View File

@ -8,6 +8,7 @@ import (
"io" "io"
"net" "net"
"os" "os"
"path/filepath"
"strings" "strings"
"unsafe" "unsafe"
@ -135,6 +136,32 @@ func populateProcessEnvironment(env []string) error {
return nil return nil
} }
// verifyCwd reports an error when the process's current working directory is
// not located inside the root of its current mount namespace.
//
// The check leans on a Linux behaviour: when cwd lies outside the rootfs of
// the current mount namespace root, getcwd(2) prefixes the path it returns
// with "(unreachable)". Both glibc's getcwd(3) and Go's Getwd() notice this
// and fail with ENOENT instead of handing back a non-absolute path, so the
// result of the call is enough to tell whether cwd is valid. See getcwd(3)
// for details, and CVE-2024-21626 for the security issue that motivated this
// check.
//
// unix.Getwd() is used on purpose: os.Getwd() carries a $PWD workaround that
// does stat(.), and that can fail when the current directory is inaccessible
// to the container process.
func verifyCwd() error {
	cwd, err := unix.Getwd()
	switch {
	case errors.Is(err, unix.ENOENT):
		// ENOENT is how an unreachable cwd is reported (see above).
		return errors.New("current working directory is outside of container mount namespace root -- possible container breakout detected")
	case err != nil:
		return fmt.Errorf("failed to verify if current working directory is safe: %w", err)
	case !filepath.IsAbs(cwd):
		// Not expected to ever trigger; kept as a belt-and-braces check.
		return fmt.Errorf("current working directory is not absolute -- possible container breakout detected: cwd is %q", cwd)
	}
	return nil
}
// finalizeNamespace drops the caps, sets the correct user // finalizeNamespace drops the caps, sets the correct user
// and working dir, and closes any leaked file descriptors // and working dir, and closes any leaked file descriptors
// before executing the command inside the namespace // before executing the command inside the namespace
@ -193,6 +220,10 @@ func finalizeNamespace(config *initConfig) error {
return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err) return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %w", config.Cwd, err)
} }
} }
// Make sure our final working directory is inside the container.
if err := verifyCwd(); err != nil {
return err
}
if err := system.ClearKeepCaps(); err != nil { if err := system.ClearKeepCaps(); err != nil {
return fmt.Errorf("unable to clear keep caps: %w", err) return fmt.Errorf("unable to clear keep caps: %w", err)
} }

View File

@ -4,6 +4,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"os" "os"
"os/exec"
"strconv" "strconv"
"github.com/opencontainers/selinux/go-selinux" "github.com/opencontainers/selinux/go-selinux"
@ -14,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys" "github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
) )
// linuxSetnsInit performs the container's initialization for running a new process // linuxSetnsInit performs the container's initialization for running a new process
@ -82,6 +84,21 @@ func (l *linuxSetnsInit) Init() error {
if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil {
return err return err
} }
// Check for the arg before waiting to make sure it exists and it is
// returned as a create time error.
name, err := exec.LookPath(l.config.Args[0])
if err != nil {
return err
}
// exec.LookPath in Go < 1.20 might return no error for an executable
// residing on a file system mounted with noexec flag, so perform this
// extra check now while we can still return a proper error.
// TODO: remove this once go < 1.20 is not supported.
if err := eaccess(name); err != nil {
return &os.PathError{Op: "eaccess", Path: name, Err: err}
}
// Set seccomp as close to execve as possible, so as few syscalls take // Set seccomp as close to execve as possible, so as few syscalls take
// place afterward (reducing the amount of syscalls that users need to // place afterward (reducing the amount of syscalls that users need to
// enable in their seccomp profiles). // enable in their seccomp profiles).
@ -101,5 +118,23 @@ func (l *linuxSetnsInit) Init() error {
return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err} return &os.PathError{Op: "close log pipe", Path: "fd " + strconv.Itoa(l.logFd), Err: err}
} }
return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) // Close all file descriptors we are not passing to the container. This is
// necessary because the execve target could use internal runc fds as the
// execve path, potentially giving access to binary files from the host
// (which can then be opened by container processes, leading to container
// escapes). Note that because this operation will close any open file
// descriptors that are referenced by (*os.File) handles from underneath
// the Go runtime, we must not do any file operations after this point
// (otherwise the (*os.File) finaliser could close the wrong file). See
// CVE-2024-21626 for more information as to why this protection is
// necessary.
//
// This is not needed for runc-dmz, because the extra execve(2) step means
// that all O_CLOEXEC file descriptors have already been closed and thus
// the second execve(2) from runc-dmz cannot access internal file
// descriptors from runc.
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
return err
}
return system.Exec(name, l.config.Args[0:], os.Environ())
} }

View File

@ -17,6 +17,7 @@ import (
"github.com/opencontainers/runc/libcontainer/keys" "github.com/opencontainers/runc/libcontainer/keys"
"github.com/opencontainers/runc/libcontainer/seccomp" "github.com/opencontainers/runc/libcontainer/seccomp"
"github.com/opencontainers/runc/libcontainer/system" "github.com/opencontainers/runc/libcontainer/system"
"github.com/opencontainers/runc/libcontainer/utils"
) )
type linuxStandardInit struct { type linuxStandardInit struct {
@ -258,5 +259,23 @@ func (l *linuxStandardInit) Init() error {
return err return err
} }
// Close all file descriptors we are not passing to the container. This is
// necessary because the execve target could use internal runc fds as the
// execve path, potentially giving access to binary files from the host
// (which can then be opened by container processes, leading to container
// escapes). Note that because this operation will close any open file
// descriptors that are referenced by (*os.File) handles from underneath
// the Go runtime, we must not do any file operations after this point
// (otherwise the (*os.File) finaliser could close the wrong file). See
// CVE-2024-21626 for more information as to why this protection is
// necessary.
//
// This is not needed for runc-dmz, because the extra execve(2) step means
// that all O_CLOEXEC file descriptors have already been closed and thus
// the second execve(2) from runc-dmz cannot access internal file
// descriptors from runc.
if err := utils.UnsafeCloseFrom(l.config.PassedFilesCount + 3); err != nil {
return err
}
return system.Exec(name, l.config.Args[0:], os.Environ()) return system.Exec(name, l.config.Args[0:], os.Environ())
} }

View File

@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"os" "os"
"strconv" "strconv"
_ "unsafe" // for go:linkname
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
@ -23,9 +24,11 @@ func EnsureProcHandle(fh *os.File) error {
return nil return nil
} }
// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for type fdFunc func(fd int)
// the process (except for those below the given fd value).
func CloseExecFrom(minFd int) error { // fdRangeFrom calls the passed fdFunc for each file descriptor that is open in
// the current process.
func fdRangeFrom(minFd int, fn fdFunc) error {
fdDir, err := os.Open("/proc/self/fd") fdDir, err := os.Open("/proc/self/fd")
if err != nil { if err != nil {
return err return err
@ -50,15 +53,60 @@ func CloseExecFrom(minFd int) error {
if fd < minFd { if fd < minFd {
continue continue
} }
// Intentionally ignore errors from unix.CloseOnExec -- the cases where // Ignore the file descriptor we used for readdir, as it will be closed
// this might fail are basically file descriptors that have already // when we return.
// been closed (including and especially the one that was created when if uintptr(fd) == fdDir.Fd() {
// os.ReadDir did the "opendir" syscall). continue
unix.CloseOnExec(fd) }
// Run the closure.
fn(fd)
} }
return nil return nil
} }
// CloseExecFrom sets the O_CLOEXEC flag on all file descriptors greater or
// equal to minFd in the current process.
func CloseExecFrom(minFd int) error {
return fdRangeFrom(minFd, unix.CloseOnExec)
}
//go:linkname runtime_IsPollDescriptor internal/poll.IsPollDescriptor

// runtime_IsPollDescriptor is a body-less declaration bound, through the
// go:linkname directive above, to "internal/poll".IsPollDescriptor.
//
// In order to make sure we do not close the internal epoll descriptors the Go
// runtime uses, we need to ensure that we skip descriptors that match
// "internal/poll".IsPollDescriptor. Yes, this is a Go runtime internal thing;
// unfortunately there's no other way to be sure we're only keeping the file
// descriptors the Go runtime needs. Hopefully nothing blows up doing this...
func runtime_IsPollDescriptor(fd uintptr) bool //nolint:revive
// UnsafeCloseFrom closes all file descriptors greater or equal to minFd in the
// current process, except for those critical to Go's runtime (such as the
// netpoll management descriptors).
//
// NOTE: That this function is incredibly dangerous to use in most Go code, as
// closing file descriptors from underneath *os.File handles can lead to very
// bad behaviour (the closed file descriptor can be re-used and then any
// *os.File operations would apply to the wrong file). This function is only
// intended to be called from the last stage of runc init.
func UnsafeCloseFrom(minFd int) error {
// We must not close some file descriptors.
return fdRangeFrom(minFd, func(fd int) {
if runtime_IsPollDescriptor(uintptr(fd)) {
// These are the Go runtimes internal netpoll file descriptors.
// These file descriptors are operated on deep in the Go scheduler,
// and closing those files from underneath Go can result in panics.
// There is no issue with keeping them because they are not
// executable and are not useful to an attacker anyway. Also we
// don't have any choice.
return
}
// There's nothing we can do about errors from close(2), and the
// only likely error to be seen is EBADF which indicates the fd was
// already closed (in which case, we got what we wanted).
_ = unix.Close(fd)
})
}
// NewSockPair returns a new unix socket pair // NewSockPair returns a new unix socket pair
func NewSockPair(name string) (parent *os.File, child *os.File, err error) { func NewSockPair(name string) (parent *os.File, child *os.File, err error) {
fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0)

2
vendor/modules.txt vendored
View File

@ -547,7 +547,7 @@ github.com/onsi/gomega/types
# github.com/opencontainers/go-digest v1.0.0 # github.com/opencontainers/go-digest v1.0.0
## explicit; go 1.13 ## explicit; go 1.13
github.com/opencontainers/go-digest github.com/opencontainers/go-digest
# github.com/opencontainers/runc v1.1.11 # github.com/opencontainers/runc v1.1.12
## explicit; go 1.17 ## explicit; go 1.17
github.com/opencontainers/runc/libcontainer github.com/opencontainers/runc/libcontainer
github.com/opencontainers/runc/libcontainer/apparmor github.com/opencontainers/runc/libcontainer/apparmor