From f811026c7773eec059690a7f1390162f81284d36 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 3 Jul 2021 11:01:13 +0200 Subject: [PATCH 1/7] virtcontainers: Unconditionally create the sandbox cgroup manager Regardless of the sandbox_cgroup_only setting, we create the sandbox cgroup manager and set the sandbox cgroup path at the same time. Without doing this, the hypervisor constraint routine is mostly a NOP as the sandbox state cgroup path is not initialized. Fixes #2184 Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/api.go | 4 -- src/runtime/virtcontainers/sandbox.go | 63 ++++++++++++--------------- 2 files changed, 27 insertions(+), 40 deletions(-) diff --git a/src/runtime/virtcontainers/api.go b/src/runtime/virtcontainers/api.go index f8787eac83..5cf85f7040 100644 --- a/src/runtime/virtcontainers/api.go +++ b/src/runtime/virtcontainers/api.go @@ -84,10 +84,6 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f // Move runtime to sandbox cgroup so all process are created there. if s.config.SandboxCgroupOnly { - if err := s.createCgroupManager(); err != nil { - return nil, err - } - if err := s.setupSandboxCgroup(); err != nil { return nil, err } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index c4f279da03..8de1ef95b8 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -180,7 +180,7 @@ type Sandbox struct { config *SandboxConfig annotationsLock *sync.RWMutex wg *sync.WaitGroup - cgroupMgr *vccgroups.Manager + sandboxCgroup *vccgroups.Manager cw *consoleWatcher containers map[string]*Container @@ -542,6 +542,11 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor sandboxConfig.HypervisorConfig.EnableVhostUserStore, sandboxConfig.HypervisorConfig.VhostUserStorePath, nil) + // Create the sandbox cgroups + if err := s.createCgroups(); err != nil { + return nil, err + } + // Ignore the error. Restore can fail for a new sandbox if err := s.Restore(); err != nil { s.Logger().WithError(err).Debug("restore sandbox failed") @@ -559,7 +564,7 @@ func newSandbox(ctx context.Context, sandboxConfig SandboxConfig, factory Factor return s, nil } -func (s *Sandbox) createCgroupManager() error { +func (s *Sandbox) createCgroups() error { var err error cgroupPath := "" @@ -632,9 +637,9 @@ func (s *Sandbox) createCgroupManager() error { } } - // Create the cgroup manager, this way it can be used later + // Create the sandbox cgroup, this way it can be used later // to create or detroy cgroups - if s.cgroupMgr, err = vccgroups.New( + if s.sandboxCgroup, err = vccgroups.New( &vccgroups.Config{ Cgroups: s.config.Cgroups, CgroupPaths: s.state.CgroupPaths, @@ -645,6 +650,12 @@ func (s *Sandbox) createCgroupManager() error { return err } + // Now that the cgroup manager is created, we can set the sandbox cgroup root path. + s.state.CgroupPath, err = vccgroups.ValidCgroupPath(cgroupPath, s.config.SystemdCgroup) + if err != nil { + return fmt.Errorf("Invalid cgroup path: %v", err) + } + return nil } @@ -1760,7 +1771,7 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy // the device cgroup MUST be updated since the hypervisor // will need access to such device hdev := device.GetHostPath() - if err := s.cgroupMgr.AddDevice(ctx, hdev); err != nil { + if err := s.sandboxCgroup.AddDevice(ctx, hdev); err != nil { s.Logger().WithError(err).WithField("device", hdev). 
Warn("Could not add device to cgroup") } @@ -1815,7 +1826,7 @@ func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, de // Remove device from cgroup, the hypervisor // should not have access to such device anymore. hdev := device.GetHostPath() - if err := s.cgroupMgr.RemoveDevice(hdev); err != nil { + if err := s.sandboxCgroup.RemoveDevice(hdev); err != nil { s.Logger().WithError(err).WithField("device", hdev). Warn("Could not remove device from cgroup") } @@ -2103,7 +2114,7 @@ func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { return err } - if err := s.cgroupMgr.SetCPUSet(cpuset, memset); err != nil { + if err := s.sandboxCgroup.SetCPUSet(cpuset, memset); err != nil { return err } @@ -2153,8 +2164,8 @@ func (s *Sandbox) cgroupsDelete() error { var path string var cgroupSubsystems cgroups.Hierarchy - if s.config.SandboxCgroupOnly { - return s.cgroupMgr.Destroy() + if err := s.sandboxCgroup.Destroy(); err != nil { + return err } cgroupSubsystems = V1NoConstraints @@ -2334,37 +2345,23 @@ func (s *Sandbox) cpuResources() *specs.LinuxCPU { // setupSandboxCgroup creates and joins sandbox cgroups for the sandbox config func (s *Sandbox) setupSandboxCgroup() error { var err error - spec := s.GetPatchedOCISpec() - if spec == nil { - return errorMissingOCISpec - } - - if spec.Linux == nil { - s.Logger().WithField("sandboxid", s.id).Warning("no cgroup path provided for pod sandbox, not creating sandbox cgroup") - return nil - } - - s.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, s.config.SystemdCgroup) - if err != nil { - return fmt.Errorf("Invalid cgroup path: %v", err) - } runtimePid := os.Getpid() // Add the runtime to the Kata sandbox cgroup - if err = s.cgroupMgr.Add(runtimePid); err != nil { + if err := s.sandboxCgroup.Add(runtimePid); err != nil { return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) } - // `Apply` updates manager's Cgroups and CgroupPaths, - // they both need to be saved since are used to create - // or restore a cgroup managers. - if s.config.Cgroups, err = s.cgroupMgr.GetCgroups(); err != nil { + // `Apply` updates the sandbox cgroup Cgroups and CgroupPaths, + // they both need to be saved since they are used to create + // or restore the sandbox cgroup. + if s.config.Cgroups, err = s.sandboxCgroup.GetCgroups(); err != nil { return fmt.Errorf("Could not get cgroup configuration: %v", err) } - s.state.CgroupPaths = s.cgroupMgr.GetPaths() + s.state.CgroupPaths = s.sandboxCgroup.GetPaths() - if err = s.cgroupMgr.Apply(); err != nil { + if err := s.sandboxCgroup.Apply(); err != nil { return fmt.Errorf("Could not constrain cgroup: %v", err) } @@ -2457,12 +2454,6 @@ func fetchSandbox(ctx context.Context, sandboxID string) (sandbox *Sandbox, err return nil, fmt.Errorf("failed to create sandbox with config %+v: %v", config, err) } - if sandbox.config.SandboxCgroupOnly { - if err := sandbox.createCgroupManager(); err != nil { - return nil, err - } - } - // This sandbox already exists, we don't need to recreate the containers in the guest. // We only need to fetch the containers from storage and create the container structs. 
if err := sandbox.fetchContainers(ctx); err != nil { From dc7e9bce73b4818a567b387133694d7ae2ab5eec Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 3 Jul 2021 15:36:20 +0200 Subject: [PATCH 2/7] virtcontainers: sandbox: Host cgroups partitioning This is a simplification of the host cgroup handling by partitioning the host cgroups into 2: A sandbox cgroup and an overhead cgroup. The sandbox cgroup is always created and initialized. The overhead cgroup is only available when sandbox_cgroup_only is unset, and is unconstrained on all controllers. The goal of having an overhead cgroup is to be more flexible on how we manage a pod overhead. Having such cgroup will allow for setting a fixed overhead per pod, for a subset of controllers, while at the same time not having the pod being accounted for those resources. When sandbox_cgroup_only is not set, we move all non vCPU threads to the overhead cgroup and let them run unconstrained. When it is set, all pod related processes and threads will run in the sandbox cgroup. Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/api.go | 8 +- src/runtime/virtcontainers/sandbox.go | 295 +++++++------------------- 2 files changed, 85 insertions(+), 218 deletions(-) diff --git a/src/runtime/virtcontainers/api.go b/src/runtime/virtcontainers/api.go index 5cf85f7040..0f549992b7 100644 --- a/src/runtime/virtcontainers/api.go +++ b/src/runtime/virtcontainers/api.go @@ -82,11 +82,9 @@ func createSandboxFromConfig(ctx context.Context, sandboxConfig SandboxConfig, f } }() - // Move runtime to sandbox cgroup so all process are created there. - if s.config.SandboxCgroupOnly { - if err := s.setupSandboxCgroup(); err != nil { - return nil, err - } + // Set the sandbox host cgroups. + if err := s.setupCgroups(); err != nil { + return nil, err } // Start the VM diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index 8de1ef95b8..de113a699b 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -17,7 +17,6 @@ import ( "os" "os/exec" "path/filepath" - "strings" "sync" "syscall" @@ -66,6 +65,15 @@ const ( mkswapPath = "/sbin/mkswap" rwm = "rwm" + + // When the Kata overhead threads (I/O, VMM, etc) are not + // placed in the sandbox cgroup, they are moved to a specific, + // unconstrained cgroup hierarchy. + // Assuming the cgroup mount point is at /sys/fs/cgroup/, on a + // cgroup v1 system, the Kata overhead memory cgroup will be at + // /sys/fs/cgroup/memory/kata_overhead/$CGPATH where $CGPATH is + // defined by the orchestrator. + cgroupKataOverheadPath = "/kata_overhead/" ) var ( @@ -181,6 +189,7 @@ type Sandbox struct { annotationsLock *sync.RWMutex wg *sync.WaitGroup sandboxCgroup *vccgroups.Manager + overheadCgroup *vccgroups.Manager cw *consoleWatcher containers map[string]*Container @@ -637,8 +646,10 @@ func (s *Sandbox) createCgroups() error { } } - // Create the sandbox cgroup, this way it can be used later - // to create or detroy cgroups + // Create the sandbox cgroup. + // Depending on the SandboxCgroupOnly value, this cgroup + // will either hold all the pod threads (SandboxCgroupOnly is true) + // or only the virtual CPU ones (SandboxCgroupOnly is false). if s.sandboxCgroup, err = vccgroups.New( &vccgroups.Config{ Cgroups: s.config.Cgroups, @@ -650,12 +661,31 @@ func (s *Sandbox) createCgroups() error { return err } - // Now that the cgroup manager is created, we can set the sandbox cgroup root path. 
+ // Now that the sandbox cgroup is created, we can set the state cgroup root path. s.state.CgroupPath, err = vccgroups.ValidCgroupPath(cgroupPath, s.config.SystemdCgroup) if err != nil { return fmt.Errorf("Invalid cgroup path: %v", err) } + if s.config.SandboxCgroupOnly { + s.overheadCgroup = nil + } else { + // The shim configuration is requesting that we do not put all threads + // into the sandbox cgroup. + // We're creating an overhead cgroup, with no constraints. Everything but + // the vCPU threads will eventually make it there. + if s.overheadCgroup, err = vccgroups.New( + &vccgroups.Config{ + Cgroups: nil, + CgroupPaths: nil, + Resources: specs.LinuxResources{}, + CgroupPath: cgroupKataOverheadPath, + }, + ); err != nil { + return err + } + } + return nil } @@ -2099,54 +2129,28 @@ func (s *Sandbox) GetHypervisorType() string { return string(s.config.HypervisorType) } -// cgroupsUpdate will: -// 1) get the v1constraints cgroup associated with the stored cgroup path -// 2) (re-)add hypervisor vCPU threads to the appropriate cgroup -// 3) If we are managing sandbox cgroup, update the v1constraints cgroup size +// cgroupsUpdate updates the sandbox cpuset cgroup subsystem. +// Also, if the sandbox has an overhead cgroup, it updates the hypervisor +// constraints by moving the potentially new vCPU threads back to the sandbox +// cgroup. func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { - - // If Kata is configured for SandboxCgroupOnly, the VMM and its processes are already - // in the Kata sandbox cgroup (inherited). Check to see if sandbox cpuset needs to be - // updated. - if s.config.SandboxCgroupOnly { - cpuset, memset, err := s.getSandboxCPUSet() - if err != nil { - return err - } - - if err := s.sandboxCgroup.SetCPUSet(cpuset, memset); err != nil { - return err - } - - return nil - } - - if s.state.CgroupPath == "" { - s.Logger().Warn("sandbox's cgroup won't be updated: cgroup path is empty") - return nil - } - - cgroup, err := cgroupsLoadFunc(V1Constraints, cgroups.StaticPath(s.state.CgroupPath)) - if err != nil { - return fmt.Errorf("Could not load cgroup %v: %v", s.state.CgroupPath, err) - } - - if err := s.constrainHypervisor(ctx, cgroup); err != nil { - return err - } - - if len(s.containers) <= 1 { - // nothing to update - return nil - } - - resources, err := s.resources() + cpuset, memset, err := s.getSandboxCPUSet() if err != nil { return err } - if err := cgroup.Update(&resources); err != nil { - return fmt.Errorf("Could not update sandbox cgroup path='%v' error='%v'", s.state.CgroupPath, err) + // We update the sandbox cgroup with potentially new virtual CPUs. + if err := s.sandboxCgroup.SetCPUSet(cpuset, memset); err != nil { + return err + } + + if s.overheadCgroup != nil { + // If we have an overhead cgroup, new vCPU threads would start there, + // as being children of the VMM PID. + // We need to constrain them by moving them into the sandbox cgroup. 
+ if err := s.constrainHypervisor(ctx); err != nil { + return err + } } return nil @@ -2161,110 +2165,29 @@ func (s *Sandbox) cgroupsDelete() error { return nil } - var path string - var cgroupSubsystems cgroups.Hierarchy + if s.overheadCgroup != nil { + if err := s.overheadCgroup.Destroy(); err != nil { + return err + } + } if err := s.sandboxCgroup.Destroy(); err != nil { return err } - cgroupSubsystems = V1NoConstraints - path = cgroupNoConstraintsPath(s.state.CgroupPath) - s.Logger().WithField("path", path).Debug("Deleting no constraints cgroup") - - sandboxCgroups, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path)) - if err == cgroups.ErrCgroupDeleted { - // cgroup already deleted - s.Logger().Warnf("cgroup already deleted: '%s'", err) - return nil - } - - if err != nil { - return fmt.Errorf("Could not load cgroups %v: %v", path, err) - } - - // move running process here, that way cgroup can be removed - parent, err := parentCgroup(cgroupSubsystems, path) - if err != nil { - // parent cgroup doesn't exist, that means there are no process running - // and the no constraints cgroup was removed. - s.Logger().WithError(err).Warn("Parent cgroup doesn't exist") - return nil - } - - if err := sandboxCgroups.MoveTo(parent); err != nil { - // Don't fail, cgroup can be deleted - s.Logger().WithError(err).Warnf("Could not move process from %s to parent cgroup", path) - } - - return sandboxCgroups.Delete() + return nil } // constrainHypervisor will place the VMM and vCPU threads into cgroups. -func (s *Sandbox) constrainHypervisor(ctx context.Context, cgroup cgroups.Cgroup) error { - // VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set. - // This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take - // Kata/VMM into account, Kata may fail to boot due to being overconstrained. - // If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained - // cgroup - if s.config.SandboxCgroupOnly { - // Kata components were moved into the sandbox-cgroup already, so VMM - // will already land there as well. No need to take action - return nil - } - - pids := s.hypervisor.getPids() - if len(pids) == 0 || pids[0] == 0 { - return fmt.Errorf("Invalid hypervisor PID: %+v", pids) - } - - // VMM threads are only placed into the constrained cgroup if SandboxCgroupOnly is being set. - // This is the "correct" behavior, but if the parent cgroup isn't set up correctly to take - // Kata/VMM into account, Kata may fail to boot due to being overconstrained. - // If !SandboxCgroupOnly, place the VMM into an unconstrained cgroup, and the vCPU threads into constrained - // cgroup - // Move the VMM into cgroups without constraints, those cgroups are not yet supported. - resources := &specs.LinuxResources{} - path := cgroupNoConstraintsPath(s.state.CgroupPath) - vmmCgroup, err := cgroupsNewFunc(V1NoConstraints, cgroups.StaticPath(path), resources) - if err != nil { - return fmt.Errorf("Could not create cgroup %v: %v", path, err) - } - - for _, pid := range pids { - if pid <= 0 { - s.Logger().Warnf("Invalid hypervisor pid: %d", pid) - continue - } - - if err := vmmCgroup.Add(cgroups.Process{Pid: pid}); err != nil { - return fmt.Errorf("Could not add hypervisor PID %d to cgroup: %v", pid, err) - } - } - - // when new container joins, new CPU could be hotplugged, so we - // have to query fresh vcpu info from hypervisor every time. 
+func (s *Sandbox) constrainHypervisor(ctx context.Context) error { tids, err := s.hypervisor.getThreadIDs(ctx) if err != nil { return fmt.Errorf("failed to get thread ids from hypervisor: %v", err) } - if len(tids.vcpus) == 0 { - // If there's no tid returned from the hypervisor, this is not - // a bug. It simply means there is nothing to constrain, hence - // let's return without any error from here. - return nil - } - // Move vcpus (threads) into cgroups with constraints. - // Move whole hypervisor process would be easier but the IO/network performance - // would be over-constrained. + // All vCPU threads move to the sandbox cgroup. for _, i := range tids.vcpus { - // In contrast, AddTask will write thread id to `tasks` - // After this, vcpu threads are in "vcpu" sub-cgroup, other threads in - // qemu will be left in parent cgroup untouched. - if err := cgroup.AddTask(cgroups.Process{ - Pid: i, - }); err != nil { + if err := s.sandboxCgroup.Add(i); err != nil { return err } } @@ -2272,99 +2195,45 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context, cgroup cgroups.Cgroup return nil } -func (s *Sandbox) resources() (specs.LinuxResources, error) { - resources := specs.LinuxResources{ - CPU: s.cpuResources(), - } - - return resources, nil -} - -func (s *Sandbox) cpuResources() *specs.LinuxCPU { - // Use default period and quota if they are not specified. - // Container will inherit the constraints from its parent. - quota := int64(0) - period := uint64(0) - shares := uint64(0) - realtimePeriod := uint64(0) - realtimeRuntime := int64(0) - - cpu := &specs.LinuxCPU{ - Quota: "a, - Period: &period, - Shares: &shares, - RealtimePeriod: &realtimePeriod, - RealtimeRuntime: &realtimeRuntime, - } - - for _, c := range s.containers { - ann := c.GetAnnotations() - if ann[annotations.ContainerTypeKey] == string(PodSandbox) { - // skip sandbox container - continue - } - - if c.config.Resources.CPU == nil { - continue - } - - if c.config.Resources.CPU.Shares != nil { - shares = uint64(math.Max(float64(*c.config.Resources.CPU.Shares), float64(shares))) - } - - if c.config.Resources.CPU.Quota != nil { - quota += *c.config.Resources.CPU.Quota - } - - if c.config.Resources.CPU.Period != nil { - period = uint64(math.Max(float64(*c.config.Resources.CPU.Period), float64(period))) - } - - if c.config.Resources.CPU.Cpus != "" { - cpu.Cpus += c.config.Resources.CPU.Cpus + "," - } - - if c.config.Resources.CPU.RealtimeRuntime != nil { - realtimeRuntime += *c.config.Resources.CPU.RealtimeRuntime - } - - if c.config.Resources.CPU.RealtimePeriod != nil { - realtimePeriod += *c.config.Resources.CPU.RealtimePeriod - } - - if c.config.Resources.CPU.Mems != "" { - cpu.Mems += c.config.Resources.CPU.Mems + "," - } - } - - cpu.Cpus = strings.Trim(cpu.Cpus, " \n\t,") - - return validCPUResources(cpu) -} - -// setupSandboxCgroup creates and joins sandbox cgroups for the sandbox config -func (s *Sandbox) setupSandboxCgroup() error { +// setupCgroups adds the runtime process to either the sandbox cgroup or the overhead one, +// depending on the sandbox_cgroup_only configuration setting. +func (s *Sandbox) setupCgroups() error { var err error + vmmCgroup := s.sandboxCgroup + if s.overheadCgroup != nil { + vmmCgroup = s.overheadCgroup + } + + // By adding the runtime process to either the sandbox or overhead cgroup, we are making + // sure that any child process of the runtime (i.e. *all* processes serving a Kata pod) + // will initially live in this cgroup. 
Depending on the sandbox_cgroup settings, we will + // then move the vCPU threads between cgroups. runtimePid := os.Getpid() - // Add the runtime to the Kata sandbox cgroup - if err := s.sandboxCgroup.Add(runtimePid); err != nil { + // Add the runtime to the VMM sandbox cgroup + if err := vmmCgroup.Add(runtimePid); err != nil { return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) } // `Apply` updates the sandbox cgroup Cgroups and CgroupPaths, // they both need to be saved since they are used to create // or restore the sandbox cgroup. - if s.config.Cgroups, err = s.sandboxCgroup.GetCgroups(); err != nil { + if s.config.Cgroups, err = vmmCgroup.GetCgroups(); err != nil { return fmt.Errorf("Could not get cgroup configuration: %v", err) } - s.state.CgroupPaths = s.sandboxCgroup.GetPaths() + s.state.CgroupPaths = vmmCgroup.GetPaths() if err := s.sandboxCgroup.Apply(); err != nil { return fmt.Errorf("Could not constrain cgroup: %v", err) } + if s.overheadCgroup != nil { + if err = s.overheadCgroup.Apply(); err != nil { + return fmt.Errorf("Could not constrain cgroup: %v", err) + } + } + return nil } From f17752b0dca620fc7be75420cd23fc716042f175 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sat, 3 Jul 2021 16:04:05 +0200 Subject: [PATCH 3/7] virtcontainers: container: Do not create and manage container host cgroups The only process we are adding there is the container host one, and there is no such thing anymore. Signed-off-by: Samuel Ortiz --- src/runtime/virtcontainers/container.go | 140 ------------------------ 1 file changed, 140 deletions(-) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index 8525aa105a..85144e9626 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -22,13 +22,10 @@ import ( "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/device/manager" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" vcTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/utils" - "github.com/containerd/cgroups" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -404,14 +401,6 @@ func (c *Container) GetPatchedOCISpec() *specs.Spec { return c.config.CustomSpec } -// storeContainer stores a container config. -func (c *Container) storeContainer() error { - if err := c.sandbox.Save(); err != nil { - return err - } - return nil -} - // setContainerState sets both the in-memory and on-disk state of the // container. 
func (c *Container) setContainerState(state types.StateString) error { @@ -954,12 +943,6 @@ func (c *Container) create(ctx context.Context) (err error) { } } - if !rootless.IsRootless() && !c.sandbox.config.SandboxCgroupOnly { - if err = c.cgroupsCreate(); err != nil { - return - } - } - if err = c.setContainerState(types.StateReady); err != nil { return } @@ -978,13 +961,6 @@ func (c *Container) delete(ctx context.Context) error { return err } - // If running rootless, there are no cgroups to remove - if !c.sandbox.config.SandboxCgroupOnly || !rootless.IsRootless() { - if err := c.cgroupsDelete(); err != nil { - return err - } - } - return c.sandbox.storeSandbox(ctx) } @@ -1228,12 +1204,6 @@ func (c *Container) update(ctx context.Context, resources specs.LinuxResources) return err } - if !c.sandbox.config.SandboxCgroupOnly { - if err := c.cgroupsUpdate(resources); err != nil { - return err - } - } - // There currently isn't a notion of cpusets.cpus or mems being tracked // inside of the guest. Make sure we clear these before asking agent to update // the container's cgroups. @@ -1443,113 +1413,3 @@ func (c *Container) detachDevices(ctx context.Context) error { } return nil } - -// cgroupsCreate creates cgroups on the host for the associated container -func (c *Container) cgroupsCreate() (err error) { - spec := c.GetPatchedOCISpec() - if spec == nil { - return errorMissingOCISpec - } - - // https://github.com/kata-containers/runtime/issues/168 - resources := specs.LinuxResources{ - CPU: nil, - } - - if spec.Linux != nil && spec.Linux.Resources != nil { - resources.CPU = validCPUResources(spec.Linux.Resources.CPU) - } - - c.state.CgroupPath, err = vccgroups.ValidCgroupPath(spec.Linux.CgroupsPath, c.sandbox.config.SystemdCgroup) - if err != nil { - return fmt.Errorf("Invalid cgroup path: %v", err) - } - - cgroup, err := cgroupsNewFunc(cgroups.V1, - cgroups.StaticPath(c.state.CgroupPath), &resources) - if err != nil { - return fmt.Errorf("Could not create cgroup for %v: %v", c.state.CgroupPath, err) - } - - // Add shim into cgroup - if c.process.Pid > 0 { - if err := cgroup.Add(cgroups.Process{Pid: c.process.Pid}); err != nil { - return fmt.Errorf("Could not add PID %d to cgroup %v: %v", c.process.Pid, spec.Linux.CgroupsPath, err) - } - } - - return nil -} - -// cgroupsDelete deletes the cgroups on the host for the associated container -func (c *Container) cgroupsDelete() error { - - if c.state.CgroupPath == "" { - c.Logger().Debug("container does not have host cgroups: nothing to update") - return nil - } - - cgroup, err := cgroupsLoadFunc(cgroups.V1, - cgroups.StaticPath(c.state.CgroupPath)) - - if err == cgroups.ErrCgroupDeleted { - // cgroup already deleted - return nil - } - - if err != nil { - return fmt.Errorf("Could not load container cgroup %v: %v", c.state.CgroupPath, err) - } - - // move running process here, that way cgroup can be removed - parent, err := parentCgroup(cgroups.V1, c.state.CgroupPath) - if err != nil { - // parent cgroup doesn't exist, that means there are no process running - // and the container cgroup was removed. 
- c.Logger().WithError(err).Warn("Container cgroup doesn't exist") - return nil - } - - if err := cgroup.MoveTo(parent); err != nil { - // Don't fail, cgroup can be deleted - c.Logger().WithError(err).Warn("Could not move container process into parent cgroup") - } - - if err := cgroup.Delete(); err != nil { - return fmt.Errorf("Could not delete container cgroup path='%v': error='%v'", c.state.CgroupPath, err) - } - - return nil -} - -// cgroupsUpdate updates cgroups on the host for the associated container -func (c *Container) cgroupsUpdate(resources specs.LinuxResources) error { - - if c.state.CgroupPath == "" { - c.Logger().Debug("container does not have host cgroups: nothing to update") - return nil - } - cgroup, err := cgroupsLoadFunc(cgroups.V1, - cgroups.StaticPath(c.state.CgroupPath)) - if err != nil { - return fmt.Errorf("Could not load cgroup %v: %v", c.state.CgroupPath, err) - } - - // Issue: https://github.com/kata-containers/runtime/issues/168 - r := specs.LinuxResources{ - CPU: validCPUResources(resources.CPU), - } - - // update cgroup - if err := cgroup.Update(&r); err != nil { - return fmt.Errorf("Could not update container cgroup path='%v': error='%v'", c.state.CgroupPath, err) - } - - // store new resources - c.config.Resources = r - if err := c.storeContainer(); err != nil { - return err - } - - return nil -} From b42ed39349bceae80d54c518be79bda31026a55d Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 19 Jul 2021 16:57:28 +0200 Subject: [PATCH 4/7] virtcontainers: cgroups: Add a containerd API based cgroups package Eventually, we will convert the virtcontainers and the whole Kata runtime code base to only rely on that package. This will make Kata only depends on the simpler containerd cgroups API. Signed-off-by: Samuel Ortiz --- .../virtcontainers/pkg/cgroups/cgroups.go | 289 ++++++++++++++++++ 1 file changed, 289 insertions(+) create mode 100644 src/runtime/virtcontainers/pkg/cgroups/cgroups.go diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go new file mode 100644 index 0000000000..a138be092f --- /dev/null +++ b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go @@ -0,0 +1,289 @@ +// Copyright (c) 2021 Apple Inc. 
+// +// SPDX-License-Identifier: Apache-2.0 +// + +package cgroups + +import ( + "path/filepath" + "sync" + + "github.com/containerd/cgroups" + v1 "github.com/containerd/cgroups/stats/v1" + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/sirupsen/logrus" + + "golang.org/x/sys/unix" +) + +type Cgroup struct { + cgroup cgroups.Cgroup + path string + cpusets *specs.LinuxCPU + devices []specs.LinuxDeviceCgroup + + sync.Mutex +} + +func deviceToDeviceCgroup(device string) (*specs.LinuxDeviceCgroup, error) { + var st unix.Stat_t + + if err := unix.Stat(device, &st); err != nil { + return nil, err + } + + devType := "" + switch st.Mode & unix.S_IFMT { + case unix.S_IFCHR: + devType = "c" + case unix.S_IFBLK: + devType = "b" + } + + major := int64(unix.Major(st.Rdev)) + minor := int64(unix.Minor(st.Rdev)) + + return &specs.LinuxDeviceCgroup{ + Allow: true, + Type: devType, + Major: &major, + Minor: &minor, + Access: "rwm", + }, nil +} + +func sandboxDevices() []specs.LinuxDeviceCgroup { + devices := []specs.LinuxDeviceCgroup{} + + defaultDevices := []string{ + "/dev/null", + "/dev/random", + "/dev/full", + "/dev/tty", + "/dev/zero", + "/dev/urandom", + "/dev/console", + } + + // Processes running in a device-cgroup are constrained, they have acccess + // only to the devices listed in the devices.list file. + // In order to run Virtual Machines and create virtqueues, hypervisors + // need access to certain character devices in the host, like kvm and vhost-net. + hypervisorDevices := []string{ + "/dev/kvm", // To run virtual machines + "/dev/vhost-net", // To create virtqueues + "/dev/vfio/vfio", // To access VFIO devices + } + + defaultDevices = append(defaultDevices, hypervisorDevices...) + + for _, device := range defaultDevices { + ldevice, err := deviceToDeviceCgroup(device) + if err != nil { + cgroupsLogger.WithField("source", "cgroups").Warnf("Could not add %s to the devices cgroup", device) + continue + } + devices = append(devices, *ldevice) + } + + wildcardMajor := int64(-1) + wildcardMinor := int64(-1) + ptsMajor := int64(136) + tunMajor := int64(10) + tunMinor := int64(200) + + wildcardDevices := []specs.LinuxDeviceCgroup{ + // allow mknod for any device + { + Allow: true, + Type: "c", + Major: &wildcardMajor, + Minor: &wildcardMinor, + Access: "m", + }, + { + Allow: true, + Type: "b", + Major: &wildcardMajor, + Minor: &wildcardMinor, + Access: "m", + }, + // /dev/pts/ - pts namespaces are "coming soon" + { + Allow: true, + Type: "c", + Major: &ptsMajor, + Minor: &wildcardMinor, + Access: "rwm", + }, + // tuntap + { + Allow: true, + Type: "c", + Major: &tunMajor, + Minor: &tunMinor, + Access: "rwm", + }, + } + + devices = append(devices, wildcardDevices...) + + return devices +} + +func NewCgroup(path string, resources *specs.LinuxResources) (*Cgroup, error) { + var err error + + cgroupPath, err := ValidCgroupPath(path, IsSystemdCgroup(path)) + if err != nil { + return nil, err + } + + cgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(cgroupPath), resources) + if err != nil { + return nil, err + } + + return &Cgroup{ + path: cgroupPath, + devices: resources.Devices, + cpusets: resources.CPU, + cgroup: cgroup, + }, nil +} + +func NewSandboxCgroup(path string, resources *specs.LinuxResources) (*Cgroup, error) { + sandboxResources := *resources + sandboxResources.Devices = append(sandboxResources.Devices, sandboxDevices()...) 
+ + return NewCgroup(path, &sandboxResources) +} + +func Load(path string) (*Cgroup, error) { + cgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(path)) + if err != nil { + return nil, err + } + + return &Cgroup{ + path: path, + cgroup: cgroup, + }, nil +} + +func (c *Cgroup) Logger() *logrus.Entry { + return cgroupsLogger.WithField("source", "cgroups") +} + +func (c *Cgroup) Delete() error { + return c.cgroup.Delete() +} + +func (c *Cgroup) Stat() (*v1.Metrics, error) { + return c.cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) +} + +func (c *Cgroup) AddProcess(pid int, subsystems ...string) error { + return c.cgroup.Add(cgroups.Process{Pid: pid}) +} + +func (c *Cgroup) AddTask(pid int, subsystems ...string) error { + return c.cgroup.AddTask(cgroups.Process{Pid: pid}) +} + +func (c *Cgroup) Update(resources *specs.LinuxResources) error { + return c.cgroup.Update(resources) +} + +func (c *Cgroup) MoveTo(path string) error { + newCgroup, err := cgroups.Load(cgroups.V1, cgroups.StaticPath(path)) + if err != nil { + return err + } + + return c.cgroup.MoveTo(newCgroup) +} + +func (c *Cgroup) MoveToParent() error { + parentPath := filepath.Dir(c.path) + + return c.MoveTo(parentPath) +} + +func (c *Cgroup) AddDevice(deviceHostPath string) error { + deviceResource, err := DeviceToLinuxDevice(deviceHostPath) + if err != nil { + return err + } + + c.Lock() + defer c.Unlock() + + c.devices = append(c.devices, deviceResource) + + if err := c.cgroup.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + + return nil +} + +func (c *Cgroup) RemoveDevice(deviceHostPath string) error { + deviceResource, err := DeviceToLinuxDevice(deviceHostPath) + if err != nil { + return err + } + + c.Lock() + defer c.Unlock() + + for i, d := range c.devices { + if d.Type == deviceResource.Type && + d.Major == deviceResource.Major && + d.Minor == deviceResource.Minor { + c.devices = append(c.devices[:i], c.devices[i+1:]...) + } + } + + if err := c.cgroup.Update(&specs.LinuxResources{ + Devices: c.devices, + }); err != nil { + return err + } + + return nil +} + +func (c *Cgroup) UpdateCpuSet(cpuset, memset string) error { + c.Lock() + defer c.Unlock() + + if len(cpuset) > 0 { + // If we didn't have a cpuset defined, let's create: + if c.cpusets == nil { + c.cpusets = &specs.LinuxCPU{} + } + + c.cpusets.Cpus = cpuset + } + + if len(memset) > 0 { + // If we didn't have a cpuset defined, let's now create: + if c.cpusets == nil { + c.cpusets = &specs.LinuxCPU{} + } + + c.cpusets.Mems = memset + } + + return c.cgroup.Update(&specs.LinuxResources{ + CPU: c.cpusets, + }) +} + +func (c *Cgroup) Path() string { + return c.path +} From 9bed2ade0f7502b49e52228ee92dd934cee3daa3 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Thu, 8 Jul 2021 11:36:29 +0200 Subject: [PATCH 5/7] virtcontainers: Convert to the new cgroups package API The new API is based on containerd's cgroups package. With that conversion we can simpligy the virtcontainers sandbox code and also uniformize our cgroups external API dependency. We now only depend on containerd/cgroups for everything cgroups related. 
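For reviewers, here is a minimal sketch of how a caller might drive the new pkg/cgroups API once this conversion lands. It is illustrative only and not part of the patch: the cgroup paths, the empty resources and the placeholder vCPU thread id are made up, and running it requires root on a cgroup v1 host.

    package main

    import (
        "fmt"
        "os"

        cgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups"
        specs "github.com/opencontainers/runtime-spec/specs-go"
    )

    func main() {
        // Sandbox cgroup: NewSandboxCgroup appends the default sandbox device
        // whitelist (kvm, vhost-net, vfio, ...) to the given resources.
        sandbox, err := cgroups.NewSandboxCgroup("/kata_example_sandbox", &specs.LinuxResources{})
        if err != nil {
            fmt.Println(err)
            return
        }

        // Overhead cgroup: a plain, unconstrained cgroup under /kata_overhead/.
        overhead, err := cgroups.NewCgroup("/kata_overhead/example", &specs.LinuxResources{})
        if err != nil {
            fmt.Println(err)
            return
        }

        // The runtime (and, by inheritance, the VMM it forks) initially lives
        // in the overhead cgroup when sandbox_cgroup_only is unset.
        if err := overhead.AddProcess(os.Getpid()); err != nil {
            fmt.Println(err)
            return
        }

        // vCPU threads are then moved into the sandbox cgroup and constrained.
        // In the runtime the ids come from hypervisor.getThreadIDs(); here we
        // reuse our own pid as a stand-in.
        vcpuTID := os.Getpid()
        if err := sandbox.AddTask(vcpuTID); err != nil {
            fmt.Println(err)
            return
        }
        if err := sandbox.UpdateCpuSet("0-1", "0"); err != nil {
            fmt.Println(err)
            return
        }

        // Cleanup mirrors cgroupsDelete(): move our processes back to the
        // parent so the hierarchies can be removed, then delete them.
        _ = sandbox.MoveToParent()
        _ = overhead.MoveToParent()
        _ = sandbox.Delete()
        _ = overhead.Delete()
    }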
Depends-on: github.com/kata-containers/tests#3805 Signed-off-by: Samuel Ortiz Signed-off-by: Eric Ernst --- src/runtime/cli/kata-check.go | 6 - src/runtime/virtcontainers/api_test.go | 15 +- src/runtime/virtcontainers/cgroups.go | 167 -------- src/runtime/virtcontainers/cgroups_test.go | 207 ---------- src/runtime/virtcontainers/persist.go | 10 +- .../virtcontainers/persist/api/sandbox.go | 9 +- .../virtcontainers/pkg/cgroups/cgroups.go | 11 + .../virtcontainers/pkg/cgroups/manager.go | 355 ------------------ .../pkg/cgroups/manager_test.go | 38 -- .../virtcontainers/pkg/cgroups/utils.go | 4 +- .../virtcontainers/pkg/cgroups/utils_test.go | 8 +- src/runtime/virtcontainers/sandbox.go | 159 +++----- src/runtime/virtcontainers/sandbox_test.go | 41 +- src/runtime/virtcontainers/types/sandbox.go | 12 +- 14 files changed, 127 insertions(+), 915 deletions(-) delete mode 100644 src/runtime/virtcontainers/cgroups.go delete mode 100644 src/runtime/virtcontainers/cgroups_test.go delete mode 100644 src/runtime/virtcontainers/pkg/cgroups/manager.go delete mode 100644 src/runtime/virtcontainers/pkg/cgroups/manager_test.go diff --git a/src/runtime/cli/kata-check.go b/src/runtime/cli/kata-check.go index b720fa74de..076fb0525d 100644 --- a/src/runtime/cli/kata-check.go +++ b/src/runtime/cli/kata-check.go @@ -25,7 +25,6 @@ import ( "strings" "syscall" - "github.com/containerd/cgroups" "github.com/kata-containers/kata-containers/src/runtime/pkg/katautils" vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/oci" @@ -415,11 +414,6 @@ EXAMPLES: return errors.New("check: cannot determine runtime config") } - // check if cgroup can work use the same logic for creating containers - if _, err := vc.V1Constraints(); err != nil && err == cgroups.ErrMountPointNotExist && !runtimeConfig.SandboxCgroupOnly { - return fmt.Errorf("Cgroup v2 requires the following configuration: `sandbox_cgroup_only=true`.") - } - err := setCPUtype(runtimeConfig.HypervisorType) if err != nil { return err diff --git a/src/runtime/virtcontainers/api_test.go b/src/runtime/virtcontainers/api_test.go index 8e0f0e86f2..d3999927f0 100644 --- a/src/runtime/virtcontainers/api_test.go +++ b/src/runtime/virtcontainers/api_test.go @@ -130,8 +130,11 @@ func newTestSandboxConfigKataAgent() SandboxConfig { } func TestCreateSandboxNoopAgentSuccessful(t *testing.T) { - defer cleanUp() assert := assert.New(t) + if tc.NotValid(ktu.NeedRoot()) { + t.Skip(testDisabledAsNonRoot) + } + defer cleanUp() config := newTestSandboxConfigNoop() @@ -181,6 +184,9 @@ func TestCreateSandboxKataAgentSuccessful(t *testing.T) { } func TestCreateSandboxFailing(t *testing.T) { + if tc.NotValid(ktu.NeedRoot()) { + t.Skip(testDisabledAsNonRoot) + } defer cleanUp() assert := assert.New(t) @@ -240,6 +246,9 @@ func createAndStartSandbox(ctx context.Context, config SandboxConfig) (sandbox V } func TestReleaseSandbox(t *testing.T) { + if tc.NotValid(ktu.NeedRoot()) { + t.Skip(testDisabledAsNonRoot) + } defer cleanUp() config := newTestSandboxConfigNoop() @@ -254,6 +263,10 @@ func TestReleaseSandbox(t *testing.T) { } func TestCleanupContainer(t *testing.T) { + if tc.NotValid(ktu.NeedRoot()) { + t.Skip(testDisabledAsNonRoot) + } + config := newTestSandboxConfigNoop() assert := assert.New(t) diff --git a/src/runtime/virtcontainers/cgroups.go b/src/runtime/virtcontainers/cgroups.go deleted file mode 100644 index eeaf095da9..0000000000 --- a/src/runtime/virtcontainers/cgroups.go +++ /dev/null 
@@ -1,167 +0,0 @@ -// Copyright (c) 2018 Huawei Corporation -// Copyright (c) 2019 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -package virtcontainers - -import ( - "bufio" - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/containerd/cgroups" - specs "github.com/opencontainers/runtime-spec/specs-go" -) - -type cgroupPather interface { - cgroups.Subsystem - Path(path string) string -} - -// unconstrained cgroups are placed here. -// for example /sys/fs/cgroup/memory/kata/$CGPATH -// where path is defined by the containers manager -const cgroupKataPath = "/kata/" - -var cgroupsLoadFunc = cgroups.Load -var cgroupsNewFunc = cgroups.New - -// V1Constraints returns the cgroups that are compatible with the VC architecture -// and hypervisor, constraints can be applied to these cgroups. -func V1Constraints() ([]cgroups.Subsystem, error) { - root, err := cgroupV1MountPoint() - if err != nil { - return nil, err - } - subsystems := []cgroups.Subsystem{ - cgroups.NewCpuset(root), - cgroups.NewCpu(root), - cgroups.NewCpuacct(root), - } - return cgroupsSubsystems(subsystems) -} - -// V1NoConstraints returns the cgroups that are *not* compatible with the VC -// architecture and hypervisor, constraints MUST NOT be applied to these cgroups. -func V1NoConstraints() ([]cgroups.Subsystem, error) { - root, err := cgroupV1MountPoint() - if err != nil { - return nil, err - } - subsystems := []cgroups.Subsystem{ - // Some constainers managers, like k8s, take the control of cgroups. - // k8s: the memory cgroup for the dns containers is small to place - // a hypervisor there. - cgroups.NewMemory(root), - } - return cgroupsSubsystems(subsystems) -} - -func cgroupsSubsystems(subsystems []cgroups.Subsystem) ([]cgroups.Subsystem, error) { - var enabled []cgroups.Subsystem - for _, s := range cgroupPathers(subsystems) { - // check and remove the default groups that do not exist - if _, err := os.Lstat(s.Path("/")); err == nil { - enabled = append(enabled, s) - } - } - return enabled, nil -} - -func cgroupPathers(subystems []cgroups.Subsystem) []cgroupPather { - var out []cgroupPather - for _, s := range subystems { - if p, ok := s.(cgroupPather); ok { - out = append(out, p) - } - } - return out -} - -// v1MountPoint returns the mount point where the cgroup -// mountpoints are mounted in a single hiearchy -func cgroupV1MountPoint() (string, error) { - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return "", err - } - defer f.Close() - scanner := bufio.NewScanner(f) - for scanner.Scan() { - if err := scanner.Err(); err != nil { - return "", err - } - var ( - text = scanner.Text() - fields = strings.Split(text, " ") - // safe as mountinfo encodes mountpoints with spaces as \040. - index = strings.Index(text, " - ") - postSeparatorFields = strings.Fields(text[index+3:]) - numPostFields = len(postSeparatorFields) - ) - // this is an error as we can't detect if the mount is for "cgroup" - if numPostFields == 0 { - return "", fmt.Errorf("Found no fields post '-' in %q", text) - } - if postSeparatorFields[0] == "cgroup" { - // check that the mount is properly formated. 
- if numPostFields < 3 { - return "", fmt.Errorf("Error found less than 3 fields post '-' in %q", text) - } - return filepath.Dir(fields[4]), nil - } - } - return "", cgroups.ErrMountPointNotExist -} - -func cgroupNoConstraintsPath(path string) string { - return filepath.Join(cgroupKataPath, path) -} - -// return the parent cgroup for the given path -func parentCgroup(hierarchy cgroups.Hierarchy, path string) (cgroups.Cgroup, error) { - // append '/' just in case CgroupsPath doesn't start with it - parent := filepath.Dir("/" + path) - - parentCgroup, err := cgroupsLoadFunc(hierarchy, - cgroups.StaticPath(parent)) - if err != nil { - return nil, fmt.Errorf("Could not load parent cgroup %v: %v", parent, err) - } - - return parentCgroup, nil -} - -// validCPUResources checks CPU resources coherency -func validCPUResources(cpuSpec *specs.LinuxCPU) *specs.LinuxCPU { - if cpuSpec == nil { - return nil - } - - cpu := *cpuSpec - if cpu.Period != nil && *cpu.Period < 1 { - cpu.Period = nil - } - - if cpu.Quota != nil && *cpu.Quota < 1 { - cpu.Quota = nil - } - - if cpu.Shares != nil && *cpu.Shares < 1 { - cpu.Shares = nil - } - - if cpu.RealtimePeriod != nil && *cpu.RealtimePeriod < 1 { - cpu.RealtimePeriod = nil - } - - if cpu.RealtimeRuntime != nil && *cpu.RealtimeRuntime < 1 { - cpu.RealtimeRuntime = nil - } - - return &cpu -} diff --git a/src/runtime/virtcontainers/cgroups_test.go b/src/runtime/virtcontainers/cgroups_test.go deleted file mode 100644 index 582e93fc99..0000000000 --- a/src/runtime/virtcontainers/cgroups_test.go +++ /dev/null @@ -1,207 +0,0 @@ -// Copyright (c) 2018 Huawei Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -package virtcontainers - -import ( - "context" - "fmt" - "os" - "os/exec" - "path/filepath" - "testing" - - "github.com/containerd/cgroups" - cgroupsstatsv1 "github.com/containerd/cgroups/stats/v1" - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/types" - specs "github.com/opencontainers/runtime-spec/specs-go" - "github.com/stretchr/testify/assert" -) - -type mockCgroup struct { -} - -func (m *mockCgroup) New(string, *specs.LinuxResources) (cgroups.Cgroup, error) { - return &mockCgroup{}, nil -} -func (m *mockCgroup) Add(cgroups.Process) error { - return nil -} - -func (m *mockCgroup) AddTask(cgroups.Process) error { - return nil -} - -func (m *mockCgroup) Delete() error { - return nil -} - -func (m *mockCgroup) MoveTo(cgroups.Cgroup) error { - return nil -} - -func (m *mockCgroup) Stat(...cgroups.ErrorHandler) (*cgroupsstatsv1.Metrics, error) { - return &cgroupsstatsv1.Metrics{}, nil -} - -func (m *mockCgroup) Update(resources *specs.LinuxResources) error { - return nil -} - -func (m *mockCgroup) Processes(cgroups.Name, bool) ([]cgroups.Process, error) { - return nil, nil -} - -func (m *mockCgroup) Freeze() error { - return nil -} - -func (m *mockCgroup) Thaw() error { - return nil -} - -func (m *mockCgroup) OOMEventFD() (uintptr, error) { - return 0, nil -} - -func (m *mockCgroup) RegisterMemoryEvent(event cgroups.MemoryEvent) (uintptr, error) { - return 0, nil -} - -func (m *mockCgroup) State() cgroups.State { - return "" -} - -func (m *mockCgroup) Subsystems() []cgroups.Subsystem { - return nil -} - -func (m *mockCgroup) Tasks(cgroups.Name, bool) ([]cgroups.Task, error) { - return nil, nil -} - -func mockCgroupNew(hierarchy cgroups.Hierarchy, path cgroups.Path, resources *specs.LinuxResources, opts ...cgroups.InitOpts) (cgroups.Cgroup, error) { - return &mockCgroup{}, nil -} - -func mockCgroupLoad(hierarchy 
cgroups.Hierarchy, path cgroups.Path, opts ...cgroups.InitOpts) (cgroups.Cgroup, error) { - return &mockCgroup{}, nil -} - -func init() { - cgroupsNewFunc = mockCgroupNew - cgroupsLoadFunc = mockCgroupLoad -} - -func TestV1Constraints(t *testing.T) { - assert := assert.New(t) - - systems, err := V1Constraints() - assert.NoError(err) - assert.NotEmpty(systems) -} - -func TestV1NoConstraints(t *testing.T) { - assert := assert.New(t) - - systems, err := V1NoConstraints() - assert.NoError(err) - assert.NotEmpty(systems) -} - -func TestCgroupNoConstraintsPath(t *testing.T) { - assert := assert.New(t) - - cgrouPath := "abc" - expectedPath := filepath.Join(cgroupKataPath, cgrouPath) - path := cgroupNoConstraintsPath(cgrouPath) - assert.Equal(expectedPath, path) -} - -func TestUpdateCgroups(t *testing.T) { - assert := assert.New(t) - - oldCgroupsNew := cgroupsNewFunc - oldCgroupsLoad := cgroupsLoadFunc - cgroupsNewFunc = cgroups.New - cgroupsLoadFunc = cgroups.Load - defer func() { - cgroupsNewFunc = oldCgroupsNew - cgroupsLoadFunc = oldCgroupsLoad - }() - - s := &Sandbox{ - state: types.SandboxState{ - CgroupPath: "", - }, - config: &SandboxConfig{SandboxCgroupOnly: false}, - } - - ctx := context.Background() - - // empty path - err := s.cgroupsUpdate(ctx) - assert.NoError(err) - - // path doesn't exist - s.state.CgroupPath = "/abc/123/rgb" - err = s.cgroupsUpdate(ctx) - assert.Error(err) - - if os.Getuid() != 0 { - return - } - - s.state.CgroupPath = fmt.Sprintf("/kata-tests-%d", os.Getpid()) - testCgroup, err := cgroups.New(cgroups.V1, cgroups.StaticPath(s.state.CgroupPath), &specs.LinuxResources{}) - assert.NoError(err) - defer testCgroup.Delete() - s.hypervisor = &mockHypervisor{mockPid: 0} - - // bad pid - err = s.cgroupsUpdate(ctx) - assert.Error(err) - - // fake workload - cmd := exec.Command("tail", "-f", "/dev/null") - assert.NoError(cmd.Start()) - s.hypervisor = &mockHypervisor{mockPid: cmd.Process.Pid} - - // no containers - err = s.cgroupsUpdate(ctx) - assert.NoError(err) - - s.config = &SandboxConfig{} - s.config.HypervisorConfig.NumVCPUs = 1 - - s.containers = map[string]*Container{ - "abc": { - process: Process{ - Pid: cmd.Process.Pid, - }, - config: &ContainerConfig{ - Annotations: containerAnnotations, - CustomSpec: newEmptySpec(), - }, - }, - "xyz": { - process: Process{ - Pid: cmd.Process.Pid, - }, - config: &ContainerConfig{ - Annotations: containerAnnotations, - CustomSpec: newEmptySpec(), - }, - }, - } - - err = s.cgroupsUpdate(context.Background()) - assert.NoError(err) - - // cleanup - assert.NoError(cmd.Process.Kill()) - err = s.cgroupsDelete() - assert.NoError(err) -} diff --git a/src/runtime/virtcontainers/persist.go b/src/runtime/virtcontainers/persist.go index 203495e823..f72de590fe 100644 --- a/src/runtime/virtcontainers/persist.go +++ b/src/runtime/virtcontainers/persist.go @@ -33,8 +33,8 @@ func (s *Sandbox) dumpState(ss *persistapi.SandboxState, cs map[string]persistap ss.GuestMemoryBlockSizeMB = s.state.GuestMemoryBlockSizeMB ss.GuestMemoryHotplugProbe = s.state.GuestMemoryHotplugProbe ss.State = string(s.state.State) - ss.CgroupPath = s.state.CgroupPath - ss.CgroupPaths = s.state.CgroupPaths + ss.SandboxCgroupPath = s.state.SandboxCgroupPath + ss.OverheadCgroupPath = s.state.OverheadCgroupPath for id, cont := range s.containers { state := persistapi.ContainerState{} @@ -188,7 +188,6 @@ func (s *Sandbox) dumpConfig(ss *persistapi.SandboxState) { SystemdCgroup: sconfig.SystemdCgroup, SandboxCgroupOnly: sconfig.SandboxCgroupOnly, DisableGuestSeccomp: 
sconfig.DisableGuestSeccomp, - Cgroups: sconfig.Cgroups, } ss.Config.SandboxBindMounts = append(ss.Config.SandboxBindMounts, sconfig.SandboxBindMounts...) @@ -302,8 +301,8 @@ func (s *Sandbox) loadState(ss persistapi.SandboxState) { s.state.GuestMemoryBlockSizeMB = ss.GuestMemoryBlockSizeMB s.state.BlockIndexMap = ss.HypervisorState.BlockIndexMap s.state.State = types.StateString(ss.State) - s.state.CgroupPath = ss.CgroupPath - s.state.CgroupPaths = ss.CgroupPaths + s.state.SandboxCgroupPath = ss.SandboxCgroupPath + s.state.OverheadCgroupPath = ss.OverheadCgroupPath s.state.GuestMemoryHotplugProbe = ss.GuestMemoryHotplugProbe } @@ -459,7 +458,6 @@ func loadSandboxConfig(id string) (*SandboxConfig, error) { SystemdCgroup: savedConf.SystemdCgroup, SandboxCgroupOnly: savedConf.SandboxCgroupOnly, DisableGuestSeccomp: savedConf.DisableGuestSeccomp, - Cgroups: savedConf.Cgroups, } sconfig.SandboxBindMounts = append(sconfig.SandboxBindMounts, savedConf.SandboxBindMounts...) diff --git a/src/runtime/virtcontainers/persist/api/sandbox.go b/src/runtime/virtcontainers/persist/api/sandbox.go index 61b4afe88e..1398cc20f4 100644 --- a/src/runtime/virtcontainers/persist/api/sandbox.go +++ b/src/runtime/virtcontainers/persist/api/sandbox.go @@ -30,9 +30,12 @@ type SandboxState struct { // SandboxContainer specifies which container is used to start the sandbox/vm SandboxContainer string - // CgroupPath is the cgroup hierarchy where sandbox's processes - // including the hypervisor are placed. - CgroupPath string + // SandboxCgroupPath is the sandbox cgroup path + SandboxCgroupPath string + + // OverheadCgroupPath is the sandbox overhead cgroup path. + // It can be an empty string if sandbox_cgroup_only is set. + OverheadCgroupPath string // HypervisorState saves hypervisor specific data HypervisorState HypervisorState diff --git a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go index a138be092f..7d2681b962 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/cgroups.go +++ b/src/runtime/virtcontainers/pkg/cgroups/cgroups.go @@ -26,6 +26,17 @@ type Cgroup struct { sync.Mutex } +var ( + cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") +) + +// SetLogger sets up a logger for this pkg +func SetLogger(logger *logrus.Entry) { + fields := cgroupsLogger.Data + + cgroupsLogger = logger.WithFields(fields) +} + func deviceToDeviceCgroup(device string) (*specs.LinuxDeviceCgroup, error) { var st unix.Stat_t diff --git a/src/runtime/virtcontainers/pkg/cgroups/manager.go b/src/runtime/virtcontainers/pkg/cgroups/manager.go deleted file mode 100644 index dad8bd4237..0000000000 --- a/src/runtime/virtcontainers/pkg/cgroups/manager.go +++ /dev/null @@ -1,355 +0,0 @@ -// Copyright (c) 2020 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -package cgroups - -import ( - "bufio" - "context" - "errors" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - "sync" - - "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" - "github.com/opencontainers/runc/libcontainer" - libcontcgroups "github.com/opencontainers/runc/libcontainer/cgroups" - libcontcgroupsfs "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/specconv" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" -) - -type Config struct { - // Cgroups specifies specific cgroup settings for the 
various subsystems that the container is - // placed into to limit the resources the container has available - // If nil, New() will create one. - Cgroups *configs.Cgroup - - // CgroupPaths contains paths to all the cgroups setup for a container. Key is cgroup subsystem name - // with the value as the path. - CgroupPaths map[string]string - - // Resources represents the runtime resource constraints - Resources specs.LinuxResources - - // CgroupPath is the OCI spec cgroup path - CgroupPath string -} - -type Manager struct { - mgr libcontcgroups.Manager - sync.Mutex -} - -const ( - // file in the cgroup that contains the pids - cgroupProcs = "cgroup.procs" -) - -var ( - cgroupsLogger = logrus.WithField("source", "virtcontainers/pkg/cgroups") -) - -// SetLogger sets up a logger for this pkg -func SetLogger(logger *logrus.Entry) { - fields := cgroupsLogger.Data - - cgroupsLogger = logger.WithFields(fields) -} - -// returns the list of devices that a hypervisor may need -func hypervisorDevices() []specs.LinuxDeviceCgroup { - devices := []specs.LinuxDeviceCgroup{} - - // Processes running in a device-cgroup are constrained, they have acccess - // only to the devices listed in the devices.list file. - // In order to run Virtual Machines and create virtqueues, hypervisors - // need access to certain character devices in the host, like kvm and vhost-net. - hypervisorDevices := []string{ - "/dev/kvm", // To run virtual machines - "/dev/vhost-net", // To create virtqueues - "/dev/vfio/vfio", // To access VFIO devices - } - - for _, device := range hypervisorDevices { - ldevice, err := DeviceToLinuxDevice(device) - if err != nil { - cgroupsLogger.WithError(err).Warnf("Could not get device information") - continue - } - devices = append(devices, ldevice) - } - - return devices -} - -// New creates a new CgroupManager -func New(config *Config) (*Manager, error) { - var err error - - devices := config.Resources.Devices - devices = append(devices, hypervisorDevices()...) - // Do not modify original devices - config.Resources.Devices = devices - - newSpec := specs.Spec{ - Linux: &specs.Linux{ - Resources: &config.Resources, - }, - } - - rootless := rootless.IsRootless() - - cgroups := config.Cgroups - cgroupPaths := config.CgroupPaths - - // determine if we are utilizing systemd managed cgroups based on the path provided - useSystemdCgroup := IsSystemdCgroup(config.CgroupPath) - - // Create a new cgroup if the current one is nil - // this cgroups must be saved later - if cgroups == nil { - if config.CgroupPath == "" && !rootless { - cgroupsLogger.Warn("cgroups have not been created and cgroup path is empty") - } - - newSpec.Linux.CgroupsPath, err = ValidCgroupPath(config.CgroupPath, useSystemdCgroup) - if err != nil { - return nil, fmt.Errorf("Invalid cgroup path: %v", err) - } - - if cgroups, err = specconv.CreateCgroupConfig(&specconv.CreateOpts{ - // cgroup name is taken from spec - CgroupName: "", - UseSystemdCgroup: useSystemdCgroup, - Spec: &newSpec, - RootlessCgroups: rootless, - }, nil); err != nil { - return nil, fmt.Errorf("Could not create cgroup config: %v", err) - } - } - - // Set cgroupPaths to nil when the map is empty, it can and will be - // populated by `Manager.Apply()` when the runtime or any other process - // is moved to the cgroup. 
- if len(cgroupPaths) == 0 { - cgroupPaths = nil - } - - if useSystemdCgroup { - factory, err := libcontainer.New("") - if err != nil { - return nil, fmt.Errorf("Could not create linux factory for systemd cgroup manager: %v", err) - } - lfactory, ok := factory.(*libcontainer.LinuxFactory) - if !ok { - return nil, errors.New("expected linux factory returned on linux based systems") - } - - err = libcontainer.SystemdCgroups(lfactory) - - if err != nil { - return nil, fmt.Errorf("Could not create systemd cgroup manager: %v", err) - } - - return &Manager{ - mgr: lfactory.NewCgroupsManager(cgroups, cgroupPaths), - }, nil - } - - return &Manager{ - mgr: libcontcgroupsfs.NewManager(cgroups, cgroupPaths, rootless), - }, nil -} - -// read all the pids in cgroupPath -func readPids(cgroupPath string) ([]int, error) { - pids := []int{} - f, err := os.Open(filepath.Join(cgroupPath, cgroupProcs)) - if err != nil { - return nil, err - } - defer f.Close() - buf := bufio.NewScanner(f) - - for buf.Scan() { - if t := buf.Text(); t != "" { - pid, err := strconv.Atoi(t) - if err != nil { - return nil, err - } - pids = append(pids, pid) - } - } - return pids, nil -} - -// write the pids into cgroup.procs -func writePids(pids []int, cgroupPath string) error { - cgroupProcsPath := filepath.Join(cgroupPath, cgroupProcs) - for _, pid := range pids { - if err := ioutil.WriteFile(cgroupProcsPath, - []byte(strconv.Itoa(pid)), - os.FileMode(0), - ); err != nil { - return err - } - } - return nil -} - -func (m *Manager) logger() *logrus.Entry { - return cgroupsLogger.WithField("source", "cgroup-manager") -} - -// move all the processes in the current cgroup to the parent -func (m *Manager) moveToParent() error { - m.Lock() - defer m.Unlock() - for _, cgroupPath := range m.mgr.GetPaths() { - - pids, err := readPids(cgroupPath) - // possible that the cgroupPath doesn't exist. If so, skip: - if os.IsNotExist(err) { - // The cgroup is not present on the filesystem: no pids to move. The systemd cgroup - // manager lists all of the subsystems, including those that are not actually being managed. - continue - } - if err != nil { - return err - } - - if len(pids) == 0 { - // no pids in this cgroup - continue - } - - cgroupParentPath := filepath.Dir(filepath.Clean(cgroupPath)) - if err = writePids(pids, cgroupParentPath); err != nil { - if !strings.Contains(err.Error(), "no such process") { - return err - } - } - } - return nil -} - -// Add pid to cgroups -func (m *Manager) Add(pid int) error { - if rootless.IsRootless() { - m.logger().Debug("Unable to setup add pids to cgroup: running rootless") - return nil - } - - m.Lock() - defer m.Unlock() - return m.mgr.Apply(pid) -} - -// Apply constraints -func (m *Manager) Apply() error { - if rootless.IsRootless() { - m.logger().Debug("Unable to apply constraints: running rootless") - return nil - } - - cgroups, err := m.GetCgroups() - if err != nil { - return err - } - - m.Lock() - defer m.Unlock() - return m.mgr.Set(cgroups.Resources) -} - -func (m *Manager) GetCgroups() (*configs.Cgroup, error) { - m.Lock() - defer m.Unlock() - return m.mgr.GetCgroups() -} - -func (m *Manager) GetPaths() map[string]string { - m.Lock() - defer m.Unlock() - return m.mgr.GetPaths() -} - -func (m *Manager) Destroy() error { - // cgroup can't be destroyed if it contains running processes - if err := m.moveToParent(); err != nil { - // If the process migration to the parent cgroup fails, then - // we expect the Destroy to fail as well. 
Let's log an error here - // and attempt to execute the Destroy still to help cleanup the hosts' FS. - m.logger().WithError(err).Error("Could not move processes into parent cgroup") - } - - m.Lock() - defer m.Unlock() - return m.mgr.Destroy() -} - -// AddDevice adds a device to the device cgroup -func (m *Manager) AddDevice(ctx context.Context, device string) error { - cgroups, err := m.GetCgroups() - if err != nil { - return err - } - - ld, err := DeviceToCgroupDeviceRule(device) - if err != nil { - return err - } - - m.Lock() - cgroups.Devices = append(cgroups.Devices, ld) - m.Unlock() - - return m.Apply() -} - -// RemoceDevice removed a device from the device cgroup -func (m *Manager) RemoveDevice(device string) error { - cgroups, err := m.GetCgroups() - if err != nil { - return err - } - - ld, err := DeviceToCgroupDeviceRule(device) - if err != nil { - return err - } - - m.Lock() - for i, d := range cgroups.Devices { - if d.Major == ld.Major && d.Minor == ld.Minor { - cgroups.Devices = append(cgroups.Devices[:i], cgroups.Devices[i+1:]...) - m.Unlock() - return m.Apply() - } - } - m.Unlock() - return fmt.Errorf("device %v not found in the cgroup", device) -} - -func (m *Manager) SetCPUSet(cpuset, memset string) error { - cgroups, err := m.GetCgroups() - if err != nil { - return err - } - - m.Lock() - cgroups.CpusetCpus = cpuset - cgroups.CpusetMems = memset - m.Unlock() - - return m.Apply() -} diff --git a/src/runtime/virtcontainers/pkg/cgroups/manager_test.go b/src/runtime/virtcontainers/pkg/cgroups/manager_test.go deleted file mode 100644 index 1e868cf692..0000000000 --- a/src/runtime/virtcontainers/pkg/cgroups/manager_test.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright (c) 2020 Intel Corporation -// -// SPDX-License-Identifier: Apache-2.0 -// - -package cgroups - -import ( - "testing" - - "github.com/stretchr/testify/assert" -) - -//very very basic test; should be expanded -func TestNew(t *testing.T) { - assert := assert.New(t) - - // create a cgroupfs cgroup manager - c := &Config{ - Cgroups: nil, - CgroupPath: "", - } - - mgr, err := New(c) - assert.NoError(err) - assert.NotNil(mgr.mgr) - - // create a systemd cgroup manager - s := &Config{ - Cgroups: nil, - CgroupPath: "system.slice:kubepod:container", - } - - mgr, err = New(s) - assert.NoError(err) - assert.NotNil(mgr.mgr) - -} diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils.go b/src/runtime/virtcontainers/pkg/cgroups/utils.go index f5540f1730..2915c471c5 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils.go +++ b/src/runtime/virtcontainers/pkg/cgroups/utils.go @@ -49,13 +49,13 @@ func ValidCgroupPath(path string, systemdCgroup bool) (string, error) { // In the case of an absolute path (starting with /), the runtime MUST // take the path to be relative to the cgroups mount point. if filepath.IsAbs(path) { - return RenameCgroupPath(filepath.Clean(path)) + return filepath.Clean(path), nil } // In the case of a relative path (not starting with /), the runtime MAY // interpret the path relative to a runtime-determined location in the cgroups hierarchy. 
// clean up path and return a new path relative to DefaultCgroupPath - return RenameCgroupPath(filepath.Join(DefaultCgroupPath, filepath.Clean("/"+path))) + return filepath.Join(DefaultCgroupPath, filepath.Clean("/"+path)), nil } func IsSystemdCgroup(cgroupPath string) bool { diff --git a/src/runtime/virtcontainers/pkg/cgroups/utils_test.go b/src/runtime/virtcontainers/pkg/cgroups/utils_test.go index 27c7aa709a..f02623d908 100644 --- a/src/runtime/virtcontainers/pkg/cgroups/utils_test.go +++ b/src/runtime/virtcontainers/pkg/cgroups/utils_test.go @@ -62,6 +62,8 @@ func TestValidCgroupPath(t *testing.T) { {"/../hi", false, false}, {"/../hi/foo", false, false}, {"o / m /../ g", false, false}, + {"/overhead/foobar", false, false}, + {"/sys/fs/cgroup/cpu/sandbox/kata_foobar", false, false}, // invalid systemd paths {"o / m /../ g", true, true}, @@ -93,13 +95,13 @@ func TestValidCgroupPath(t *testing.T) { if filepath.IsAbs(t.path) { cleanPath := filepath.Dir(filepath.Clean(t.path)) assert.True(strings.HasPrefix(path, cleanPath), - "%v should have prefix %v", cleanPath) + "%v should have prefix %v", path, cleanPath) } else if t.systemdCgroup { assert.Equal(t.path, path) } else { - assert.True(strings.HasPrefix(path, "/"+CgroupKataPrefix) || + assert.True( strings.HasPrefix(path, DefaultCgroupPath), - "%v should have prefix /%v or %v", path, CgroupKataPrefix, DefaultCgroupPath) + "%v should have prefix /%v", path, DefaultCgroupPath) } } diff --git a/src/runtime/virtcontainers/sandbox.go b/src/runtime/virtcontainers/sandbox.go index de113a699b..4812d5a51a 100644 --- a/src/runtime/virtcontainers/sandbox.go +++ b/src/runtime/virtcontainers/sandbox.go @@ -20,9 +20,7 @@ import ( "sync" "syscall" - "github.com/containerd/cgroups" "github.com/containernetworking/plugins/pkg/ns" - "github.com/opencontainers/runc/libcontainer/configs" specs "github.com/opencontainers/runtime-spec/specs-go" "github.com/pkg/errors" "github.com/sirupsen/logrus" @@ -39,7 +37,7 @@ import ( pbTypes "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/agent/protocols/grpc" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" - vccgroups "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" + "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cgroups" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/compatoci" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/cpuset" "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/rootless" @@ -118,10 +116,6 @@ type SandboxConfig struct { // Experimental features enabled Experimental []exp.Feature - // Cgroups specifies specific cgroup settings for the various subsystems that the container is - // placed into to limit the resources the container has available - Cgroups *configs.Cgroup - // Annotations keys must be unique strings and must be name-spaced // with e.g. reverse domain notation (org.clearlinux.key). 
Annotations map[string]string @@ -188,8 +182,8 @@ type Sandbox struct { config *SandboxConfig annotationsLock *sync.RWMutex wg *sync.WaitGroup - sandboxCgroup *vccgroups.Manager - overheadCgroup *vccgroups.Manager + sandboxCgroup *cgroups.Cgroup + overheadCgroup *cgroups.Cgroup cw *consoleWatcher containers map[string]*Container @@ -592,6 +586,13 @@ func (s *Sandbox) createCgroups() error { // Kata relies on the cgroup parent created and configured by the container // engine by default. The exception is for devices whitelist as well as sandbox-level // CPUSet. + // For the sandbox cgroups we create and manage, rename the base of the cgroup path to + // include "kata_" + cgroupPath, err = cgroups.RenameCgroupPath(cgroupPath) + if err != nil { + return err + } + if spec.Linux.Resources != nil { resources.Devices = spec.Linux.Resources.Devices @@ -637,7 +638,7 @@ func (s *Sandbox) createCgroups() error { if s.devManager != nil { for _, d := range s.devManager.GetAllDevices() { - dev, err := vccgroups.DeviceToLinuxDevice(d.GetHostPath()) + dev, err := cgroups.DeviceToLinuxDevice(d.GetHostPath()) if err != nil { s.Logger().WithError(err).WithField("device", d.GetHostPath()).Warn("Could not add device to sandbox resources") continue @@ -650,22 +651,14 @@ func (s *Sandbox) createCgroups() error { // Depending on the SandboxCgroupOnly value, this cgroup // will either hold all the pod threads (SandboxCgroupOnly is true) // or only the virtual CPU ones (SandboxCgroupOnly is false). - if s.sandboxCgroup, err = vccgroups.New( - &vccgroups.Config{ - Cgroups: s.config.Cgroups, - CgroupPaths: s.state.CgroupPaths, - Resources: resources, - CgroupPath: cgroupPath, - }, - ); err != nil { - return err + s.sandboxCgroup, err = cgroups.NewSandboxCgroup(cgroupPath, &resources) + if err != nil { + return fmt.Errorf("Could not create the sandbox cgroup %v", err) } - // Now that the sandbox cgroup is created, we can set the state cgroup root path. - s.state.CgroupPath, err = vccgroups.ValidCgroupPath(cgroupPath, s.config.SystemdCgroup) - if err != nil { - return fmt.Errorf("Invalid cgroup path: %v", err) - } + // Now that the sandbox cgroup is created, we can set the state cgroup root paths. + s.state.SandboxCgroupPath = s.sandboxCgroup.Path() + s.state.OverheadCgroupPath = "" if s.config.SandboxCgroupOnly { s.overheadCgroup = nil @@ -674,16 +667,12 @@ func (s *Sandbox) createCgroups() error { // into the sandbox cgroup. // We're creating an overhead cgroup, with no constraints. Everything but // the vCPU threads will eventually make it there. 
- if s.overheadCgroup, err = vccgroups.New( - &vccgroups.Config{ - Cgroups: nil, - CgroupPaths: nil, - Resources: specs.LinuxResources{}, - CgroupPath: cgroupKataOverheadPath, - }, - ); err != nil { + overheadCgroup, err := cgroups.NewCgroup(fmt.Sprintf("/%s/%s", cgroupKataOverheadPath, s.id), &specs.LinuxResources{}) + if err != nil { return err } + s.overheadCgroup = overheadCgroup + s.state.OverheadCgroupPath = s.overheadCgroup.Path() } return nil @@ -1540,33 +1529,15 @@ func (s *Sandbox) StatsContainer(ctx context.Context, containerID string) (Conta // Stats returns the stats of a running sandbox func (s *Sandbox) Stats(ctx context.Context) (SandboxStats, error) { - if s.state.CgroupPath == "" { - return SandboxStats{}, fmt.Errorf("sandbox cgroup path is empty") - } - var path string - var cgroupSubsystems cgroups.Hierarchy - - if s.config.SandboxCgroupOnly { - cgroupSubsystems = cgroups.V1 - path = s.state.CgroupPath - } else { - cgroupSubsystems = V1NoConstraints - path = cgroupNoConstraintsPath(s.state.CgroupPath) - } - - cgroup, err := cgroupsLoadFunc(cgroupSubsystems, cgroups.StaticPath(path)) - if err != nil { - return SandboxStats{}, fmt.Errorf("Could not load sandbox cgroup in %v: %v", s.state.CgroupPath, err) - } - - metrics, err := cgroup.Stat(cgroups.ErrorHandler(cgroups.IgnoreNotExist)) + metrics, err := s.sandboxCgroup.Stat() if err != nil { return SandboxStats{}, err } stats := SandboxStats{} + // TODO Do we want to aggregate the overhead cgroup stats to the sandbox ones? stats.CgroupStats.CPUStats.CPUUsage.TotalUsage = metrics.CPU.Usage.Total stats.CgroupStats.MemoryStats.Usage.Usage = metrics.Memory.Usage.Usage tids, err := s.hypervisor.getThreadIDs(ctx) @@ -1796,15 +1767,9 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy span, ctx := katatrace.Trace(ctx, s.Logger(), "HotplugAddDevice", sandboxTracingTags, map[string]string{"sandbox_id": s.id}) defer span.End() - if s.config.SandboxCgroupOnly { - // We are about to add a device to the hypervisor, - // the device cgroup MUST be updated since the hypervisor - // will need access to such device - hdev := device.GetHostPath() - if err := s.sandboxCgroup.AddDevice(ctx, hdev); err != nil { - s.Logger().WithError(err).WithField("device", hdev). - Warn("Could not add device to cgroup") - } + if err := s.sandboxCgroup.AddDevice(device.GetHostPath()); err != nil { + s.Logger().WithError(err).WithField("device", device). + Warn("Could not add device to cgroup") } switch devType { @@ -1852,14 +1817,9 @@ func (s *Sandbox) HotplugAddDevice(ctx context.Context, device api.Device, devTy // Sandbox implement DeviceReceiver interface from device/api/interface.go func (s *Sandbox) HotplugRemoveDevice(ctx context.Context, device api.Device, devType config.DeviceType) error { defer func() { - if s.config.SandboxCgroupOnly { - // Remove device from cgroup, the hypervisor - // should not have access to such device anymore. - hdev := device.GetHostPath() - if err := s.sandboxCgroup.RemoveDevice(hdev); err != nil { - s.Logger().WithError(err).WithField("device", hdev). - Warn("Could not remove device from cgroup") - } + if err := s.sandboxCgroup.RemoveDevice(device.GetHostPath()); err != nil { + s.Logger().WithError(err).WithField("device", device). + Warn("Could not add device to cgroup") } }() @@ -2140,7 +2100,7 @@ func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { } // We update the sandbox cgroup with potentially new virtual CPUs. 
- if err := s.sandboxCgroup.SetCPUSet(cpuset, memset); err != nil { + if err := s.sandboxCgroup.UpdateCpuSet(cpuset, memset); err != nil { return err } @@ -2160,21 +2120,39 @@ func (s *Sandbox) cgroupsUpdate(ctx context.Context) error { // to the parent and then delete the sandbox cgroup func (s *Sandbox) cgroupsDelete() error { s.Logger().Debug("Deleting sandbox cgroup") - if s.state.CgroupPath == "" { - s.Logger().Warnf("sandbox cgroups path is empty") + if s.state.SandboxCgroupPath == "" { + s.Logger().Warnf("sandbox cgroup path is empty") return nil } - if s.overheadCgroup != nil { - if err := s.overheadCgroup.Destroy(); err != nil { - return err - } + sandboxCgroup, err := cgroups.Load(s.state.SandboxCgroupPath) + if err != nil { + return err } - if err := s.sandboxCgroup.Destroy(); err != nil { + if err := sandboxCgroup.MoveToParent(); err != nil { return err } + if err := sandboxCgroup.Delete(); err != nil { + return err + } + + if s.state.OverheadCgroupPath != "" { + overheadCgroup, err := cgroups.Load(s.state.OverheadCgroupPath) + if err != nil { + return err + } + + if err := s.overheadCgroup.MoveToParent(); err != nil { + return err + } + + if err := overheadCgroup.Delete(); err != nil { + return err + } + } + return nil } @@ -2187,7 +2165,7 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context) error { // All vCPU threads move to the sandbox cgroup. for _, i := range tids.vcpus { - if err := s.sandboxCgroup.Add(i); err != nil { + if err := s.sandboxCgroup.AddTask(i); err != nil { return err } } @@ -2198,8 +2176,6 @@ func (s *Sandbox) constrainHypervisor(ctx context.Context) error { // setupCgroups adds the runtime process to either the sandbox cgroup or the overhead one, // depending on the sandbox_cgroup_only configuration setting. func (s *Sandbox) setupCgroups() error { - var err error - vmmCgroup := s.sandboxCgroup if s.overheadCgroup != nil { vmmCgroup = s.overheadCgroup @@ -2211,27 +2187,8 @@ func (s *Sandbox) setupCgroups() error { // then move the vCPU threads between cgroups. runtimePid := os.Getpid() // Add the runtime to the VMM sandbox cgroup - if err := vmmCgroup.Add(runtimePid); err != nil { - return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) - } - - // `Apply` updates the sandbox cgroup Cgroups and CgroupPaths, - // they both need to be saved since they are used to create - // or restore the sandbox cgroup. 
- if s.config.Cgroups, err = vmmCgroup.GetCgroups(); err != nil { - return fmt.Errorf("Could not get cgroup configuration: %v", err) - } - - s.state.CgroupPaths = vmmCgroup.GetPaths() - - if err := s.sandboxCgroup.Apply(); err != nil { - return fmt.Errorf("Could not constrain cgroup: %v", err) - } - - if s.overheadCgroup != nil { - if err = s.overheadCgroup.Apply(); err != nil { - return fmt.Errorf("Could not constrain cgroup: %v", err) - } + if err := vmmCgroup.AddProcess(runtimePid); err != nil { + return fmt.Errorf("Could not add runtime PID %d to sandbox cgroup: %v", runtimePid, err) } return nil diff --git a/src/runtime/virtcontainers/sandbox_test.go b/src/runtime/virtcontainers/sandbox_test.go index a39f241f4f..b445d0893b 100644 --- a/src/runtime/virtcontainers/sandbox_test.go +++ b/src/runtime/virtcontainers/sandbox_test.go @@ -52,6 +52,10 @@ func testCreateSandbox(t *testing.T, id string, nconfig NetworkConfig, containers []ContainerConfig, volumes []types.Volume) (*Sandbox, error) { + if tc.NotValid(ktu.NeedRoot()) { + t.Skip(testDisabledAsNonRoot) + } + sconfig := SandboxConfig{ ID: id, HypervisorType: htype, @@ -963,22 +967,6 @@ func TestEnterContainer(t *testing.T) { assert.Nil(t, err, "Enter container failed: %v", err) } -func TestDeleteStoreWhenCreateContainerFail(t *testing.T) { - hypervisorConfig := newHypervisorConfig(nil, nil) - s, err := testCreateSandbox(t, testSandboxID, MockHypervisor, hypervisorConfig, NetworkConfig{}, nil, nil) - if err != nil { - t.Fatal(err) - } - defer cleanUp() - - contID := "999" - contConfig := newTestContainerConfigNoop(contID) - contConfig.RootFs = RootFs{Target: "", Mounted: true} - s.state.CgroupPath = filepath.Join(testDir, "bad-cgroup") - _, err = s.CreateContainer(context.Background(), contConfig) - assert.NotNil(t, err, "Should fail to create container due to wrong cgroup") -} - func TestDeleteStoreWhenNewContainerFail(t *testing.T) { hConfig := newHypervisorConfig(nil, nil) p, err := testCreateSandbox(t, testSandboxID, MockHypervisor, hConfig, NetworkConfig{}, nil, nil) @@ -1451,7 +1439,7 @@ func TestSandboxExperimentalFeature(t *testing.T) { assert.True(t, sconfig.valid()) } -func TestSandbox_SetupSandboxCgroup(t *testing.T) { +func TestSandbox_Cgroups(t *testing.T) { sandboxContainer := ContainerConfig{} sandboxContainer.Annotations = make(map[string]string) sandboxContainer.Annotations[annotations.ContainerTypeKey] = string(PodSandbox) @@ -1486,8 +1474,8 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) { { "New sandbox, new config", &Sandbox{config: &SandboxConfig{}}, - true, false, + true, }, { "sandbox, container no sandbox type", @@ -1495,8 +1483,8 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) { config: &SandboxConfig{Containers: []ContainerConfig{ {}, }}}, - true, false, + true, }, { "sandbox, container sandbox type", @@ -1504,8 +1492,8 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) { config: &SandboxConfig{Containers: []ContainerConfig{ sandboxContainer, }}}, - true, false, + true, }, { "sandbox, empty linux json", @@ -1532,9 +1520,16 @@ func TestSandbox_SetupSandboxCgroup(t *testing.T) { } t.Run(tt.name, func(t *testing.T) { - tt.s.createCgroupManager() - if err := tt.s.setupSandboxCgroup(); (err != nil) != tt.wantErr { - t.Errorf("Sandbox.SetupSandboxCgroupOnly() error = %v, wantErr %v", err, tt.wantErr) + err := tt.s.createCgroups() + t.Logf("create groups error %v", err) + if (err != nil) != tt.wantErr { + t.Errorf("Sandbox.CreateCgroups() error = %v, wantErr %v", err, tt.wantErr) + } + + if err == nil { + 
if err := tt.s.setupCgroups(); (err != nil) != tt.wantErr { + t.Errorf("Sandbox.SetupCgroups() error = %v, wantErr %v", err, tt.wantErr) + } } }) } diff --git a/src/runtime/virtcontainers/types/sandbox.go b/src/runtime/virtcontainers/types/sandbox.go index 902017a6fb..f4fc3e503f 100644 --- a/src/runtime/virtcontainers/types/sandbox.go +++ b/src/runtime/virtcontainers/types/sandbox.go @@ -50,9 +50,15 @@ type SandboxState struct { State StateString `json:"state"` - // CgroupPath is the cgroup hierarchy where sandbox's processes - // including the hypervisor are placed. - CgroupPath string `json:"cgroupPath,omitempty"` + // SandboxCgroupPath is the cgroup path for all the sandbox processes, + // when sandbox_cgroup_only is set. When it's not set, part of those + // processes will be living under the overhead cgroup. + SandboxCgroupPath string `json:"sandboxCgroupPath,omitempty"` + + // OverheadCgroupPath is the path to the optional overhead cgroup + // path holding processes that should not be part of the sandbox + // cgroup. + OverheadCgroupPath string `json:"overheadCgroupPath,omitempty"` // PersistVersion indicates current storage api version. // It's also known as ABI version of kata-runtime. From 8d9d6e6af053ba089691be3b4060b21462b49c8e Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Mon, 19 Jul 2021 20:55:28 +0200 Subject: [PATCH 6/7] docs: Host cgroups documentation update Update according to the new sandbox/overhead cgroup split. Signed-off-by: Samuel Ortiz --- docs/design/host-cgroups.md | 326 +++++++++++++++++++++--------------- 1 file changed, 187 insertions(+), 139 deletions(-) diff --git a/docs/design/host-cgroups.md b/docs/design/host-cgroups.md index 5bec21f0bb..cabdfe47a2 100644 --- a/docs/design/host-cgroups.md +++ b/docs/design/host-cgroups.md @@ -12,187 +12,244 @@ The OCI [runtime specification][linux-config] provides guidance on where the con > [`cgroupsPath`][cgroupspath]: (string, OPTIONAL) path to the cgroups. It can be used to either control the cgroups > hierarchy for containers or to run a new process in an existing container -cgroups are hierarchical, and this can be seen with the following pod example: +Cgroups are hierarchical, and this can be seen with the following pod example: - Pod 1: `cgroupsPath=/kubepods/pod1` - - Container 1: -`cgroupsPath=/kubepods/pod1/container1` - - Container 2: -`cgroupsPath=/kubepods/pod1/container2` + - Container 1: `cgroupsPath=/kubepods/pod1/container1` + - Container 2: `cgroupsPath=/kubepods/pod1/container2` - Pod 2: `cgroupsPath=/kubepods/pod2` - - Container 1: -`cgroupsPath=/kubepods/pod2/container2` - - Container 2: -`cgroupsPath=/kubepods/pod2/container2` + - Container 1: `cgroupsPath=/kubepods/pod2/container2` + - Container 2: `cgroupsPath=/kubepods/pod2/container2` -Depending on the upper-level orchestrator, the cgroup under which the pod is placed is -managed by the orchestrator. In the case of Kubernetes, the pod-cgroup is created by Kubelet, -while the container cgroups are to be handled by the runtime. Kubelet will size the pod-cgroup -based on the container resource requirements. +Depending on the upper-level orchestration layers, the cgroup under which the pod is placed is +managed by the orchestrator or not. In the case of Kubernetes, the pod cgroup is created by Kubelet, +while the container cgroups are to be handled by the runtime. 
+Kubelet will size the pod cgroup based on the container resource requirements, to which it may add
+a configured set of [pod resource overheads](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/).
 
-Kata Containers introduces a non-negligible overhead for running a sandbox (pod). Based on this, two scenarios are possible:
- 1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod-cgroup, or
- 2) Kata Containers do not fully constrain the VMM and associated processes, instead placing a subset of them outside of the pod-cgroup.
+Kata Containers introduces a non-negligible resource overhead for running a sandbox (pod). Typically, the Kata shim,
+through its underlying VMM invocation, will create many additional threads compared to process-based container runtimes:
+the para-virtualized I/O back-ends, the VMM instance and even the Kata shim process itself all consume host
+memory and CPU time that is not directly tied to the container workload, and together they make up the sandbox resource overhead.
+In order for a Kata workload to run without significant performance degradation, its sandbox overhead must be
+provisioned accordingly. Two scenarios are possible:
 
-Kata Containers provides two options for how cgroups are handled on the host. Selection of these options is done through
-the `SandboxCgroupOnly` flag within the Kata Containers [configuration](../../src/runtime/README.md#configuration)
-file.
+ 1) The upper-layer orchestrator takes the overhead of running a sandbox into account when sizing the pod cgroup.
+    For example, the Kubernetes [`PodOverhead`](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/)
+    feature lets the orchestrator add a configured sandbox overhead to the sum of all of its containers' resources. In
+    that case, the pod sandbox is properly sized and all Kata-created processes will run under the pod cgroup
+    defined constraints and limits.
+ 2) The upper-layer orchestrator does **not** take the sandbox overhead into account and the pod cgroup is not
+    sized to properly run all Kata-created processes. In that scenario, attaching all the Kata processes to the sandbox
+    cgroup may lead to non-negligible workload performance degradations. As a consequence, Kata Containers will move
+    all processes but the vCPU threads into a dedicated overhead cgroup under `/kata_overhead`. The Kata runtime will
+    not apply any constraints or limits to that cgroup; it is up to the infrastructure owner to optionally set it up.
 
-## `SandboxCgroupOnly` enabled
+These two scenarios are not dynamically detected by the Kata Containers runtime implementation, and thus the
+infrastructure owner must configure the runtime according to how the upper-layer orchestrator creates and sizes the
+pod cgroup. That configuration selection is done through the `sandbox_cgroup_only` flag within the Kata Containers
+[configuration](../../src/runtime/README.md#configuration) file.
 
-With `SandboxCgroupOnly` enabled, it is expected that the parent cgroup is sized to take the overhead of running
-a sandbox into account. This is ideal, as all the applicable Kata Containers components can be placed within the
-given cgroup-path.
+## `sandbox_cgroup_only = true`
 
-In the context of Kubernetes, Kubelet will size the pod-cgroup to take the overhead of running a Kata-based sandbox
-into account. This will be feasible in the 1.16 Kubernetes release through the `PodOverhead` feature.
+Setting `sandbox_cgroup_only` to `true` from the Kata Containers configuration file means that the pod cgroup is +properly sized and takes the pod overhead into account. This is ideal, as all the applicable Kata Containers processes +can simply be placed within the given cgroup path. + +In the context of Kubernetes, Kubelet can size the pod cgroup to take the overhead of running a Kata-based sandbox +into account. This has been supported since the 1.16 Kubernetes release, through the +[`PodOverhead`](https://kubernetes.io/docs/concepts/scheduling-eviction/pod-overhead/) feature. ``` -+----------------------------------------------------------+ -| +---------------------------------------------------+ | -| | +---------------------------------------------+ | | -| | | +--------------------------------------+ | | | -| | | | kata-shimv2, VMM and threads: | | | | -| | | | (VMM, IO-threads, vCPU threads, etc)| | | | -| | | | | | | | -| | | | kata_ | | | | -| | | +--------------------------------------+ | | | -| | | | | | -| | |Pod 1 | | | -| | +---------------------------------------------+ | | -| | | | -| | +---------------------------------------------+ | | -| | | +--------------------------------------+ | | | -| | | | kata-shimv2, VMM and threads: | | | | -| | | | (VMM, IO-threads, vCPU threads, etc)| | | | -| | | | | | | | -| | | | kata_ | | | | -| | | +--------------------------------------+ | | | -| | |Pod 2 | | | -| | +---------------------------------------------+ | | -| |kubepods | | -| +---------------------------------------------------+ | -| | -|Node | -+----------------------------------------------------------+ +┌─────────────────────────────────────────┐ +│ │ +│ ┌──────────────────────────────────┐ │ +│ │ │ │ +│ │ ┌─────────────────────────────┐ │ │ +│ │ │ │ │ │ +│ │ │ ┌─────────────────────┐ │ │ │ +│ │ │ │ vCPU threads │ │ │ │ +│ │ │ │ I/O threads │ │ │ │ +│ │ │ │ VMM │ │ │ │ +│ │ │ │ Kata Shim │ │ │ │ +│ │ │ │ │ │ │ │ +│ │ │ │ /kata_ │ │ │ │ +│ │ │ └─────────────────────┘ │ │ │ +│ │ │Pod 1 │ │ │ +│ │ └─────────────────────────────┘ │ │ +│ │ │ │ +│ │ ┌─────────────────────────────┐ │ │ +│ │ │ │ │ │ +│ │ │ ┌─────────────────────┐ │ │ │ +│ │ │ │ vCPU threads │ │ │ │ +│ │ │ │ I/O threads │ │ │ │ +│ │ │ │ VMM │ │ │ │ +│ │ │ │ Kata Shim │ │ │ │ +│ │ │ │ │ │ │ │ +│ │ │ │ /kata_ │ │ │ │ +│ │ │ └─────────────────────┘ │ │ │ +│ │ │Pod 2 │ │ │ +│ │ └─────────────────────────────┘ │ │ +│ │ │ │ +│ │/kubepods │ │ +│ └──────────────────────────────────┘ │ +│ │ +│ Node │ +└─────────────────────────────────────────┘ ``` -### What does Kata do in this configuration? -1. Given a `PodSandbox` container creation, let: +### Implementation details - ``` - podCgroup=Parent(container.CgroupsPath) - KataSandboxCgroup=/kata_ - ``` +When `sandbox_cgroup_only` is enabled, the Kata shim will create a per pod +sub-cgroup under the pod's dedicated cgroup. For example, in the Kubernetes context, +it will create a `/kata_` under the `/kubepods` cgroup hierarchy. +On a typical cgroup v1 hierarchy mounted under `/sys/fs/cgroup/`, the memory cgroup +subsystem for a pod with sandbox ID `12345678` would live under +`/sys/fs/cgroup/memory/kubepods/kata_12345678`. -2. Create the cgroup, `KataSandboxCgroup` +In most cases, the `/kata_` created cgroup is unrestricted and inherits and shares all +constraints and limits from the parent cgroup (`/kubepods` in the Kubernetes case). The exception is +for the `cpuset` and `devices` cgroup subsystems, which are managed by the Kata shim. -3. 
Join the `KataSandboxCgroup`
+After creating the `/kata_` cgroup, the Kata Containers shim will move itself to it, **before** starting
+the virtual machine. As a consequence, all processes subsequently created by the Kata Containers shim (the VMM itself, and
+all vCPU and I/O related threads) will be created in the `/kata_` cgroup.
-Any process created by the runtime will be created in `KataSandboxCgroup`.
-The runtime will limit the cgroup in the host only if the sandbox doesn't have a
-container type annotation, but the caller is free to set the proper limits for the `podCgroup`.
+### Why create a kata-cgroup under the parent cgroup?
-In the example above the pod cgroups are `/kubepods/pod1` and `/kubepods/pod2`.
-Kata creates the unrestricted sandbox cgroup under the pod cgroup.
+And why not add the per-sandbox shim directly to the pod cgroup (e.g.
+`/kubepods` in the Kubernetes context)?
-### Why create a Kata-cgroup under the parent cgroup?
+The Kata Containers shim implementation creates a per-sandbox cgroup
+(`/kata_`) to support the `Docker` use case. Although `Docker` does not
+have a notion of pods, Kata Containers still creates a sandbox to support the pod-less,
+single container use case that `Docker` implements. Since `Docker` does not create any
+cgroup hierarchy to place a container into, it would be very complex for Kata to map
+a particular container to its sandbox without placing it under a `/kata_`
+sub-cgroup first.
-`Docker` does not have a notion of pods, and will not create a cgroup directory
-to place a particular container in (i.e., all containers would be in a path like
-`/docker/container-id`. To simplify the implementation and continue to support `Docker`,
-Kata Containers creates the sandbox-cgroup, in the case of Kubernetes, or a container cgroup, in the case
-of docker.
+### Advantages
-### Improvements
+Keeping all Kata Containers processes under a properly sized pod cgroup is ideal
+and makes for a simpler Kata Containers implementation. It also helps with gathering
+accurate statistics and preventing Kata workloads from being noisy neighbors.
-- Get statistics about pod resources
+#### Pod resources statistics
 If the Kata caller wants to know the resource usage on the host it can get
 statistics from the pod cgroup. All cgroups stats in the hierarchy will include
 the Kata overhead. This gives the possibility of gathering usage-statics at the
 pod level and the container level.
-- Better host resource isolation
+#### Better host resource isolation
 Because the Kata runtime will place all the Kata processes in the pod cgroup,
 the resource limits that the caller applies to the pod cgroup will affect all
 processes that belong to the Kata sandbox in the host. This will improve the
 isolation in the host preventing Kata to become a noisy neighbor.
-## `SandboxCgroupOnly` disabled (default, legacy)
+## `sandbox_cgroup_only = false` (Default setting)
+
+If the cgroup provided to Kata is not sized appropriately, Kata components will
+consume resources that the actual container workloads expect to see and use.
+This can cause instability and performance degradations.
+
+To avoid that situation, Kata Containers creates an unconstrained overhead
+cgroup and moves all non-workload-related processes (anything but the virtual CPU
+threads) to it. The name of this overhead cgroup is `/kata_overhead` and a per-sandbox
+sub-cgroup will be created under it for each sandbox Kata Containers creates.
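To make the split concrete, here is a minimal, illustrative Go sketch of the two cgroup paths used per sandbox
when `sandbox_cgroup_only` is disabled. It is not part of the patch: the variable names and the `main` wrapper are
made up for this example, and the parent cgroup and sandbox ID are placeholders matching the example values given
in the implementation details further down.

```go
package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Placeholder values, matching the examples used later in this document.
	podCgroup := "/kubepods" // parent cgroup created by the orchestrator
	sandboxID := "12345678"  // example sandbox ID

	// Sandbox cgroup: only the vCPU threads end up here.
	sandboxCgroup := filepath.Join(podCgroup, "kata_"+sandboxID)

	// Overhead cgroup: unconstrained, holds the shim, the VMM and the I/O threads.
	overheadCgroup := filepath.Join("/kata_overhead", sandboxID)

	fmt.Println(sandboxCgroup)  // /kubepods/kata_12345678
	fmt.Println(overheadCgroup) // /kata_overhead/12345678
}
```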
+ +Kata Containers does not add any constraints or limitations on the overhead cgroup. It is up to the infrastructure +owner to either: + +- Provision nodes with a pre-sized `/kata_overhead` cgroup. Kata Containers will + load that existing cgroup and move all non workload related processes to it. +- Let Kata Containers create the `/kata_overhead` cgroup, leave it + unconstrained or resize it a-posteriori. -If the cgroup provided to Kata is not sized appropriately, instability will be -introduced when fully constraining Kata components, and the user-workload will -see a subset of resources that were requested. Based on this, the default -handling for Kata Containers is to not fully constrain the VMM and Kata -components on the host. ``` -+----------------------------------------------------------+ -| +---------------------------------------------------+ | -| | +---------------------------------------------+ | | -| | | +--------------------------------------+ | | | -| | | |Container 1 |-|Container 2 | | | | -| | | | |-| | | | | -| | | | Shim+container1 |-| Shim+container2 | | | | -| | | +--------------------------------------+ | | | -| | | | | | -| | |Pod 1 | | | -| | +---------------------------------------------+ | | -| | | | -| | +---------------------------------------------+ | | -| | | +--------------------------------------+ | | | -| | | |Container 1 |-|Container 2 | | | | -| | | | |-| | | | | -| | | | Shim+container1 |-| Shim+container2 | | | | -| | | +--------------------------------------+ | | | -| | | | | | -| | |Pod 2 | | | -| | +---------------------------------------------+ | | -| |kubepods | | -| +---------------------------------------------------+ | -| +---------------------------------------------------+ | -| | Hypervisor | | -| |Kata | | -| +---------------------------------------------------+ | -| | -|Node | -+----------------------------------------------------------+ +┌────────────────────────────────────────────────────────────────────┐ +│ │ +│ ┌─────────────────────────────┐ ┌───────────────────────────┐ │ +│ │ │ │ │ │ +│ │ ┌─────────────────────────┼────┼─────────────────────────┐ │ │ +│ │ │ │ │ │ │ │ +│ │ │ ┌─────────────────────┐ │ │ ┌─────────────────────┐ │ │ │ +│ │ │ │ vCPU threads │ │ │ │ VMM │ │ │ │ +│ │ │ │ │ │ │ │ I/O threads │ │ │ │ +│ │ │ │ │ │ │ │ Kata Shim │ │ │ │ +│ │ │ │ │ │ │ │ │ │ │ │ +│ │ │ │ /kata_ │ │ │ │ / │ │ │ │ +│ │ │ └─────────────────────┘ │ │ └─────────────────────┘ │ │ │ +│ │ │ │ │ │ │ │ +│ │ │ Pod 1 │ │ │ │ │ +│ │ └─────────────────────────┼────┼─────────────────────────┘ │ │ +│ │ │ │ │ │ +│ │ │ │ │ │ +│ │ ┌─────────────────────────┼────┼─────────────────────────┐ │ │ +│ │ │ │ │ │ │ │ +│ │ │ ┌─────────────────────┐ │ │ ┌─────────────────────┐ │ │ │ +│ │ │ │ vCPU threads │ │ │ │ VMM │ │ │ │ +│ │ │ │ │ │ │ │ I/O threads │ │ │ │ +│ │ │ │ │ │ │ │ Kata Shim │ │ │ │ +│ │ │ │ │ │ │ │ │ │ │ │ +│ │ │ │ /kata_ │ │ │ │ / │ │ │ │ +│ │ │ └─────────────────────┘ │ │ └─────────────────────┘ │ │ │ +│ │ │ │ │ │ │ │ +│ │ │ Pod 2 │ │ │ │ │ +│ │ └─────────────────────────┼────┼─────────────────────────┘ │ │ +│ │ │ │ │ │ +│ │ /kubepods │ │ /kata_overhead │ │ +│ └─────────────────────────────┘ └───────────────────────────┘ │ +│ │ +│ │ +│ Node │ +└────────────────────────────────────────────────────────────────────┘ ``` -### What does this method do? +### Implementation Details -1. Given a container creation let `containerCgroupHost=container.CgroupsPath` -1. Rename `containerCgroupHost` path to add `kata_` -1. 
Let `PodCgroupPath=PodSanboxContainerCgroup` where `PodSanboxContainerCgroup` is the cgroup of a container of type `PodSandbox`
-1. Limit the `PodCgroupPath` with the sum of all the container limits in the Sandbox
-1. Move only vCPU threads of hypervisor to `PodCgroupPath`
-1. Per each container, move its `kata-shim` to its own `containerCgroupHost`
-1. Move hypervisor and applicable threads to memory cgroup `/kata`
+When `sandbox_cgroup_only` is disabled, the Kata Containers shim will create a per-pod
+sub-cgroup under the pod's dedicated cgroup, and another one under the overhead cgroup.
+For example, in the Kubernetes context, it will create a `/kata_` under
+the `/kubepods` cgroup hierarchy, and a `/` under the `/kata_overhead` one.
-_Note_: the Kata Containers runtime will not add all the hypervisor threads to
-the cgroup path requested, only vCPUs. These threads are run unconstrained.
+On a typical cgroup v1 hierarchy mounted under `/sys/fs/cgroup/`, for a pod whose sandbox
+ID is `12345678`, created with `sandbox_cgroup_only` disabled, the memory controller paths
+for the sandbox cgroup and the overhead cgroup would respectively live under
+`/sys/fs/cgroup/memory/kubepods/kata_12345678` and `/sys/fs/cgroup/memory/kata_overhead/12345678`.
-This mitigates the risk of the VMM and other threads receiving an out of memory scenario (`OOM`).
+Unlike when `sandbox_cgroup_only` is enabled, the Kata Containers shim will move itself
+to the overhead cgroup first, and then move the vCPU threads to the sandbox cgroup as
+they're created. All Kata processes and threads will run under the overhead cgroup except for
+the vCPU threads.
+With `sandbox_cgroup_only` disabled, Kata Containers assumes the pod cgroup is only sized
+to accommodate the actual container workload processes. For Kata, this maps
+to the VMM-created virtual CPU threads, so they are the only ones running under the pod
+cgroup. This mitigates the risk of the VMM, the Kata shim and the I/O threads going through
+a catastrophic out of memory scenario (`OOM`).
-#### Impact
+#### Pros and Cons
-If resources are reserved at a system level to account for the overheads of
-running sandbox containers, this configuration can be utilized with adequate
-stability. In this scenario, non-negligible amounts of CPU and memory will be
-utilized unaccounted for on the host.
+Running all non-vCPU threads under an unconstrained overhead cgroup could lead to those
+threads potentially consuming a large amount of host resources.
+
+On the other hand, running all non-vCPU threads under a dedicated overhead cgroup can provide
+accurate metrics on the actual Kata Containers pod overhead, allowing for tuning the overhead
+cgroup size and constraints accordingly.
 
 [linux-config]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md
 [cgroupspath]: https://github.com/opencontainers/runtime-spec/blob/master/config-linux.md#cgroups-path
 
 # Supported cgroups
 
-Kata Containers supports cgroups `v1` and `v2`. In the following sections each cgroup is
-described briefly and what changes are needed in Kata Containers to support it.
+Kata Containers currently only supports cgroups `v1`.
+
+In the following sections each cgroup is described briefly.
 
 ## Cgroups V1
 
@@ -244,7 +301,7 @@ diagram:
 
 A process can join a cgroup by writing its process id (`pid`) to `cgroup.procs` file,
 or join a cgroup partially by writing the task (thread) id (`tid`) to the `tasks` file.
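As a small illustration of the joining mechanism described above, the following Go sketch writes a process ID into a
cgroup's `cgroup.procs` file. It is not the runtime's actual implementation (the removed `writePids` helper in
`pkg/cgroups/manager.go` did the equivalent work); the cgroup path below is only a placeholder and error handling is
kept to a minimum.

```go
package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

// joinCgroup attaches a process to a cgroup v1 directory by writing its pid
// into the cgroup.procs file, as described in the section above.
func joinCgroup(cgroupPath string, pid int) error {
	procs := filepath.Join(cgroupPath, "cgroup.procs")
	return os.WriteFile(procs, []byte(strconv.Itoa(pid)), 0o644)
}

func main() {
	// Placeholder path: an example overhead cgroup for sandbox 12345678.
	cgroup := "/sys/fs/cgroup/memory/kata_overhead/12345678"
	if err := joinCgroup(cgroup, os.Getpid()); err != nil {
		fmt.Fprintln(os.Stderr, "could not join cgroup:", err)
	}
}
```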
-Kata Containers supports `v1` by default and no change in the configuration file is needed. +Kata Containers only supports `v1`. To know more about `cgroups v1`, see [cgroupsv1(7)][2]. ## Cgroups V2 @@ -297,22 +354,13 @@ Same as `cgroups v1`, a process can join the cgroup by writing its process id (` `cgroup.procs` file, or join a cgroup partially by writing the task (thread) id (`tid`) to `cgroup.threads` file. -For backwards compatibility Kata Containers defaults to supporting cgroups v1 by default. -To change this to `v2`, set `sandbox_cgroup_only=true` in the `configuration.toml` file. -To know more about `cgroups v2`, see [cgroupsv2(7)][3]. +Kata Containers does not support cgroups `v2` on the host. ### Distro Support Many Linux distributions do not yet support `cgroups v2`, as it is quite a recent addition. For more information about the status of this feature see [issue #2494][4]. -# Summary - -| cgroup option | default? | status | pros | cons | cgroups -|-|-|-|-|-|-| -| `SandboxCgroupOnly=false` | yes | legacy | Easiest to make Kata work | Unaccounted for memory and resource utilization | v1 -| `SandboxCgroupOnly=true` | no | recommended | Complete tracking of Kata memory and CPU utilization. In Kubernetes, the Kubelet can fully constrain Kata via the pod cgroup | Requires upper layer orchestrator which sizes sandbox cgroup appropriately | v1, v2 - [1]: http://man7.org/linux/man-pages/man5/tmpfs.5.html [2]: http://man7.org/linux/man-pages/man7/cgroups.7.html#CGROUPS_VERSION_1 From 4b7e4a4c70f6a4fb438b632fd177e2c5eb8ad692 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Sun, 1 Aug 2021 04:17:02 +0200 Subject: [PATCH 7/7] runtime: Vendoring update Due to the libcontainer dependencies removal. Signed-off-by: Samuel Ortiz --- src/runtime/go.sum | 7 - .../checkpoint-restore/go-criu/v5/.gitignore | 6 - .../go-criu/v5/.golangci.yml | 12 - .../checkpoint-restore/go-criu/v5/LICENSE | 201 -- .../checkpoint-restore/go-criu/v5/Makefile | 57 - .../checkpoint-restore/go-criu/v5/README.md | 95 - .../checkpoint-restore/go-criu/v5/go.mod | 9 - .../checkpoint-restore/go-criu/v5/go.sum | 22 - .../checkpoint-restore/go-criu/v5/main.go | 260 -- .../checkpoint-restore/go-criu/v5/notify.go | 62 - .../go-criu/v5/rpc/rpc.pb.go | 2208 ----------------- .../github.com/cilium/ebpf/.clang-format | 17 - .../vendor/github.com/cilium/ebpf/.gitignore | 13 - .../github.com/cilium/ebpf/.golangci.yaml | 29 - .../github.com/cilium/ebpf/ARCHITECTURE.md | 80 - .../github.com/cilium/ebpf/CODE_OF_CONDUCT.md | 46 - .../github.com/cilium/ebpf/CONTRIBUTING.md | 40 - .../vendor/github.com/cilium/ebpf/LICENSE | 23 - .../vendor/github.com/cilium/ebpf/Makefile | 70 - .../vendor/github.com/cilium/ebpf/README.md | 62 - .../vendor/github.com/cilium/ebpf/asm/alu.go | 149 -- .../github.com/cilium/ebpf/asm/alu_string.go | 107 - .../vendor/github.com/cilium/ebpf/asm/doc.go | 2 - .../vendor/github.com/cilium/ebpf/asm/func.go | 195 -- .../github.com/cilium/ebpf/asm/func_string.go | 185 -- .../github.com/cilium/ebpf/asm/instruction.go | 506 ---- .../vendor/github.com/cilium/ebpf/asm/jump.go | 109 - .../github.com/cilium/ebpf/asm/jump_string.go | 53 - .../github.com/cilium/ebpf/asm/load_store.go | 204 -- .../cilium/ebpf/asm/load_store_string.go | 80 - .../github.com/cilium/ebpf/asm/opcode.go | 237 -- .../cilium/ebpf/asm/opcode_string.go | 38 - .../github.com/cilium/ebpf/asm/register.go | 49 - .../github.com/cilium/ebpf/collection.go | 616 ----- .../vendor/github.com/cilium/ebpf/doc.go | 16 - .../github.com/cilium/ebpf/elf_reader.go | 953 
------- .../github.com/cilium/ebpf/elf_reader_fuzz.go | 21 - .../vendor/github.com/cilium/ebpf/go.mod | 9 - .../vendor/github.com/cilium/ebpf/go.sum | 13 - .../vendor/github.com/cilium/ebpf/info.go | 239 -- .../cilium/ebpf/internal/btf/btf.go | 799 ------ .../cilium/ebpf/internal/btf/btf_types.go | 282 --- .../ebpf/internal/btf/btf_types_string.go | 44 - .../cilium/ebpf/internal/btf/core.go | 887 ------- .../cilium/ebpf/internal/btf/doc.go | 8 - .../cilium/ebpf/internal/btf/ext_info.go | 303 --- .../cilium/ebpf/internal/btf/fuzz.go | 49 - .../cilium/ebpf/internal/btf/strings.go | 60 - .../cilium/ebpf/internal/btf/types.go | 893 ------- .../github.com/cilium/ebpf/internal/cpu.go | 62 - .../github.com/cilium/ebpf/internal/elf.go | 68 - .../github.com/cilium/ebpf/internal/endian.go | 29 - .../github.com/cilium/ebpf/internal/errors.go | 51 - .../github.com/cilium/ebpf/internal/fd.go | 69 - .../cilium/ebpf/internal/feature.go | 100 - .../github.com/cilium/ebpf/internal/io.go | 16 - .../cilium/ebpf/internal/pinning.go | 44 - .../github.com/cilium/ebpf/internal/ptr.go | 31 - .../cilium/ebpf/internal/ptr_32_be.go | 14 - .../cilium/ebpf/internal/ptr_32_le.go | 14 - .../github.com/cilium/ebpf/internal/ptr_64.go | 14 - .../cilium/ebpf/internal/syscall.go | 245 -- .../cilium/ebpf/internal/syscall_string.go | 56 - .../cilium/ebpf/internal/unix/types_linux.go | 204 -- .../cilium/ebpf/internal/unix/types_other.go | 263 -- .../cilium/ebpf/internal/version.go | 163 -- .../github.com/cilium/ebpf/link/cgroup.go | 171 -- .../vendor/github.com/cilium/ebpf/link/doc.go | 2 - .../github.com/cilium/ebpf/link/iter.go | 100 - .../github.com/cilium/ebpf/link/kprobe.go | 438 ---- .../github.com/cilium/ebpf/link/link.go | 229 -- .../github.com/cilium/ebpf/link/netns.go | 60 - .../github.com/cilium/ebpf/link/perf_event.go | 273 -- .../github.com/cilium/ebpf/link/platform.go | 25 - .../github.com/cilium/ebpf/link/program.go | 76 - .../cilium/ebpf/link/raw_tracepoint.go | 61 - .../github.com/cilium/ebpf/link/syscalls.go | 190 -- .../github.com/cilium/ebpf/link/tracepoint.go | 56 - .../github.com/cilium/ebpf/link/uprobe.go | 237 -- .../vendor/github.com/cilium/ebpf/linker.go | 140 -- .../vendor/github.com/cilium/ebpf/map.go | 1232 --------- .../github.com/cilium/ebpf/marshalers.go | 218 -- .../vendor/github.com/cilium/ebpf/prog.go | 728 ------ .../github.com/cilium/ebpf/run-tests.sh | 123 - .../vendor/github.com/cilium/ebpf/syscalls.go | 480 ---- .../vendor/github.com/cilium/ebpf/types.go | 248 -- .../github.com/cilium/ebpf/types_string.go | 172 -- .../cyphar/filepath-securejoin/.travis.yml | 19 - .../cyphar/filepath-securejoin/LICENSE | 28 - .../cyphar/filepath-securejoin/README.md | 65 - .../cyphar/filepath-securejoin/VERSION | 1 - .../cyphar/filepath-securejoin/join.go | 134 - .../cyphar/filepath-securejoin/vendor.conf | 1 - .../cyphar/filepath-securejoin/vfs.go | 41 - .../github.com/mrunalp/fileutils/.gitignore | 1 - .../github.com/mrunalp/fileutils/LICENSE | 191 -- .../github.com/mrunalp/fileutils/MAINTAINERS | 1 - .../github.com/mrunalp/fileutils/README.md | 5 - .../github.com/mrunalp/fileutils/fileutils.go | 168 -- .../github.com/mrunalp/fileutils/go.mod | 3 - .../github.com/mrunalp/fileutils/idtools.go | 54 - .../runc/libcontainer/README.md | 334 --- .../opencontainers/runc/libcontainer/SPEC.md | 465 ---- .../runc/libcontainer/apparmor/apparmor.go | 16 - .../libcontainer/apparmor/apparmor_linux.go | 69 - .../apparmor/apparmor_unsupported.go | 14 - .../libcontainer/capabilities/capabilities.go | 111 - 
.../capabilities/capabilities_unsupported.go | 3 - .../runc/libcontainer/cgroups/cgroups.go | 61 - .../cgroups/cgroups_unsupported.go | 3 - .../cgroups/devices/devices_emulator.go | 382 --- .../cgroups/ebpf/devicefilter/devicefilter.go | 208 -- .../libcontainer/cgroups/ebpf/ebpf_linux.go | 253 -- .../runc/libcontainer/cgroups/file.go | 166 -- .../runc/libcontainer/cgroups/fs/blkio.go | 311 --- .../runc/libcontainer/cgroups/fs/cpu.go | 115 - .../runc/libcontainer/cgroups/fs/cpuacct.go | 172 -- .../runc/libcontainer/cgroups/fs/cpuset.go | 248 -- .../runc/libcontainer/cgroups/fs/devices.go | 110 - .../runc/libcontainer/cgroups/fs/freezer.go | 160 -- .../runc/libcontainer/cgroups/fs/fs.go | 440 ---- .../runc/libcontainer/cgroups/fs/hugetlb.go | 65 - .../runc/libcontainer/cgroups/fs/memory.go | 352 --- .../runc/libcontainer/cgroups/fs/name.go | 33 - .../runc/libcontainer/cgroups/fs/net_cls.go | 34 - .../runc/libcontainer/cgroups/fs/net_prio.go | 32 - .../libcontainer/cgroups/fs/perf_event.go | 26 - .../runc/libcontainer/cgroups/fs/pids.go | 68 - .../libcontainer/cgroups/fs/unsupported.go | 3 - .../runc/libcontainer/cgroups/fs2/cpu.go | 85 - .../runc/libcontainer/cgroups/fs2/cpuset.go | 30 - .../runc/libcontainer/cgroups/fs2/create.go | 152 -- .../libcontainer/cgroups/fs2/defaultpath.go | 103 - .../runc/libcontainer/cgroups/fs2/devices.go | 76 - .../runc/libcontainer/cgroups/fs2/freezer.go | 129 - .../runc/libcontainer/cgroups/fs2/fs2.go | 251 -- .../runc/libcontainer/cgroups/fs2/hugetlb.go | 57 - .../runc/libcontainer/cgroups/fs2/io.go | 194 -- .../runc/libcontainer/cgroups/fs2/memory.go | 213 -- .../runc/libcontainer/cgroups/fs2/pids.go | 77 - .../libcontainer/cgroups/fscommon/utils.go | 131 - .../runc/libcontainer/cgroups/stats.go | 163 -- .../libcontainer/cgroups/systemd/common.go | 516 ---- .../libcontainer/cgroups/systemd/cpuset.go | 67 - .../runc/libcontainer/cgroups/systemd/dbus.go | 98 - .../cgroups/systemd/unsupported.go | 71 - .../runc/libcontainer/cgroups/systemd/user.go | 106 - .../runc/libcontainer/cgroups/systemd/v1.go | 465 ---- .../runc/libcontainer/cgroups/systemd/v2.go | 460 ---- .../runc/libcontainer/cgroups/utils.go | 450 ---- .../runc/libcontainer/cgroups/v1_utils.go | 283 --- .../libcontainer/configs/validate/rootless.go | 93 - .../configs/validate/validator.go | 278 --- .../runc/libcontainer/console_linux.go | 41 - .../runc/libcontainer/container.go | 173 -- .../runc/libcontainer/container_linux.go | 2154 ---------------- .../runc/libcontainer/criu_opts_linux.go | 33 - .../opencontainers/runc/libcontainer/error.go | 70 - .../runc/libcontainer/factory.go | 44 - .../runc/libcontainer/factory_linux.go | 453 ---- .../runc/libcontainer/generic_error.go | 92 - .../runc/libcontainer/init_linux.go | 575 ----- .../runc/libcontainer/intelrdt/cmt.go | 23 - .../runc/libcontainer/intelrdt/intelrdt.go | 816 ------ .../runc/libcontainer/intelrdt/mbm.go | 33 - .../runc/libcontainer/intelrdt/monitoring.go | 84 - .../runc/libcontainer/intelrdt/stats.go | 59 - .../runc/libcontainer/keys/keyctl.go | 47 - .../runc/libcontainer/logs/logs.go | 106 - .../runc/libcontainer/message_linux.go | 89 - .../runc/libcontainer/network_linux.go | 102 - .../runc/libcontainer/notify_linux.go | 87 - .../runc/libcontainer/notify_linux_v2.go | 82 - .../runc/libcontainer/process.go | 115 - .../runc/libcontainer/process_linux.go | 726 ------ .../runc/libcontainer/restored_process.go | 129 - .../runc/libcontainer/rootfs_linux.go | 1080 -------- .../runc/libcontainer/seccomp/config.go | 77 - 
.../seccomp/patchbpf/enosys_linux.go | 660 ----- .../seccomp/patchbpf/enosys_unsupported.go | 3 - .../libcontainer/seccomp/seccomp_linux.go | 222 -- .../seccomp/seccomp_unsupported.go | 24 - .../runc/libcontainer/setns_init_linux.go | 98 - .../runc/libcontainer/specconv/example.go | 230 -- .../runc/libcontainer/specconv/spec_linux.go | 964 ------- .../runc/libcontainer/stacktrace/capture.go | 27 - .../runc/libcontainer/stacktrace/frame.go | 38 - .../libcontainer/stacktrace/stacktrace.go | 5 - .../runc/libcontainer/standard_init_linux.go | 231 -- .../runc/libcontainer/state_linux.go | 245 -- .../runc/libcontainer/stats_linux.go | 13 - .../opencontainers/runc/libcontainer/sync.go | 101 - .../runc/libcontainer/system/linux.go | 110 - .../runc/libcontainer/system/proc.go | 103 - .../libcontainer/system/syscall_linux_32.go | 26 - .../libcontainer/system/syscall_linux_64.go | 26 - .../runc/libcontainer/system/xattrs_linux.go | 35 - .../runc/libcontainer/utils/cmsg.go | 93 - .../runc/libcontainer/utils/utils.go | 177 -- .../runc/libcontainer/utils/utils_unix.go | 68 - .../opencontainers/runc/types/events.go | 155 -- .../seccomp/libseccomp-golang/.gitignore | 4 - .../seccomp/libseccomp-golang/CHANGELOG | 17 - .../seccomp/libseccomp-golang/LICENSE | 22 - .../seccomp/libseccomp-golang/Makefile | 26 - .../seccomp/libseccomp-golang/README | 51 - .../libseccomp-golang/SUBMITTING_PATCHES | 112 - .../seccomp/libseccomp-golang/seccomp.go | 935 ------- .../libseccomp-golang/seccomp_internal.go | 571 ----- .../github.com/syndtr/gocapability/LICENSE | 24 - .../gocapability/capability/capability.go | 133 - .../capability/capability_linux.go | 642 ----- .../capability/capability_noop.go | 19 - .../syndtr/gocapability/capability/enum.go | 309 --- .../gocapability/capability/enum_gen.go | 138 -- .../gocapability/capability/syscall_linux.go | 154 -- .../vendor/golang.org/x/net/bpf/asm.go | 41 - .../vendor/golang.org/x/net/bpf/constants.go | 222 -- .../vendor/golang.org/x/net/bpf/doc.go | 82 - .../golang.org/x/net/bpf/instructions.go | 726 ------ .../vendor/golang.org/x/net/bpf/setter.go | 10 - src/runtime/vendor/golang.org/x/net/bpf/vm.go | 150 -- .../golang.org/x/net/bpf/vm_instructions.go | 182 -- src/runtime/vendor/modules.txt | 41 - 224 files changed, 42196 deletions(-) delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.gitignore delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.golangci.yml delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/LICENSE delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/README.md delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.mod delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.sum delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/main.go delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/notify.go delete mode 100644 src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/rpc/rpc.pb.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/.clang-format delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/.gitignore delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/.golangci.yaml delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/ARCHITECTURE.md delete mode 100644 
src/runtime/vendor/github.com/cilium/ebpf/CODE_OF_CONDUCT.md delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/CONTRIBUTING.md delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/LICENSE delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/Makefile delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/README.md delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/alu.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/alu_string.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/doc.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/func.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/func_string.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/instruction.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/jump.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/jump_string.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/load_store.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/load_store_string.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/opcode.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/opcode_string.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/asm/register.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/collection.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/doc.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/elf_reader.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/elf_reader_fuzz.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/go.mod delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/go.sum delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/info.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types_string.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/core.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/doc.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/ext_info.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/fuzz.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/strings.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/btf/types.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/cpu.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/elf.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/endian.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/errors.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/fd.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/feature.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/io.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/pinning.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/ptr.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_be.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_le.go delete mode 100644 
src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_64.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/syscall.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/syscall_string.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_other.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/internal/version.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/cgroup.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/doc.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/iter.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/kprobe.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/link.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/netns.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/perf_event.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/platform.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/program.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/raw_tracepoint.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/syscalls.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/tracepoint.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/link/uprobe.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/linker.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/map.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/marshalers.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/prog.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/run-tests.sh delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/syscalls.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/types.go delete mode 100644 src/runtime/vendor/github.com/cilium/ebpf/types_string.go delete mode 100644 src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml delete mode 100644 src/runtime/vendor/github.com/cyphar/filepath-securejoin/LICENSE delete mode 100644 src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md delete mode 100644 src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION delete mode 100644 src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go delete mode 100644 src/runtime/vendor/github.com/cyphar/filepath-securejoin/vendor.conf delete mode 100644 src/runtime/vendor/github.com/cyphar/filepath-securejoin/vfs.go delete mode 100644 src/runtime/vendor/github.com/mrunalp/fileutils/.gitignore delete mode 100644 src/runtime/vendor/github.com/mrunalp/fileutils/LICENSE delete mode 100644 src/runtime/vendor/github.com/mrunalp/fileutils/MAINTAINERS delete mode 100644 src/runtime/vendor/github.com/mrunalp/fileutils/README.md delete mode 100644 src/runtime/vendor/github.com/mrunalp/fileutils/fileutils.go delete mode 100644 src/runtime/vendor/github.com/mrunalp/fileutils/go.mod delete mode 100644 src/runtime/vendor/github.com/mrunalp/fileutils/idtools.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/README.md delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go delete mode 100644 
src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go delete mode 100644 
src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/error.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go delete mode 100644 
src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/sync.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go delete mode 100644 src/runtime/vendor/github.com/opencontainers/runc/types/events.go delete mode 100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/.gitignore delete mode 100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/CHANGELOG delete mode 100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/LICENSE delete mode 100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/Makefile delete mode 100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/README delete mode 100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/SUBMITTING_PATCHES delete mode 100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp.go delete mode 
100644 src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go delete mode 100644 src/runtime/vendor/github.com/syndtr/gocapability/LICENSE delete mode 100644 src/runtime/vendor/github.com/syndtr/gocapability/capability/capability.go delete mode 100644 src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_linux.go delete mode 100644 src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_noop.go delete mode 100644 src/runtime/vendor/github.com/syndtr/gocapability/capability/enum.go delete mode 100644 src/runtime/vendor/github.com/syndtr/gocapability/capability/enum_gen.go delete mode 100644 src/runtime/vendor/github.com/syndtr/gocapability/capability/syscall_linux.go delete mode 100644 src/runtime/vendor/golang.org/x/net/bpf/asm.go delete mode 100644 src/runtime/vendor/golang.org/x/net/bpf/constants.go delete mode 100644 src/runtime/vendor/golang.org/x/net/bpf/doc.go delete mode 100644 src/runtime/vendor/golang.org/x/net/bpf/instructions.go delete mode 100644 src/runtime/vendor/golang.org/x/net/bpf/setter.go delete mode 100644 src/runtime/vendor/golang.org/x/net/bpf/vm.go delete mode 100644 src/runtime/vendor/golang.org/x/net/bpf/vm_instructions.go diff --git a/src/runtime/go.sum b/src/runtime/go.sum index d89d807df7..54d0dc1582 100644 --- a/src/runtime/go.sum +++ b/src/runtime/go.sum @@ -91,14 +91,12 @@ github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1 h1:6MnRN8NT7+YBpUIWxHtefFZOKTAPgGjpQSxqLNn0+qY= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= -github.com/checkpoint-restore/go-criu/v5 v5.0.0 h1:TW8f/UvntYoVDMN1K2HlT82qH1rb0sOjpGw3m6Ym+i4= github.com/checkpoint-restore/go-criu/v5 v5.0.0/go.mod h1:cfwC0EG7HMUenopBsUf9d89JlCLQIfgVcNsNN0t6T2M= github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI= github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI= github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= github.com/cilium/ebpf v0.4.0/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= -github.com/cilium/ebpf v0.6.2 h1:iHsfF/t4aW4heW2YKfeHrVPGdtYTL4C4KocpM8KTSnI= github.com/cilium/ebpf v0.6.2/go.mod h1:4tRaxcgiL706VnOzHOdBlY8IEAIdxINsQBcU4xJJXRs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= @@ -158,7 +156,6 @@ github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsr github.com/creack/pty v1.1.7/go.mod h1:lj5s0c3V2DBrqTV7llrYr5NG6My20zk30Fl46Y7DoTY= github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af h1:H6nLV96F1LkWizYLQtrMtqJBrlJxnpjgisHsTsOS2HU= github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af/go.mod h1:POmDVglzQ2jWTlL9ZCfZ8d1QjLhmk0oB36O8T0oG75Y= -github.com/cyphar/filepath-securejoin v0.2.2 h1:jCwT2GTP+PY5nBz3c/YL5PAIbusElVrPujOBSCj8xRg= github.com/cyphar/filepath-securejoin v0.2.2/go.mod h1:FpkQEhXnPnOthhzymB7CGsFk2G9VLXONKD9G7QGMM+4= github.com/d2g/dhcp4 v0.0.0-20170904100407-a1d1b6c41b1c/go.mod h1:Ct2BUK8SB0YC1SMSibvLzxjeJLnrYEVLULFNiHY9YfQ= github.com/d2g/dhcp4client v1.0.0/go.mod 
h1:j0hNfjhrt2SxUOw55nL0ATM/z4Yt3t2Kd1mW34z5W5s= @@ -188,7 +185,6 @@ github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7 github.com/evanphx/json-patch v4.9.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/form3tech-oss/jwt-go v3.2.2+incompatible/go.mod h1:pbq4aXjuKjdthFRnoDwaVPLA+WlJuPGy+QneDUgJi2k= -github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= github.com/frankban/quicktest v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= @@ -409,7 +405,6 @@ github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= -github.com/mrunalp/fileutils v0.5.0 h1:NKzVxiH7eSk+OQ4M+ZYW1K6h27RUV3MI6NUTsHhU6Z4= github.com/mrunalp/fileutils v0.5.0/go.mod h1:M1WthSahJixYnrXQl/DFQuteStB1weuxD2QJNHXfbSQ= github.com/munnerz/goautoneg v0.0.0-20120707110453-a547fc61f48d/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= @@ -488,7 +483,6 @@ github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQD github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8 h1:2c1EFnZHIPCW8qKWgHMH/fX2PkSabFc5mrVzfUNdg5U= github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8/go.mod h1:Z0q5wiBQGYcxhMZ6gUqHn6pYNLypFAvaL3UvgZLR0U4= github.com/satori/go.uuid v1.2.0/go.mod h1:dA0hQrYB0VpLJoorglMZABFdXlWrHn1NEOzdhQKdks0= -github.com/seccomp/libseccomp-golang v0.9.1 h1:NJjM5DNFOs0s3kYE1WUOr6G8V97sdt46rlXTMfXGWBo= github.com/seccomp/libseccomp-golang v0.9.1/go.mod h1:GbW5+tmTXfcxTToHLXlScSlAvWlF4P2Ca7zGrPiEpWo= github.com/shurcooL/sanitized_anchor_name v1.0.0 h1:PdmoCO6wvbs+7yrJyMORt4/BmY5IYyJwS/kOiWx8mHo= github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= @@ -528,7 +522,6 @@ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UV github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/tchap/go-patricia v2.2.6+incompatible/go.mod h1:bmLyhP68RS6kStMGxByiQ23RP/odRBOTVjwp2cDyi6I= github.com/tmc/grpc-websocket-proxy v0.0.0-20170815181823-89b8d40f7ca8/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.gitignore b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.gitignore deleted file mode 100644 index d99bf92cfe..0000000000 --- 
a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -test/test -test/piggie/piggie -test/phaul -image -rpc/rpc.proto -stats/stats.proto diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.golangci.yml b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.golangci.yml deleted file mode 100644 index fbbac4b417..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/.golangci.yml +++ /dev/null @@ -1,12 +0,0 @@ -run: - skip_dirs: - - rpc - - stats - -linters: - disable-all: false - presets: - - bugs - - performance - - unused - - format diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/LICENSE b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/LICENSE deleted file mode 100644 index 8dada3edaf..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. 
The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. 
- - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright {yyyy} {name of copyright owner} - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile deleted file mode 100644 index 2c303a3c9f..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/Makefile +++ /dev/null @@ -1,57 +0,0 @@ -GO ?= go -CC ?= gcc - -all: build test phaul-test - -lint: - golangci-lint run ./... - -build: - $(GO) build -v ./... - -TEST_BINARIES := test/test test/piggie/piggie test/phaul/phaul -test-bin: $(TEST_BINARIES) - -test/piggie/piggie: test/piggie/piggie.c - $(CC) $^ -o $@ - -test/test: test/*.go - $(GO) build -v -o $@ $^ - -test: $(TEST_BINARIES) - mkdir -p image - PID=$$(test/piggie/piggie) && { \ - test/test dump $$PID image && \ - test/test restore image; \ - pkill -9 piggie; \ - } - rm -rf image - -test/phaul/phaul: test/phaul/*.go - $(GO) build -v -o $@ $^ - -phaul-test: $(TEST_BINARIES) - rm -rf image - PID=$$(test/piggie/piggie) && { \ - test/phaul/phaul $$PID; \ - pkill -9 piggie; \ - } - -clean: - @rm -f $(TEST_BINARIES) - @rm -rf image - @rm -f rpc/rpc.proto stats/stats.proto - -rpc/rpc.proto: - curl -sSL https://raw.githubusercontent.com/checkpoint-restore/criu/master/images/rpc.proto -o $@ - -stats/stats.proto: - curl -sSL https://raw.githubusercontent.com/checkpoint-restore/criu/master/images/stats.proto -o $@ - -rpc/rpc.pb.go: rpc/rpc.proto - protoc --go_out=. $^ - -stats/stats.pb.go: stats/stats.proto - protoc --go_out=. 
$^ - -.PHONY: build test phaul-test test-bin clean lint diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/README.md b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/README.md deleted file mode 100644 index 390da3e98b..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/README.md +++ /dev/null @@ -1,95 +0,0 @@ -[![test](https://github.com/checkpoint-restore/go-criu/workflows/ci/badge.svg?branch=master)](https://github.com/checkpoint-restore/go-criu/actions?query=workflow%3Aci) -[![verify](https://github.com/checkpoint-restore/go-criu/workflows/verify/badge.svg?branch=master)](https://github.com/checkpoint-restore/go-criu/actions?query=workflow%3Averify) -[![Go Reference](https://pkg.go.dev/badge/github.com/checkpoint-restore/go-criu.svg)](https://pkg.go.dev/github.com/checkpoint-restore/go-criu) - -## go-criu -- Go bindings for CRIU - -This repository provides Go bindings for [CRIU](https://criu.org/). The code is based on the Go-based PHaul -implementation from the CRIU repository. For easier inclusion into other Go projects the -CRIU Go bindings have been moved to this repository. - -The Go bindings provide an easy way to use the CRIU RPC calls from Go without the need -to set up all the infrastructure to make the actual RPC connection to CRIU. - -The following example would print the version of CRIU: -```go -import ( - "log" - - "github.com/checkpoint/restore/go-criu/v5" -) - -func main() { - c := criu.MakeCriu() - version, err := c.GetCriuVersion() - if err != nil { - log.Fatalln(err) - } - log.Println(version) -} -``` - -or to just check if at least a certain CRIU version is installed: - -```go - c := criu.MakeCriu() - result, err := c.IsCriuAtLeast(31100) -``` - -## Releases - -The first go-criu release was 3.11 based on CRIU 3.11. The initial plan -was to follow CRIU so that go-criu would carry the same version number as -CRIU. - -As go-criu is imported in other projects and as Go modules are expected -to follow Semantic Versioning go-criu will also follow Semantic Versioning -starting with the 4.0.0 release. - -The following table shows the relation between go-criu and criu versions: - -| Major version | Latest release | CRIU version | -| -------------- | -------------- | ------------ | -| v5             | 5.0.0         | 3.15         | -| v4             | 4.1.0         | 3.14         | - -## How to contribute - -While bug fixes can first be identified via an "issue", that is not required. -It's ok to just open up a PR with the fix, but make sure you include the same -information you would have included in an issue - like how to reproduce it. - -PRs for new features should include some background on what use cases the -new code is trying to address. When possible and when it makes sense, try to -break-up larger PRs into smaller ones - it's easier to review smaller -code changes. But only if those smaller ones make sense as stand-alone PRs. - -Regardless of the type of PR, all PRs should include: -* well documented code changes -* additional testcases. Ideally, they should fail w/o your code change applied -* documentation changes - -Squash your commits into logical pieces of work that might want to be reviewed -separate from the rest of the PRs. Ideally, each commit should implement a -single idea, and the PR branch should pass the tests at every commit. GitHub -makes it easy to review the cumulative effect of many commits; so, when in -doubt, use smaller commits. 
- -PRs that fix issues should include a reference like `Closes #XXXX` in the -commit message so that github will automatically close the referenced issue -when the PR is merged. - -Contributors must assert that they are in compliance with the [Developer -Certificate of Origin 1.1](http://developercertificate.org/). This is achieved -by adding a "Signed-off-by" line containing the contributor's name and e-mail -to every commit message. Your signature certifies that you wrote the patch or -otherwise have the right to pass it on as an open-source patch. - -### License and copyright - -Unless mentioned otherwise in a specific file's header, all code in -this project is released under the Apache 2.0 license. - -The author of a change remains the copyright holder of their code -(no copyright assignment). The list of authors and contributors can be -retrieved from the git commit history and in some cases, the file headers. diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.mod b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.mod deleted file mode 100644 index 58931e5f98..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module github.com/checkpoint-restore/go-criu/v5 - -go 1.13 - -require ( - github.com/golang/protobuf v1.4.3 - golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c - google.golang.org/protobuf v1.23.0 -) diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.sum b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.sum deleted file mode 100644 index 0a5a48cde7..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/go.sum +++ /dev/null @@ -1,22 +0,0 @@ -github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= -github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= -github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= -github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= -github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= -github.com/golang/protobuf v1.4.3 h1:JjCZWpVbqXDqFVmTfYWEVTMIYrL/NPdPSCHPJ0T/raM= -github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= -github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= -google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= -google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod 
h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= -google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= -google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= -google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= -google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/main.go b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/main.go deleted file mode 100644 index 78811c309c..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/main.go +++ /dev/null @@ -1,260 +0,0 @@ -package criu - -import ( - "errors" - "fmt" - "os" - "os/exec" - "strconv" - "syscall" - - "github.com/checkpoint-restore/go-criu/v5/rpc" - "google.golang.org/protobuf/proto" -) - -// Criu struct -type Criu struct { - swrkCmd *exec.Cmd - swrkSk *os.File - swrkPath string -} - -// MakeCriu returns the Criu object required for most operations -func MakeCriu() *Criu { - return &Criu{ - swrkPath: "criu", - } -} - -// SetCriuPath allows setting the path to the CRIU binary -// if it is in a non standard location -func (c *Criu) SetCriuPath(path string) { - c.swrkPath = path -} - -// Prepare sets up everything for the RPC communication to CRIU -func (c *Criu) Prepare() error { - fds, err := syscall.Socketpair(syscall.AF_LOCAL, syscall.SOCK_SEQPACKET, 0) - if err != nil { - return err - } - - cln := os.NewFile(uintptr(fds[0]), "criu-xprt-cln") - syscall.CloseOnExec(fds[0]) - srv := os.NewFile(uintptr(fds[1]), "criu-xprt-srv") - defer srv.Close() - - args := []string{"swrk", strconv.Itoa(fds[1])} - // #nosec G204 - cmd := exec.Command(c.swrkPath, args...) 
- - err = cmd.Start() - if err != nil { - cln.Close() - return err - } - - c.swrkCmd = cmd - c.swrkSk = cln - - return nil -} - -// Cleanup cleans up -func (c *Criu) Cleanup() { - if c.swrkCmd != nil { - c.swrkSk.Close() - c.swrkSk = nil - _ = c.swrkCmd.Wait() - c.swrkCmd = nil - } -} - -func (c *Criu) sendAndRecv(reqB []byte) ([]byte, int, error) { - cln := c.swrkSk - _, err := cln.Write(reqB) - if err != nil { - return nil, 0, err - } - - respB := make([]byte, 2*4096) - n, err := cln.Read(respB) - if err != nil { - return nil, 0, err - } - - return respB, n, nil -} - -func (c *Criu) doSwrk(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) error { - resp, err := c.doSwrkWithResp(reqType, opts, nfy) - if err != nil { - return err - } - respType := resp.GetType() - if respType != reqType { - return errors.New("unexpected responce") - } - - return nil -} - -func (c *Criu) doSwrkWithResp(reqType rpc.CriuReqType, opts *rpc.CriuOpts, nfy Notify) (*rpc.CriuResp, error) { - var resp *rpc.CriuResp - - req := rpc.CriuReq{ - Type: &reqType, - Opts: opts, - } - - if nfy != nil { - opts.NotifyScripts = proto.Bool(true) - } - - if c.swrkCmd == nil { - err := c.Prepare() - if err != nil { - return nil, err - } - - defer c.Cleanup() - } - - for { - reqB, err := proto.Marshal(&req) - if err != nil { - return nil, err - } - - respB, respS, err := c.sendAndRecv(reqB) - if err != nil { - return nil, err - } - - resp = &rpc.CriuResp{} - err = proto.Unmarshal(respB[:respS], resp) - if err != nil { - return nil, err - } - - if !resp.GetSuccess() { - return resp, fmt.Errorf("operation failed (msg:%s err:%d)", - resp.GetCrErrmsg(), resp.GetCrErrno()) - } - - respType := resp.GetType() - if respType != rpc.CriuReqType_NOTIFY { - break - } - if nfy == nil { - return resp, errors.New("unexpected notify") - } - - notify := resp.GetNotify() - switch notify.GetScript() { - case "pre-dump": - err = nfy.PreDump() - case "post-dump": - err = nfy.PostDump() - case "pre-restore": - err = nfy.PreRestore() - case "post-restore": - err = nfy.PostRestore(notify.GetPid()) - case "network-lock": - err = nfy.NetworkLock() - case "network-unlock": - err = nfy.NetworkUnlock() - case "setup-namespaces": - err = nfy.SetupNamespaces(notify.GetPid()) - case "post-setup-namespaces": - err = nfy.PostSetupNamespaces() - case "post-resume": - err = nfy.PostResume() - default: - err = nil - } - - if err != nil { - return resp, err - } - - req = rpc.CriuReq{ - Type: &respType, - NotifySuccess: proto.Bool(true), - } - } - - return resp, nil -} - -// Dump dumps a process -func (c *Criu) Dump(opts *rpc.CriuOpts, nfy Notify) error { - return c.doSwrk(rpc.CriuReqType_DUMP, opts, nfy) -} - -// Restore restores a process -func (c *Criu) Restore(opts *rpc.CriuOpts, nfy Notify) error { - return c.doSwrk(rpc.CriuReqType_RESTORE, opts, nfy) -} - -// PreDump does a pre-dump -func (c *Criu) PreDump(opts *rpc.CriuOpts, nfy Notify) error { - return c.doSwrk(rpc.CriuReqType_PRE_DUMP, opts, nfy) -} - -// StartPageServer starts the page server -func (c *Criu) StartPageServer(opts *rpc.CriuOpts) error { - return c.doSwrk(rpc.CriuReqType_PAGE_SERVER, opts, nil) -} - -// StartPageServerChld starts the page server and returns PID and port -func (c *Criu) StartPageServerChld(opts *rpc.CriuOpts) (int, int, error) { - resp, err := c.doSwrkWithResp(rpc.CriuReqType_PAGE_SERVER_CHLD, opts, nil) - if err != nil { - return 0, 0, err - } - - return int(resp.Ps.GetPid()), int(resp.Ps.GetPort()), nil -} - -// GetCriuVersion executes the VERSION RPC call and returns 
the version -// as an integer. Major * 10000 + Minor * 100 + SubLevel -func (c *Criu) GetCriuVersion() (int, error) { - resp, err := c.doSwrkWithResp(rpc.CriuReqType_VERSION, nil, nil) - if err != nil { - return 0, err - } - - if resp.GetType() != rpc.CriuReqType_VERSION { - return 0, fmt.Errorf("Unexpected CRIU RPC response") - } - - version := int(*resp.GetVersion().MajorNumber) * 10000 - version += int(*resp.GetVersion().MinorNumber) * 100 - if resp.GetVersion().Sublevel != nil { - version += int(*resp.GetVersion().Sublevel) - } - - if resp.GetVersion().Gitid != nil { - // taken from runc: if it is a git release -> increase minor by 1 - version -= (version % 100) - version += 100 - } - - return version, nil -} - -// IsCriuAtLeast checks if the version is at least the same -// as the parameter version -func (c *Criu) IsCriuAtLeast(version int) (bool, error) { - criuVersion, err := c.GetCriuVersion() - if err != nil { - return false, err - } - - if criuVersion >= version { - return true, nil - } - - return false, nil -} diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/notify.go b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/notify.go deleted file mode 100644 index a177f2bb5c..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/notify.go +++ /dev/null @@ -1,62 +0,0 @@ -package criu - -// Notify interface -type Notify interface { - PreDump() error - PostDump() error - PreRestore() error - PostRestore(pid int32) error - NetworkLock() error - NetworkUnlock() error - SetupNamespaces(pid int32) error - PostSetupNamespaces() error - PostResume() error -} - -// NoNotify struct -type NoNotify struct{} - -// PreDump NoNotify -func (c NoNotify) PreDump() error { - return nil -} - -// PostDump NoNotify -func (c NoNotify) PostDump() error { - return nil -} - -// PreRestore NoNotify -func (c NoNotify) PreRestore() error { - return nil -} - -// PostRestore NoNotify -func (c NoNotify) PostRestore(pid int32) error { - return nil -} - -// NetworkLock NoNotify -func (c NoNotify) NetworkLock() error { - return nil -} - -// NetworkUnlock NoNotify -func (c NoNotify) NetworkUnlock() error { - return nil -} - -// SetupNamespaces NoNotify -func (c NoNotify) SetupNamespaces(pid int32) error { - return nil -} - -// PostSetupNamespaces NoNotify -func (c NoNotify) PostSetupNamespaces() error { - return nil -} - -// PostResume NoNotify -func (c NoNotify) PostResume() error { - return nil -} diff --git a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/rpc/rpc.pb.go b/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/rpc/rpc.pb.go deleted file mode 100644 index fa37f93cbd..0000000000 --- a/src/runtime/vendor/github.com/checkpoint-restore/go-criu/v5/rpc/rpc.pb.go +++ /dev/null @@ -1,2208 +0,0 @@ -// Code generated by protoc-gen-go. DO NOT EDIT. -// versions: -// protoc-gen-go v1.25.0 -// protoc v3.12.4 -// source: rpc/rpc.proto - -package rpc - -import ( - proto "github.com/golang/protobuf/proto" - protoreflect "google.golang.org/protobuf/reflect/protoreflect" - protoimpl "google.golang.org/protobuf/runtime/protoimpl" - reflect "reflect" - sync "sync" -) - -const ( - // Verify that this generated code is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) - // Verify that runtime/protoimpl is sufficiently up-to-date. - _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) -) - -// This is a compile-time assertion that a sufficiently up-to-date version -// of the legacy proto package is being used. 
-const _ = proto.ProtoPackageIsVersion4 - -type CriuCgMode int32 - -const ( - CriuCgMode_IGNORE CriuCgMode = 0 - CriuCgMode_CG_NONE CriuCgMode = 1 - CriuCgMode_PROPS CriuCgMode = 2 - CriuCgMode_SOFT CriuCgMode = 3 - CriuCgMode_FULL CriuCgMode = 4 - CriuCgMode_STRICT CriuCgMode = 5 - CriuCgMode_DEFAULT CriuCgMode = 6 -) - -// Enum value maps for CriuCgMode. -var ( - CriuCgMode_name = map[int32]string{ - 0: "IGNORE", - 1: "CG_NONE", - 2: "PROPS", - 3: "SOFT", - 4: "FULL", - 5: "STRICT", - 6: "DEFAULT", - } - CriuCgMode_value = map[string]int32{ - "IGNORE": 0, - "CG_NONE": 1, - "PROPS": 2, - "SOFT": 3, - "FULL": 4, - "STRICT": 5, - "DEFAULT": 6, - } -) - -func (x CriuCgMode) Enum() *CriuCgMode { - p := new(CriuCgMode) - *p = x - return p -} - -func (x CriuCgMode) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (CriuCgMode) Descriptor() protoreflect.EnumDescriptor { - return file_rpc_rpc_proto_enumTypes[0].Descriptor() -} - -func (CriuCgMode) Type() protoreflect.EnumType { - return &file_rpc_rpc_proto_enumTypes[0] -} - -func (x CriuCgMode) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Do not use. -func (x *CriuCgMode) UnmarshalJSON(b []byte) error { - num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) - if err != nil { - return err - } - *x = CriuCgMode(num) - return nil -} - -// Deprecated: Use CriuCgMode.Descriptor instead. -func (CriuCgMode) EnumDescriptor() ([]byte, []int) { - return file_rpc_rpc_proto_rawDescGZIP(), []int{0} -} - -type CriuPreDumpMode int32 - -const ( - CriuPreDumpMode_SPLICE CriuPreDumpMode = 1 - CriuPreDumpMode_VM_READ CriuPreDumpMode = 2 -) - -// Enum value maps for CriuPreDumpMode. -var ( - CriuPreDumpMode_name = map[int32]string{ - 1: "SPLICE", - 2: "VM_READ", - } - CriuPreDumpMode_value = map[string]int32{ - "SPLICE": 1, - "VM_READ": 2, - } -) - -func (x CriuPreDumpMode) Enum() *CriuPreDumpMode { - p := new(CriuPreDumpMode) - *p = x - return p -} - -func (x CriuPreDumpMode) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (CriuPreDumpMode) Descriptor() protoreflect.EnumDescriptor { - return file_rpc_rpc_proto_enumTypes[1].Descriptor() -} - -func (CriuPreDumpMode) Type() protoreflect.EnumType { - return &file_rpc_rpc_proto_enumTypes[1] -} - -func (x CriuPreDumpMode) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Do not use. -func (x *CriuPreDumpMode) UnmarshalJSON(b []byte) error { - num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) - if err != nil { - return err - } - *x = CriuPreDumpMode(num) - return nil -} - -// Deprecated: Use CriuPreDumpMode.Descriptor instead. -func (CriuPreDumpMode) EnumDescriptor() ([]byte, []int) { - return file_rpc_rpc_proto_rawDescGZIP(), []int{1} -} - -type CriuReqType int32 - -const ( - CriuReqType_EMPTY CriuReqType = 0 - CriuReqType_DUMP CriuReqType = 1 - CriuReqType_RESTORE CriuReqType = 2 - CriuReqType_CHECK CriuReqType = 3 - CriuReqType_PRE_DUMP CriuReqType = 4 - CriuReqType_PAGE_SERVER CriuReqType = 5 - CriuReqType_NOTIFY CriuReqType = 6 - CriuReqType_CPUINFO_DUMP CriuReqType = 7 - CriuReqType_CPUINFO_CHECK CriuReqType = 8 - CriuReqType_FEATURE_CHECK CriuReqType = 9 - CriuReqType_VERSION CriuReqType = 10 - CriuReqType_WAIT_PID CriuReqType = 11 - CriuReqType_PAGE_SERVER_CHLD CriuReqType = 12 -) - -// Enum value maps for CriuReqType. 
-var ( - CriuReqType_name = map[int32]string{ - 0: "EMPTY", - 1: "DUMP", - 2: "RESTORE", - 3: "CHECK", - 4: "PRE_DUMP", - 5: "PAGE_SERVER", - 6: "NOTIFY", - 7: "CPUINFO_DUMP", - 8: "CPUINFO_CHECK", - 9: "FEATURE_CHECK", - 10: "VERSION", - 11: "WAIT_PID", - 12: "PAGE_SERVER_CHLD", - } - CriuReqType_value = map[string]int32{ - "EMPTY": 0, - "DUMP": 1, - "RESTORE": 2, - "CHECK": 3, - "PRE_DUMP": 4, - "PAGE_SERVER": 5, - "NOTIFY": 6, - "CPUINFO_DUMP": 7, - "CPUINFO_CHECK": 8, - "FEATURE_CHECK": 9, - "VERSION": 10, - "WAIT_PID": 11, - "PAGE_SERVER_CHLD": 12, - } -) - -func (x CriuReqType) Enum() *CriuReqType { - p := new(CriuReqType) - *p = x - return p -} - -func (x CriuReqType) String() string { - return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x)) -} - -func (CriuReqType) Descriptor() protoreflect.EnumDescriptor { - return file_rpc_rpc_proto_enumTypes[2].Descriptor() -} - -func (CriuReqType) Type() protoreflect.EnumType { - return &file_rpc_rpc_proto_enumTypes[2] -} - -func (x CriuReqType) Number() protoreflect.EnumNumber { - return protoreflect.EnumNumber(x) -} - -// Deprecated: Do not use. -func (x *CriuReqType) UnmarshalJSON(b []byte) error { - num, err := protoimpl.X.UnmarshalJSONEnum(x.Descriptor(), b) - if err != nil { - return err - } - *x = CriuReqType(num) - return nil -} - -// Deprecated: Use CriuReqType.Descriptor instead. -func (CriuReqType) EnumDescriptor() ([]byte, []int) { - return file_rpc_rpc_proto_rawDescGZIP(), []int{2} -} - -type CriuPageServerInfo struct { - state protoimpl.MessageState - sizeCache protoimpl.SizeCache - unknownFields protoimpl.UnknownFields - - Address *string `protobuf:"bytes,1,opt,name=address" json:"address,omitempty"` - Port *int32 `protobuf:"varint,2,opt,name=port" json:"port,omitempty"` - Pid *int32 `protobuf:"varint,3,opt,name=pid" json:"pid,omitempty"` - Fd *int32 `protobuf:"varint,4,opt,name=fd" json:"fd,omitempty"` -} - -func (x *CriuPageServerInfo) Reset() { - *x = CriuPageServerInfo{} - if protoimpl.UnsafeEnabled { - mi := &file_rpc_rpc_proto_msgTypes[0] - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - ms.StoreMessageInfo(mi) - } -} - -func (x *CriuPageServerInfo) String() string { - return protoimpl.X.MessageStringOf(x) -} - -func (*CriuPageServerInfo) ProtoMessage() {} - -func (x *CriuPageServerInfo) ProtoReflect() protoreflect.Message { - mi := &file_rpc_rpc_proto_msgTypes[0] - if protoimpl.UnsafeEnabled && x != nil { - ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) - if ms.LoadMessageInfo() == nil { - ms.StoreMessageInfo(mi) - } - return ms - } - return mi.MessageOf(x) -} - -// Deprecated: Use CriuPageServerInfo.ProtoReflect.Descriptor instead. 
[Deleted hunk continues: the remainder of the generated Go bindings for rpc/rpc.proto — the Reset/String/ProtoReflect/Descriptor and field getter methods for the CriuPageServerInfo, CriuVethPair, ExtMountMap, JoinNamespace, InheritFd, CgroupRoot, UnixSk, CriuOpts, CriuDumpResp, CriuRestoreResp, CriuNotify, CriuFeatures, CriuReq, CriuResp and CriuVersion messages, the CriuOpts default-value constants, the raw file descriptor byte array, and the protoimpl type registration performed by file_rpc_rpc_proto_init() — all removed along with the rest of the vendored file.]
file_rpc_rpc_proto_depIdxs, - EnumInfos: file_rpc_rpc_proto_enumTypes, - MessageInfos: file_rpc_rpc_proto_msgTypes, - }.Build() - File_rpc_rpc_proto = out.File - file_rpc_rpc_proto_rawDesc = nil - file_rpc_rpc_proto_goTypes = nil - file_rpc_rpc_proto_depIdxs = nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/.clang-format b/src/runtime/vendor/github.com/cilium/ebpf/.clang-format deleted file mode 100644 index 4eb94b1baa..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/.clang-format +++ /dev/null @@ -1,17 +0,0 @@ ---- -Language: Cpp -BasedOnStyle: LLVM -AlignAfterOpenBracket: DontAlign -AlignConsecutiveAssignments: true -AlignEscapedNewlines: DontAlign -AlwaysBreakBeforeMultilineStrings: true -AlwaysBreakTemplateDeclarations: false -AllowAllParametersOfDeclarationOnNextLine: false -AllowShortFunctionsOnASingleLine: false -BreakBeforeBraces: Attach -IndentWidth: 4 -KeepEmptyLinesAtTheStartOfBlocks: false -TabWidth: 4 -UseTab: ForContinuationAndIndentation -ColumnLimit: 1000 -... diff --git a/src/runtime/vendor/github.com/cilium/ebpf/.gitignore b/src/runtime/vendor/github.com/cilium/ebpf/.gitignore deleted file mode 100644 index 38b15653c0..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/.gitignore +++ /dev/null @@ -1,13 +0,0 @@ -# Binaries for programs and plugins -*.exe -*.exe~ -*.dll -*.so -*.dylib -*.o - -# Test binary, build with `go test -c` -*.test - -# Output of the go coverage tool, specifically when used with LiteIDE -*.out diff --git a/src/runtime/vendor/github.com/cilium/ebpf/.golangci.yaml b/src/runtime/vendor/github.com/cilium/ebpf/.golangci.yaml deleted file mode 100644 index a88374197e..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/.golangci.yaml +++ /dev/null @@ -1,29 +0,0 @@ ---- -issues: - exclude-rules: - # syscall param structs will have unused fields in Go code. - - path: syscall.*.go - linters: - - structcheck - -linters: - disable-all: true - enable: - - deadcode - - errcheck - - goimports - - gosimple - - govet - - ineffassign - - misspell - - staticcheck - - structcheck - - typecheck - - unused - - varcheck - - # Could be enabled later: - # - gocyclo - # - prealloc - # - maligned - # - gosec diff --git a/src/runtime/vendor/github.com/cilium/ebpf/ARCHITECTURE.md b/src/runtime/vendor/github.com/cilium/ebpf/ARCHITECTURE.md deleted file mode 100644 index aee9c0a0d4..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/ARCHITECTURE.md +++ /dev/null @@ -1,80 +0,0 @@ -Architecture of the library -=== - - ELF -> Specifications -> Objects -> Links - -ELF ---- - -BPF is usually produced by using Clang to compile a subset of C. Clang outputs -an ELF file which contains program byte code (aka BPF), but also metadata for -maps used by the program. The metadata follows the conventions set by libbpf -shipped with the kernel. Certain ELF sections have special meaning -and contain structures defined by libbpf. Newer versions of clang emit -additional metadata in BPF Type Format (aka BTF). - -The library aims to be compatible with libbpf so that moving from a C toolchain -to a Go one creates little friction. To that end, the [ELF reader](elf_reader.go) -is tested against the Linux selftests and avoids introducing custom behaviour -if possible. - -The output of the ELF reader is a `CollectionSpec` which encodes -all of the information contained in the ELF in a form that is easy to work with -in Go. - -### BTF - -The BPF Type Format describes more than just the types used by a BPF program. 
It -includes debug aids like which source line corresponds to which instructions and -what global variables are used. - -[BTF parsing](internal/btf/) lives in a separate internal package since exposing -it would mean an additional maintenance burden, and because the API still -has sharp corners. The most important concept is the `btf.Type` interface, which -also describes things that aren't really types like `.rodata` or `.bss` sections. -`btf.Type`s can form cyclical graphs, which can easily lead to infinite loops if -one is not careful. Hopefully a safe pattern to work with `btf.Type` emerges as -we write more code that deals with it. - -Specifications ---- - -`CollectionSpec`, `ProgramSpec` and `MapSpec` are blueprints for in-kernel -objects and contain everything necessary to execute the relevant `bpf(2)` -syscalls. Since the ELF reader outputs a `CollectionSpec` it's possible to -modify clang-compiled BPF code, for example to rewrite constants. At the same -time the [asm](asm/) package provides an assembler that can be used to generate -`ProgramSpec` on the fly. - -Creating a spec should never require any privileges or be restricted in any way, -for example by only allowing programs in native endianness. This ensures that -the library stays flexible. - -Objects ---- - -`Program` and `Map` are the result of loading specs into the kernel. Sometimes -loading a spec will fail because the kernel is too old, or a feature is not -enabled. There are multiple ways the library deals with that: - -* Fallback: older kernels don't allowing naming programs and maps. The library - automatically detects support for names, and omits them during load if - necessary. This works since name is primarily a debug aid. - -* Sentinel error: sometimes it's possible to detect that a feature isn't available. - In that case the library will return an error wrapping `ErrNotSupported`. - This is also useful to skip tests that can't run on the current kernel. - -Once program and map objects are loaded they expose the kernel's low-level API, -e.g. `NextKey`. Often this API is awkward to use in Go, so there are safer -wrappers on top of the low-level API, like `MapIterator`. The low-level API is -useful as an out when our higher-level API doesn't support a particular use case. - -Links ---- - -BPF can be attached to many different points in the kernel and newer BPF hooks -tend to use bpf_link to do so. Older hooks unfortunately use a combination of -syscalls, netlink messages, etc. Adding support for a new link type should not -pull in large dependencies like netlink, so XDP programs or tracepoints are -out of scope. diff --git a/src/runtime/vendor/github.com/cilium/ebpf/CODE_OF_CONDUCT.md b/src/runtime/vendor/github.com/cilium/ebpf/CODE_OF_CONDUCT.md deleted file mode 100644 index 8e42838c5a..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,46 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
- -## Our Standards - -Examples of behavior that contributes to creating a positive environment include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy towards other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or advances -* Trolling, insulting/derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or electronic address, without explicit permission -* Other conduct which could reasonably be considered inappropriate in a professional setting - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at nathanjsweet at gmail dot com or i at lmb dot io. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] - -[homepage]: http://contributor-covenant.org -[version]: http://contributor-covenant.org/version/1/4/ diff --git a/src/runtime/vendor/github.com/cilium/ebpf/CONTRIBUTING.md b/src/runtime/vendor/github.com/cilium/ebpf/CONTRIBUTING.md deleted file mode 100644 index 72ceb43782..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/CONTRIBUTING.md +++ /dev/null @@ -1,40 +0,0 @@ -# How to contribute - -Development is on [GitHub](https://github.com/cilium/ebpf) and contributions in -the form of pull requests and issues reporting bugs or suggesting new features -are welcome. Please take a look at [the architecture](ARCHITECTURE.md) to get -a better understanding for the high-level goals. - -New features must be accompanied by tests. 
Before starting work on any large -feature, please [join](https://cilium.herokuapp.com/) the -[#libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack to -discuss the design first. - -When submitting pull requests, consider writing details about what problem you -are solving and why the proposed approach solves that problem in commit messages -and/or pull request description to help future library users and maintainers to -reason about the proposed changes. - -## Running the tests - -Many of the tests require privileges to set resource limits and load eBPF code. -The easiest way to obtain these is to run the tests with `sudo`. - -To test the current package with your local kernel you can simply run: -``` -go test -exec sudo ./... -``` - -To test the current package with a different kernel version you can use the [run-tests.sh](run-tests.sh) script. -It requires [virtme](https://github.com/amluto/virtme) and qemu to be installed. - -Examples: - -```bash -# Run all tests on a 5.4 kernel -./run-tests.sh 5.4 - -# Run a subset of tests: -./run-tests.sh 5.4 go test ./link -``` - diff --git a/src/runtime/vendor/github.com/cilium/ebpf/LICENSE b/src/runtime/vendor/github.com/cilium/ebpf/LICENSE deleted file mode 100644 index c637ae99c2..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/LICENSE +++ /dev/null @@ -1,23 +0,0 @@ -MIT License - -Copyright (c) 2017 Nathan Sweet -Copyright (c) 2018, 2019 Cloudflare -Copyright (c) 2019 Authors of Cilium - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. diff --git a/src/runtime/vendor/github.com/cilium/ebpf/Makefile b/src/runtime/vendor/github.com/cilium/ebpf/Makefile deleted file mode 100644 index 5dd342c5b2..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/Makefile +++ /dev/null @@ -1,70 +0,0 @@ -# The development version of clang is distributed as the 'clang' binary, -# while stable/released versions have a version number attached. -# Pin the default clang to a stable version. -CLANG ?= clang-12 -CFLAGS := -target bpf -O2 -g -Wall -Werror $(CFLAGS) - -# Obtain an absolute path to the directory of the Makefile. -# Assume the Makefile is in the root of the repository. 
-REPODIR := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) -UIDGID := $(shell stat -c '%u:%g' ${REPODIR}) - -IMAGE := $(shell cat ${REPODIR}/testdata/docker/IMAGE) -VERSION := $(shell cat ${REPODIR}/testdata/docker/VERSION) - -# clang <8 doesn't tag relocs properly (STT_NOTYPE) -# clang 9 is the first version emitting BTF -TARGETS := \ - testdata/loader-clang-7 \ - testdata/loader-clang-9 \ - testdata/loader-$(CLANG) \ - testdata/invalid_map \ - testdata/raw_tracepoint \ - testdata/invalid_map_static \ - testdata/initialized_btf_map \ - testdata/strings \ - internal/btf/testdata/relocs - -.PHONY: all clean docker-all docker-shell - -.DEFAULT_TARGET = docker-all - -# Build all ELF binaries using a Dockerized LLVM toolchain. -docker-all: - docker run --rm --user "${UIDGID}" \ - -v "${REPODIR}":/ebpf -w /ebpf --env MAKEFLAGS \ - --env CFLAGS="-fdebug-prefix-map=/ebpf=." \ - "${IMAGE}:${VERSION}" \ - make all - -# (debug) Drop the user into a shell inside the Docker container as root. -docker-shell: - docker run --rm -ti \ - -v "${REPODIR}":/ebpf -w /ebpf \ - "${IMAGE}:${VERSION}" - -clean: - -$(RM) testdata/*.elf - -$(RM) internal/btf/testdata/*.elf - -all: $(addsuffix -el.elf,$(TARGETS)) $(addsuffix -eb.elf,$(TARGETS)) - ln -srf testdata/loader-$(CLANG)-el.elf testdata/loader-el.elf - ln -srf testdata/loader-$(CLANG)-eb.elf testdata/loader-eb.elf - -testdata/loader-%-el.elf: testdata/loader.c - $* $(CFLAGS) -mlittle-endian -c $< -o $@ - -testdata/loader-%-eb.elf: testdata/loader.c - $* $(CFLAGS) -mbig-endian -c $< -o $@ - -%-el.elf: %.c - $(CLANG) $(CFLAGS) -mlittle-endian -c $< -o $@ - -%-eb.elf : %.c - $(CLANG) $(CFLAGS) -mbig-endian -c $< -o $@ - -# Usage: make VMLINUX=/path/to/vmlinux vmlinux-btf -.PHONY: vmlinux-btf -vmlinux-btf: internal/btf/testdata/vmlinux-btf.gz -internal/btf/testdata/vmlinux-btf.gz: $(VMLINUX) - objcopy --dump-section .BTF=/dev/stdout "$<" /dev/null | gzip > "$@" diff --git a/src/runtime/vendor/github.com/cilium/ebpf/README.md b/src/runtime/vendor/github.com/cilium/ebpf/README.md deleted file mode 100644 index 76c3c303bb..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/README.md +++ /dev/null @@ -1,62 +0,0 @@ -# eBPF - -[![PkgGoDev](https://pkg.go.dev/badge/github.com/cilium/ebpf)](https://pkg.go.dev/github.com/cilium/ebpf) - -eBPF is a pure Go library that provides utilities for loading, compiling, and -debugging eBPF programs. It has minimal external dependencies and is intended to -be used in long running processes. - -* [asm](https://pkg.go.dev/github.com/cilium/ebpf/asm) contains a basic - assembler -* [link](https://pkg.go.dev/github.com/cilium/ebpf/link) allows attaching eBPF - to various hooks -* [perf](https://pkg.go.dev/github.com/cilium/ebpf/perf) allows reading from a - `PERF_EVENT_ARRAY` -* [cmd/bpf2go](https://pkg.go.dev/github.com/cilium/ebpf/cmd/bpf2go) allows - compiling and embedding eBPF programs in Go code - -The library is maintained by [Cloudflare](https://www.cloudflare.com) and -[Cilium](https://www.cilium.io). Feel free to -[join](https://cilium.herokuapp.com/) the -[#libbpf-go](https://cilium.slack.com/messages/libbpf-go) channel on Slack. - -## Current status - -The package is production ready, but **the API is explicitly unstable right -now**. Expect to update your code if you want to follow along. - -## Getting Started - -A small collection of Go and eBPF programs that serve as examples for building -your own tools can be found under [examples/](examples/). 
- -Contributions are highly encouraged, as they highlight certain use cases of -eBPF and the library, and help shape the future of the project. - -## Requirements - -* A version of Go that is [supported by - upstream](https://golang.org/doc/devel/release.html#policy) -* Linux 4.9, 4.19 or 5.4 (versions in-between should work, but are not tested) - -## Useful resources - -* [eBPF.io](https://ebpf.io) (recommended) -* [Cilium eBPF documentation](https://docs.cilium.io/en/latest/bpf/#bpf-guide) - (recommended) -* [Linux documentation on - BPF](https://www.kernel.org/doc/html/latest/networking/filter.html) -* [eBPF features by Linux - version](https://github.com/iovisor/bcc/blob/master/docs/kernel-versions.md) - -## Regenerating Testdata - -Run `make` in the root of this repository to rebuild testdata in all -subpackages. This requires Docker, as it relies on a standardized build -environment to keep the build output stable. - -The toolchain image build files are kept in [testdata/docker/](testdata/docker/). - -## License - -MIT diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/alu.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/alu.go deleted file mode 100644 index 70ccc4d151..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/alu.go +++ /dev/null @@ -1,149 +0,0 @@ -package asm - -//go:generate stringer -output alu_string.go -type=Source,Endianness,ALUOp - -// Source of ALU / ALU64 / Branch operations -// -// msb lsb -// +----+-+---+ -// |op |S|cls| -// +----+-+---+ -type Source uint8 - -const sourceMask OpCode = 0x08 - -// Source bitmask -const ( - // InvalidSource is returned by getters when invoked - // on non ALU / branch OpCodes. - InvalidSource Source = 0xff - // ImmSource src is from constant - ImmSource Source = 0x00 - // RegSource src is from register - RegSource Source = 0x08 -) - -// The Endianness of a byte swap instruction. -type Endianness uint8 - -const endianMask = sourceMask - -// Endian flags -const ( - InvalidEndian Endianness = 0xff - // Convert to little endian - LE Endianness = 0x00 - // Convert to big endian - BE Endianness = 0x08 -) - -// ALUOp are ALU / ALU64 operations -// -// msb lsb -// +----+-+---+ -// |OP |s|cls| -// +----+-+---+ -type ALUOp uint8 - -const aluMask OpCode = 0xf0 - -const ( - // InvalidALUOp is returned by getters when invoked - // on non ALU OpCodes - InvalidALUOp ALUOp = 0xff - // Add - addition - Add ALUOp = 0x00 - // Sub - subtraction - Sub ALUOp = 0x10 - // Mul - multiplication - Mul ALUOp = 0x20 - // Div - division - Div ALUOp = 0x30 - // Or - bitwise or - Or ALUOp = 0x40 - // And - bitwise and - And ALUOp = 0x50 - // LSh - bitwise shift left - LSh ALUOp = 0x60 - // RSh - bitwise shift right - RSh ALUOp = 0x70 - // Neg - sign/unsign signing bit - Neg ALUOp = 0x80 - // Mod - modulo - Mod ALUOp = 0x90 - // Xor - bitwise xor - Xor ALUOp = 0xa0 - // Mov - move value from one place to another - Mov ALUOp = 0xb0 - // ArSh - arithmatic shift - ArSh ALUOp = 0xc0 - // Swap - endian conversions - Swap ALUOp = 0xd0 -) - -// HostTo converts from host to another endianness. -func HostTo(endian Endianness, dst Register, size Size) Instruction { - var imm int64 - switch size { - case Half: - imm = 16 - case Word: - imm = 32 - case DWord: - imm = 64 - default: - return Instruction{OpCode: InvalidOpCode} - } - - return Instruction{ - OpCode: OpCode(ALUClass).SetALUOp(Swap).SetSource(Source(endian)), - Dst: dst, - Constant: imm, - } -} - -// Op returns the OpCode for an ALU operation with a given source. 
-func (op ALUOp) Op(source Source) OpCode { - return OpCode(ALU64Class).SetALUOp(op).SetSource(source) -} - -// Reg emits `dst (op) src`. -func (op ALUOp) Reg(dst, src Register) Instruction { - return Instruction{ - OpCode: op.Op(RegSource), - Dst: dst, - Src: src, - } -} - -// Imm emits `dst (op) value`. -func (op ALUOp) Imm(dst Register, value int32) Instruction { - return Instruction{ - OpCode: op.Op(ImmSource), - Dst: dst, - Constant: int64(value), - } -} - -// Op32 returns the OpCode for a 32-bit ALU operation with a given source. -func (op ALUOp) Op32(source Source) OpCode { - return OpCode(ALUClass).SetALUOp(op).SetSource(source) -} - -// Reg32 emits `dst (op) src`, zeroing the upper 32 bit of dst. -func (op ALUOp) Reg32(dst, src Register) Instruction { - return Instruction{ - OpCode: op.Op32(RegSource), - Dst: dst, - Src: src, - } -} - -// Imm32 emits `dst (op) value`, zeroing the upper 32 bit of dst. -func (op ALUOp) Imm32(dst Register, value int32) Instruction { - return Instruction{ - OpCode: op.Op32(ImmSource), - Dst: dst, - Constant: int64(value), - } -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/alu_string.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/alu_string.go deleted file mode 100644 index 72d3fe6292..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/alu_string.go +++ /dev/null @@ -1,107 +0,0 @@ -// Code generated by "stringer -output alu_string.go -type=Source,Endianness,ALUOp"; DO NOT EDIT. - -package asm - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[InvalidSource-255] - _ = x[ImmSource-0] - _ = x[RegSource-8] -} - -const ( - _Source_name_0 = "ImmSource" - _Source_name_1 = "RegSource" - _Source_name_2 = "InvalidSource" -) - -func (i Source) String() string { - switch { - case i == 0: - return _Source_name_0 - case i == 8: - return _Source_name_1 - case i == 255: - return _Source_name_2 - default: - return "Source(" + strconv.FormatInt(int64(i), 10) + ")" - } -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[InvalidEndian-255] - _ = x[LE-0] - _ = x[BE-8] -} - -const ( - _Endianness_name_0 = "LE" - _Endianness_name_1 = "BE" - _Endianness_name_2 = "InvalidEndian" -) - -func (i Endianness) String() string { - switch { - case i == 0: - return _Endianness_name_0 - case i == 8: - return _Endianness_name_1 - case i == 255: - return _Endianness_name_2 - default: - return "Endianness(" + strconv.FormatInt(int64(i), 10) + ")" - } -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[InvalidALUOp-255] - _ = x[Add-0] - _ = x[Sub-16] - _ = x[Mul-32] - _ = x[Div-48] - _ = x[Or-64] - _ = x[And-80] - _ = x[LSh-96] - _ = x[RSh-112] - _ = x[Neg-128] - _ = x[Mod-144] - _ = x[Xor-160] - _ = x[Mov-176] - _ = x[ArSh-192] - _ = x[Swap-208] -} - -const _ALUOp_name = "AddSubMulDivOrAndLShRShNegModXorMovArShSwapInvalidALUOp" - -var _ALUOp_map = map[ALUOp]string{ - 0: _ALUOp_name[0:3], - 16: _ALUOp_name[3:6], - 32: _ALUOp_name[6:9], - 48: _ALUOp_name[9:12], - 64: _ALUOp_name[12:14], - 80: _ALUOp_name[14:17], - 96: _ALUOp_name[17:20], - 112: _ALUOp_name[20:23], - 128: _ALUOp_name[23:26], - 144: _ALUOp_name[26:29], - 160: _ALUOp_name[29:32], - 176: _ALUOp_name[32:35], - 192: _ALUOp_name[35:39], - 208: _ALUOp_name[39:43], - 255: _ALUOp_name[43:55], -} - -func (i ALUOp) String() string { - if str, ok := _ALUOp_map[i]; ok { - return str - } - return "ALUOp(" + strconv.FormatInt(int64(i), 10) + ")" -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/doc.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/doc.go deleted file mode 100644 index 7031bdc276..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/doc.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package asm is an assembler for eBPF bytecode. -package asm diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/func.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/func.go deleted file mode 100644 index aee2c7ac81..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/func.go +++ /dev/null @@ -1,195 +0,0 @@ -package asm - -//go:generate stringer -output func_string.go -type=BuiltinFunc - -// BuiltinFunc is a built-in eBPF function. -type BuiltinFunc int32 - -// eBPF built-in functions -// -// You can regenerate this list using the following gawk script: -// -// /FN\(.+\),/ { -// match($1, /\((.+)\)/, r) -// split(r[1], p, "_") -// printf "Fn" -// for (i in p) { -// printf "%s%s", toupper(substr(p[i], 1, 1)), substr(p[i], 2) -// } -// print "" -// } -// -// The script expects include/uapi/linux/bpf.h as it's input. 
-const ( - FnUnspec BuiltinFunc = iota - FnMapLookupElem - FnMapUpdateElem - FnMapDeleteElem - FnProbeRead - FnKtimeGetNs - FnTracePrintk - FnGetPrandomU32 - FnGetSmpProcessorId - FnSkbStoreBytes - FnL3CsumReplace - FnL4CsumReplace - FnTailCall - FnCloneRedirect - FnGetCurrentPidTgid - FnGetCurrentUidGid - FnGetCurrentComm - FnGetCgroupClassid - FnSkbVlanPush - FnSkbVlanPop - FnSkbGetTunnelKey - FnSkbSetTunnelKey - FnPerfEventRead - FnRedirect - FnGetRouteRealm - FnPerfEventOutput - FnSkbLoadBytes - FnGetStackid - FnCsumDiff - FnSkbGetTunnelOpt - FnSkbSetTunnelOpt - FnSkbChangeProto - FnSkbChangeType - FnSkbUnderCgroup - FnGetHashRecalc - FnGetCurrentTask - FnProbeWriteUser - FnCurrentTaskUnderCgroup - FnSkbChangeTail - FnSkbPullData - FnCsumUpdate - FnSetHashInvalid - FnGetNumaNodeId - FnSkbChangeHead - FnXdpAdjustHead - FnProbeReadStr - FnGetSocketCookie - FnGetSocketUid - FnSetHash - FnSetsockopt - FnSkbAdjustRoom - FnRedirectMap - FnSkRedirectMap - FnSockMapUpdate - FnXdpAdjustMeta - FnPerfEventReadValue - FnPerfProgReadValue - FnGetsockopt - FnOverrideReturn - FnSockOpsCbFlagsSet - FnMsgRedirectMap - FnMsgApplyBytes - FnMsgCorkBytes - FnMsgPullData - FnBind - FnXdpAdjustTail - FnSkbGetXfrmState - FnGetStack - FnSkbLoadBytesRelative - FnFibLookup - FnSockHashUpdate - FnMsgRedirectHash - FnSkRedirectHash - FnLwtPushEncap - FnLwtSeg6StoreBytes - FnLwtSeg6AdjustSrh - FnLwtSeg6Action - FnRcRepeat - FnRcKeydown - FnSkbCgroupId - FnGetCurrentCgroupId - FnGetLocalStorage - FnSkSelectReuseport - FnSkbAncestorCgroupId - FnSkLookupTcp - FnSkLookupUdp - FnSkRelease - FnMapPushElem - FnMapPopElem - FnMapPeekElem - FnMsgPushData - FnMsgPopData - FnRcPointerRel - FnSpinLock - FnSpinUnlock - FnSkFullsock - FnTcpSock - FnSkbEcnSetCe - FnGetListenerSock - FnSkcLookupTcp - FnTcpCheckSyncookie - FnSysctlGetName - FnSysctlGetCurrentValue - FnSysctlGetNewValue - FnSysctlSetNewValue - FnStrtol - FnStrtoul - FnSkStorageGet - FnSkStorageDelete - FnSendSignal - FnTcpGenSyncookie - FnSkbOutput - FnProbeReadUser - FnProbeReadKernel - FnProbeReadUserStr - FnProbeReadKernelStr - FnTcpSendAck - FnSendSignalThread - FnJiffies64 - FnReadBranchRecords - FnGetNsCurrentPidTgid - FnXdpOutput - FnGetNetnsCookie - FnGetCurrentAncestorCgroupId - FnSkAssign - FnKtimeGetBootNs - FnSeqPrintf - FnSeqWrite - FnSkCgroupId - FnSkAncestorCgroupId - FnRingbufOutput - FnRingbufReserve - FnRingbufSubmit - FnRingbufDiscard - FnRingbufQuery - FnCsumLevel - FnSkcToTcp6Sock - FnSkcToTcpSock - FnSkcToTcpTimewaitSock - FnSkcToTcpRequestSock - FnSkcToUdp6Sock - FnGetTaskStack - FnLoadHdrOpt - FnStoreHdrOpt - FnReserveHdrOpt - FnInodeStorageGet - FnInodeStorageDelete - FnDPath - FnCopyFromUser - FnSnprintfBtf - FnSeqPrintfBtf - FnSkbCgroupClassid - FnRedirectNeigh - FnPerCpuPtr - FnThisCpuPtr - FnRedirectPeer - FnTaskStorageGet - FnTaskStorageDelete - FnGetCurrentTaskBtf - FnBprmOptsSet - FnKtimeGetCoarseNs - FnImaInodeHash - FnSockFromFile -) - -// Call emits a function call. -func (fn BuiltinFunc) Call() Instruction { - return Instruction{ - OpCode: OpCode(JumpClass).SetJumpOp(Call), - Constant: int64(fn), - } -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/func_string.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/func_string.go deleted file mode 100644 index a712c5da8a..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/func_string.go +++ /dev/null @@ -1,185 +0,0 @@ -// Code generated by "stringer -output func_string.go -type=BuiltinFunc"; DO NOT EDIT. 
- -package asm - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[FnUnspec-0] - _ = x[FnMapLookupElem-1] - _ = x[FnMapUpdateElem-2] - _ = x[FnMapDeleteElem-3] - _ = x[FnProbeRead-4] - _ = x[FnKtimeGetNs-5] - _ = x[FnTracePrintk-6] - _ = x[FnGetPrandomU32-7] - _ = x[FnGetSmpProcessorId-8] - _ = x[FnSkbStoreBytes-9] - _ = x[FnL3CsumReplace-10] - _ = x[FnL4CsumReplace-11] - _ = x[FnTailCall-12] - _ = x[FnCloneRedirect-13] - _ = x[FnGetCurrentPidTgid-14] - _ = x[FnGetCurrentUidGid-15] - _ = x[FnGetCurrentComm-16] - _ = x[FnGetCgroupClassid-17] - _ = x[FnSkbVlanPush-18] - _ = x[FnSkbVlanPop-19] - _ = x[FnSkbGetTunnelKey-20] - _ = x[FnSkbSetTunnelKey-21] - _ = x[FnPerfEventRead-22] - _ = x[FnRedirect-23] - _ = x[FnGetRouteRealm-24] - _ = x[FnPerfEventOutput-25] - _ = x[FnSkbLoadBytes-26] - _ = x[FnGetStackid-27] - _ = x[FnCsumDiff-28] - _ = x[FnSkbGetTunnelOpt-29] - _ = x[FnSkbSetTunnelOpt-30] - _ = x[FnSkbChangeProto-31] - _ = x[FnSkbChangeType-32] - _ = x[FnSkbUnderCgroup-33] - _ = x[FnGetHashRecalc-34] - _ = x[FnGetCurrentTask-35] - _ = x[FnProbeWriteUser-36] - _ = x[FnCurrentTaskUnderCgroup-37] - _ = x[FnSkbChangeTail-38] - _ = x[FnSkbPullData-39] - _ = x[FnCsumUpdate-40] - _ = x[FnSetHashInvalid-41] - _ = x[FnGetNumaNodeId-42] - _ = x[FnSkbChangeHead-43] - _ = x[FnXdpAdjustHead-44] - _ = x[FnProbeReadStr-45] - _ = x[FnGetSocketCookie-46] - _ = x[FnGetSocketUid-47] - _ = x[FnSetHash-48] - _ = x[FnSetsockopt-49] - _ = x[FnSkbAdjustRoom-50] - _ = x[FnRedirectMap-51] - _ = x[FnSkRedirectMap-52] - _ = x[FnSockMapUpdate-53] - _ = x[FnXdpAdjustMeta-54] - _ = x[FnPerfEventReadValue-55] - _ = x[FnPerfProgReadValue-56] - _ = x[FnGetsockopt-57] - _ = x[FnOverrideReturn-58] - _ = x[FnSockOpsCbFlagsSet-59] - _ = x[FnMsgRedirectMap-60] - _ = x[FnMsgApplyBytes-61] - _ = x[FnMsgCorkBytes-62] - _ = x[FnMsgPullData-63] - _ = x[FnBind-64] - _ = x[FnXdpAdjustTail-65] - _ = x[FnSkbGetXfrmState-66] - _ = x[FnGetStack-67] - _ = x[FnSkbLoadBytesRelative-68] - _ = x[FnFibLookup-69] - _ = x[FnSockHashUpdate-70] - _ = x[FnMsgRedirectHash-71] - _ = x[FnSkRedirectHash-72] - _ = x[FnLwtPushEncap-73] - _ = x[FnLwtSeg6StoreBytes-74] - _ = x[FnLwtSeg6AdjustSrh-75] - _ = x[FnLwtSeg6Action-76] - _ = x[FnRcRepeat-77] - _ = x[FnRcKeydown-78] - _ = x[FnSkbCgroupId-79] - _ = x[FnGetCurrentCgroupId-80] - _ = x[FnGetLocalStorage-81] - _ = x[FnSkSelectReuseport-82] - _ = x[FnSkbAncestorCgroupId-83] - _ = x[FnSkLookupTcp-84] - _ = x[FnSkLookupUdp-85] - _ = x[FnSkRelease-86] - _ = x[FnMapPushElem-87] - _ = x[FnMapPopElem-88] - _ = x[FnMapPeekElem-89] - _ = x[FnMsgPushData-90] - _ = x[FnMsgPopData-91] - _ = x[FnRcPointerRel-92] - _ = x[FnSpinLock-93] - _ = x[FnSpinUnlock-94] - _ = x[FnSkFullsock-95] - _ = x[FnTcpSock-96] - _ = x[FnSkbEcnSetCe-97] - _ = x[FnGetListenerSock-98] - _ = x[FnSkcLookupTcp-99] - _ = x[FnTcpCheckSyncookie-100] - _ = x[FnSysctlGetName-101] - _ = x[FnSysctlGetCurrentValue-102] - _ = x[FnSysctlGetNewValue-103] - _ = x[FnSysctlSetNewValue-104] - _ = x[FnStrtol-105] - _ = x[FnStrtoul-106] - _ = x[FnSkStorageGet-107] - _ = x[FnSkStorageDelete-108] - _ = x[FnSendSignal-109] - _ = x[FnTcpGenSyncookie-110] - _ = x[FnSkbOutput-111] - _ = x[FnProbeReadUser-112] - _ = x[FnProbeReadKernel-113] - _ = x[FnProbeReadUserStr-114] - _ = x[FnProbeReadKernelStr-115] - _ = x[FnTcpSendAck-116] - _ = x[FnSendSignalThread-117] - _ = x[FnJiffies64-118] - _ = 
x[FnReadBranchRecords-119] - _ = x[FnGetNsCurrentPidTgid-120] - _ = x[FnXdpOutput-121] - _ = x[FnGetNetnsCookie-122] - _ = x[FnGetCurrentAncestorCgroupId-123] - _ = x[FnSkAssign-124] - _ = x[FnKtimeGetBootNs-125] - _ = x[FnSeqPrintf-126] - _ = x[FnSeqWrite-127] - _ = x[FnSkCgroupId-128] - _ = x[FnSkAncestorCgroupId-129] - _ = x[FnRingbufOutput-130] - _ = x[FnRingbufReserve-131] - _ = x[FnRingbufSubmit-132] - _ = x[FnRingbufDiscard-133] - _ = x[FnRingbufQuery-134] - _ = x[FnCsumLevel-135] - _ = x[FnSkcToTcp6Sock-136] - _ = x[FnSkcToTcpSock-137] - _ = x[FnSkcToTcpTimewaitSock-138] - _ = x[FnSkcToTcpRequestSock-139] - _ = x[FnSkcToUdp6Sock-140] - _ = x[FnGetTaskStack-141] - _ = x[FnLoadHdrOpt-142] - _ = x[FnStoreHdrOpt-143] - _ = x[FnReserveHdrOpt-144] - _ = x[FnInodeStorageGet-145] - _ = x[FnInodeStorageDelete-146] - _ = x[FnDPath-147] - _ = x[FnCopyFromUser-148] - _ = x[FnSnprintfBtf-149] - _ = x[FnSeqPrintfBtf-150] - _ = x[FnSkbCgroupClassid-151] - _ = x[FnRedirectNeigh-152] - _ = x[FnPerCpuPtr-153] - _ = x[FnThisCpuPtr-154] - _ = x[FnRedirectPeer-155] - _ = x[FnTaskStorageGet-156] - _ = x[FnTaskStorageDelete-157] - _ = x[FnGetCurrentTaskBtf-158] - _ = x[FnBprmOptsSet-159] - _ = x[FnKtimeGetCoarseNs-160] - _ = x[FnImaInodeHash-161] - _ = x[FnSockFromFile-162] -} - -const _BuiltinFunc_name = "FnUnspecFnMapLookupElemFnMapUpdateElemFnMapDeleteElemFnProbeReadFnKtimeGetNsFnTracePrintkFnGetPrandomU32FnGetSmpProcessorIdFnSkbStoreBytesFnL3CsumReplaceFnL4CsumReplaceFnTailCallFnCloneRedirectFnGetCurrentPidTgidFnGetCurrentUidGidFnGetCurrentCommFnGetCgroupClassidFnSkbVlanPushFnSkbVlanPopFnSkbGetTunnelKeyFnSkbSetTunnelKeyFnPerfEventReadFnRedirectFnGetRouteRealmFnPerfEventOutputFnSkbLoadBytesFnGetStackidFnCsumDiffFnSkbGetTunnelOptFnSkbSetTunnelOptFnSkbChangeProtoFnSkbChangeTypeFnSkbUnderCgroupFnGetHashRecalcFnGetCurrentTaskFnProbeWriteUserFnCurrentTaskUnderCgroupFnSkbChangeTailFnSkbPullDataFnCsumUpdateFnSetHashInvalidFnGetNumaNodeIdFnSkbChangeHeadFnXdpAdjustHeadFnProbeReadStrFnGetSocketCookieFnGetSocketUidFnSetHashFnSetsockoptFnSkbAdjustRoomFnRedirectMapFnSkRedirectMapFnSockMapUpdateFnXdpAdjustMetaFnPerfEventReadValueFnPerfProgReadValueFnGetsockoptFnOverrideReturnFnSockOpsCbFlagsSetFnMsgRedirectMapFnMsgApplyBytesFnMsgCorkBytesFnMsgPullDataFnBindFnXdpAdjustTailFnSkbGetXfrmStateFnGetStackFnSkbLoadBytesRelativeFnFibLookupFnSockHashUpdateFnMsgRedirectHashFnSkRedirectHashFnLwtPushEncapFnLwtSeg6StoreBytesFnLwtSeg6AdjustSrhFnLwtSeg6ActionFnRcRepeatFnRcKeydownFnSkbCgroupIdFnGetCurrentCgroupIdFnGetLocalStorageFnSkSelectReuseportFnSkbAncestorCgroupIdFnSkLookupTcpFnSkLookupUdpFnSkReleaseFnMapPushElemFnMapPopElemFnMapPeekElemFnMsgPushDataFnMsgPopDataFnRcPointerRelFnSpinLockFnSpinUnlockFnSkFullsockFnTcpSockFnSkbEcnSetCeFnGetListenerSockFnSkcLookupTcpFnTcpCheckSyncookieFnSysctlGetNameFnSysctlGetCurrentValueFnSysctlGetNewValueFnSysctlSetNewValueFnStrtolFnStrtoulFnSkStorageGetFnSkStorageDeleteFnSendSignalFnTcpGenSyncookieFnSkbOutputFnProbeReadUserFnProbeReadKernelFnProbeReadUserStrFnProbeReadKernelStrFnTcpSendAckFnSendSignalThreadFnJiffies64FnReadBranchRecordsFnGetNsCurrentPidTgidFnXdpOutputFnGetNetnsCookieFnGetCurrentAncestorCgroupIdFnSkAssignFnKtimeGetBootNsFnSeqPrintfFnSeqWriteFnSkCgroupIdFnSkAncestorCgroupIdFnRingbufOutputFnRingbufReserveFnRingbufSubmitFnRingbufDiscardFnRingbufQueryFnCsumLevelFnSkcToTcp6SockFnSkcToTcpSockFnSkcToTcpTimewaitSockFnSkcToTcpRequestSockFnSkcToUdp6SockFnGetTaskStackFnLoadHdrOptFnStoreHdrOptFnReserveHdrOptFnInodeStorageGetFnInodeStorageDeleteFnDPathFnCopyFromUserFnSnprintfBtfFnSeqPrintfBtfF
nSkbCgroupClassidFnRedirectNeighFnPerCpuPtrFnThisCpuPtrFnRedirectPeerFnTaskStorageGetFnTaskStorageDeleteFnGetCurrentTaskBtfFnBprmOptsSetFnKtimeGetCoarseNsFnImaInodeHashFnSockFromFile" - -var _BuiltinFunc_index = [...]uint16{0, 8, 23, 38, 53, 64, 76, 89, 104, 123, 138, 153, 168, 178, 193, 212, 230, 246, 264, 277, 289, 306, 323, 338, 348, 363, 380, 394, 406, 416, 433, 450, 466, 481, 497, 512, 528, 544, 568, 583, 596, 608, 624, 639, 654, 669, 683, 700, 714, 723, 735, 750, 763, 778, 793, 808, 828, 847, 859, 875, 894, 910, 925, 939, 952, 958, 973, 990, 1000, 1022, 1033, 1049, 1066, 1082, 1096, 1115, 1133, 1148, 1158, 1169, 1182, 1202, 1219, 1238, 1259, 1272, 1285, 1296, 1309, 1321, 1334, 1347, 1359, 1373, 1383, 1395, 1407, 1416, 1429, 1446, 1460, 1479, 1494, 1517, 1536, 1555, 1563, 1572, 1586, 1603, 1615, 1632, 1643, 1658, 1675, 1693, 1713, 1725, 1743, 1754, 1773, 1794, 1805, 1821, 1849, 1859, 1875, 1886, 1896, 1908, 1928, 1943, 1959, 1974, 1990, 2004, 2015, 2030, 2044, 2066, 2087, 2102, 2116, 2128, 2141, 2156, 2173, 2193, 2200, 2214, 2227, 2241, 2259, 2274, 2285, 2297, 2311, 2327, 2346, 2365, 2378, 2396, 2410, 2424} - -func (i BuiltinFunc) String() string { - if i < 0 || i >= BuiltinFunc(len(_BuiltinFunc_index)-1) { - return "BuiltinFunc(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _BuiltinFunc_name[_BuiltinFunc_index[i]:_BuiltinFunc_index[i+1]] -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/instruction.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/instruction.go deleted file mode 100644 index e7ac0109e2..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/instruction.go +++ /dev/null @@ -1,506 +0,0 @@ -package asm - -import ( - "crypto/sha1" - "encoding/binary" - "encoding/hex" - "errors" - "fmt" - "io" - "math" - "strings" - - "github.com/cilium/ebpf/internal/unix" -) - -// InstructionSize is the size of a BPF instruction in bytes -const InstructionSize = 8 - -// RawInstructionOffset is an offset in units of raw BPF instructions. -type RawInstructionOffset uint64 - -// Bytes returns the offset of an instruction in bytes. -func (rio RawInstructionOffset) Bytes() uint64 { - return uint64(rio) * InstructionSize -} - -// Instruction is a single eBPF instruction. -type Instruction struct { - OpCode OpCode - Dst Register - Src Register - Offset int16 - Constant int64 - Reference string - Symbol string -} - -// Sym creates a symbol. -func (ins Instruction) Sym(name string) Instruction { - ins.Symbol = name - return ins -} - -// Unmarshal decodes a BPF instruction. -func (ins *Instruction) Unmarshal(r io.Reader, bo binary.ByteOrder) (uint64, error) { - var bi bpfInstruction - err := binary.Read(r, bo, &bi) - if err != nil { - return 0, err - } - - ins.OpCode = bi.OpCode - ins.Offset = bi.Offset - ins.Constant = int64(bi.Constant) - ins.Dst, ins.Src, err = bi.Registers.Unmarshal(bo) - if err != nil { - return 0, fmt.Errorf("can't unmarshal registers: %s", err) - } - - if !bi.OpCode.IsDWordLoad() { - return InstructionSize, nil - } - - var bi2 bpfInstruction - if err := binary.Read(r, bo, &bi2); err != nil { - // No Wrap, to avoid io.EOF clash - return 0, errors.New("64bit immediate is missing second half") - } - if bi2.OpCode != 0 || bi2.Offset != 0 || bi2.Registers != 0 { - return 0, errors.New("64bit immediate has non-zero fields") - } - ins.Constant = int64(uint64(uint32(bi2.Constant))<<32 | uint64(uint32(bi.Constant))) - - return 2 * InstructionSize, nil -} - -// Marshal encodes a BPF instruction. 
-func (ins Instruction) Marshal(w io.Writer, bo binary.ByteOrder) (uint64, error) { - if ins.OpCode == InvalidOpCode { - return 0, errors.New("invalid opcode") - } - - isDWordLoad := ins.OpCode.IsDWordLoad() - - cons := int32(ins.Constant) - if isDWordLoad { - // Encode least significant 32bit first for 64bit operations. - cons = int32(uint32(ins.Constant)) - } - - regs, err := newBPFRegisters(ins.Dst, ins.Src, bo) - if err != nil { - return 0, fmt.Errorf("can't marshal registers: %s", err) - } - - bpfi := bpfInstruction{ - ins.OpCode, - regs, - ins.Offset, - cons, - } - - if err := binary.Write(w, bo, &bpfi); err != nil { - return 0, err - } - - if !isDWordLoad { - return InstructionSize, nil - } - - bpfi = bpfInstruction{ - Constant: int32(ins.Constant >> 32), - } - - if err := binary.Write(w, bo, &bpfi); err != nil { - return 0, err - } - - return 2 * InstructionSize, nil -} - -// RewriteMapPtr changes an instruction to use a new map fd. -// -// Returns an error if the instruction doesn't load a map. -func (ins *Instruction) RewriteMapPtr(fd int) error { - if !ins.OpCode.IsDWordLoad() { - return fmt.Errorf("%s is not a 64 bit load", ins.OpCode) - } - - if ins.Src != PseudoMapFD && ins.Src != PseudoMapValue { - return errors.New("not a load from a map") - } - - // Preserve the offset value for direct map loads. - offset := uint64(ins.Constant) & (math.MaxUint32 << 32) - rawFd := uint64(uint32(fd)) - ins.Constant = int64(offset | rawFd) - return nil -} - -// MapPtr returns the map fd for this instruction. -// -// The result is undefined if the instruction is not a load from a map, -// see IsLoadFromMap. -func (ins *Instruction) MapPtr() int { - return int(int32(uint64(ins.Constant) & math.MaxUint32)) -} - -// RewriteMapOffset changes the offset of a direct load from a map. -// -// Returns an error if the instruction is not a direct load. -func (ins *Instruction) RewriteMapOffset(offset uint32) error { - if !ins.OpCode.IsDWordLoad() { - return fmt.Errorf("%s is not a 64 bit load", ins.OpCode) - } - - if ins.Src != PseudoMapValue { - return errors.New("not a direct load from a map") - } - - fd := uint64(ins.Constant) & math.MaxUint32 - ins.Constant = int64(uint64(offset)<<32 | fd) - return nil -} - -func (ins *Instruction) mapOffset() uint32 { - return uint32(uint64(ins.Constant) >> 32) -} - -// IsLoadFromMap returns true if the instruction loads from a map. -// -// This covers both loading the map pointer and direct map value loads. -func (ins *Instruction) IsLoadFromMap() bool { - return ins.OpCode == LoadImmOp(DWord) && (ins.Src == PseudoMapFD || ins.Src == PseudoMapValue) -} - -// IsFunctionCall returns true if the instruction calls another BPF function. -// -// This is not the same thing as a BPF helper call. -func (ins *Instruction) IsFunctionCall() bool { - return ins.OpCode.JumpOp() == Call && ins.Src == PseudoCall -} - -// IsConstantLoad returns true if the instruction loads a constant of the -// given size. -func (ins *Instruction) IsConstantLoad(size Size) bool { - return ins.OpCode == LoadImmOp(size) && ins.Src == R0 && ins.Offset == 0 -} - -// Format implements fmt.Formatter. 
-func (ins Instruction) Format(f fmt.State, c rune) { - if c != 'v' { - fmt.Fprintf(f, "{UNRECOGNIZED: %c}", c) - return - } - - op := ins.OpCode - - if op == InvalidOpCode { - fmt.Fprint(f, "INVALID") - return - } - - // Omit trailing space for Exit - if op.JumpOp() == Exit { - fmt.Fprint(f, op) - return - } - - if ins.IsLoadFromMap() { - fd := ins.MapPtr() - switch ins.Src { - case PseudoMapFD: - fmt.Fprintf(f, "LoadMapPtr dst: %s fd: %d", ins.Dst, fd) - - case PseudoMapValue: - fmt.Fprintf(f, "LoadMapValue dst: %s, fd: %d off: %d", ins.Dst, fd, ins.mapOffset()) - } - - goto ref - } - - fmt.Fprintf(f, "%v ", op) - switch cls := op.Class(); cls { - case LdClass, LdXClass, StClass, StXClass: - switch op.Mode() { - case ImmMode: - fmt.Fprintf(f, "dst: %s imm: %d", ins.Dst, ins.Constant) - case AbsMode: - fmt.Fprintf(f, "imm: %d", ins.Constant) - case IndMode: - fmt.Fprintf(f, "dst: %s src: %s imm: %d", ins.Dst, ins.Src, ins.Constant) - case MemMode: - fmt.Fprintf(f, "dst: %s src: %s off: %d imm: %d", ins.Dst, ins.Src, ins.Offset, ins.Constant) - case XAddMode: - fmt.Fprintf(f, "dst: %s src: %s", ins.Dst, ins.Src) - } - - case ALU64Class, ALUClass: - fmt.Fprintf(f, "dst: %s ", ins.Dst) - if op.ALUOp() == Swap || op.Source() == ImmSource { - fmt.Fprintf(f, "imm: %d", ins.Constant) - } else { - fmt.Fprintf(f, "src: %s", ins.Src) - } - - case JumpClass: - switch jop := op.JumpOp(); jop { - case Call: - if ins.Src == PseudoCall { - // bpf-to-bpf call - fmt.Fprint(f, ins.Constant) - } else { - fmt.Fprint(f, BuiltinFunc(ins.Constant)) - } - - default: - fmt.Fprintf(f, "dst: %s off: %d ", ins.Dst, ins.Offset) - if op.Source() == ImmSource { - fmt.Fprintf(f, "imm: %d", ins.Constant) - } else { - fmt.Fprintf(f, "src: %s", ins.Src) - } - } - } - -ref: - if ins.Reference != "" { - fmt.Fprintf(f, " <%s>", ins.Reference) - } -} - -// Instructions is an eBPF program. -type Instructions []Instruction - -func (insns Instructions) String() string { - return fmt.Sprint(insns) -} - -// RewriteMapPtr rewrites all loads of a specific map pointer to a new fd. -// -// Returns an error if the symbol isn't used, see IsUnreferencedSymbol. -func (insns Instructions) RewriteMapPtr(symbol string, fd int) error { - if symbol == "" { - return errors.New("empty symbol") - } - - found := false - for i := range insns { - ins := &insns[i] - if ins.Reference != symbol { - continue - } - - if err := ins.RewriteMapPtr(fd); err != nil { - return err - } - - found = true - } - - if !found { - return &unreferencedSymbolError{symbol} - } - - return nil -} - -// SymbolOffsets returns the set of symbols and their offset in -// the instructions. -func (insns Instructions) SymbolOffsets() (map[string]int, error) { - offsets := make(map[string]int) - - for i, ins := range insns { - if ins.Symbol == "" { - continue - } - - if _, ok := offsets[ins.Symbol]; ok { - return nil, fmt.Errorf("duplicate symbol %s", ins.Symbol) - } - - offsets[ins.Symbol] = i - } - - return offsets, nil -} - -// ReferenceOffsets returns the set of references and their offset in -// the instructions. -func (insns Instructions) ReferenceOffsets() map[string][]int { - offsets := make(map[string][]int) - - for i, ins := range insns { - if ins.Reference == "" { - continue - } - - offsets[ins.Reference] = append(offsets[ins.Reference], i) - } - - return offsets -} - -// Format implements fmt.Formatter. -// -// You can control indentation of symbols by -// specifying a width. Setting a precision controls the indentation of -// instructions. 
-// The default character is a tab, which can be overridden by specifying -// the ' ' space flag. -func (insns Instructions) Format(f fmt.State, c rune) { - if c != 's' && c != 'v' { - fmt.Fprintf(f, "{UNKNOWN FORMAT '%c'}", c) - return - } - - // Precision is better in this case, because it allows - // specifying 0 padding easily. - padding, ok := f.Precision() - if !ok { - padding = 1 - } - - indent := strings.Repeat("\t", padding) - if f.Flag(' ') { - indent = strings.Repeat(" ", padding) - } - - symPadding, ok := f.Width() - if !ok { - symPadding = padding - 1 - } - if symPadding < 0 { - symPadding = 0 - } - - symIndent := strings.Repeat("\t", symPadding) - if f.Flag(' ') { - symIndent = strings.Repeat(" ", symPadding) - } - - // Guess how many digits we need at most, by assuming that all instructions - // are double wide. - highestOffset := len(insns) * 2 - offsetWidth := int(math.Ceil(math.Log10(float64(highestOffset)))) - - iter := insns.Iterate() - for iter.Next() { - if iter.Ins.Symbol != "" { - fmt.Fprintf(f, "%s%s:\n", symIndent, iter.Ins.Symbol) - } - fmt.Fprintf(f, "%s%*d: %v\n", indent, offsetWidth, iter.Offset, iter.Ins) - } -} - -// Marshal encodes a BPF program into the kernel format. -func (insns Instructions) Marshal(w io.Writer, bo binary.ByteOrder) error { - for i, ins := range insns { - _, err := ins.Marshal(w, bo) - if err != nil { - return fmt.Errorf("instruction %d: %w", i, err) - } - } - return nil -} - -// Tag calculates the kernel tag for a series of instructions. -// -// It mirrors bpf_prog_calc_tag in the kernel and so can be compared -// to ProgramInfo.Tag to figure out whether a loaded program matches -// certain instructions. -func (insns Instructions) Tag(bo binary.ByteOrder) (string, error) { - h := sha1.New() - for i, ins := range insns { - if ins.IsLoadFromMap() { - ins.Constant = 0 - } - _, err := ins.Marshal(h, bo) - if err != nil { - return "", fmt.Errorf("instruction %d: %w", i, err) - } - } - return hex.EncodeToString(h.Sum(nil)[:unix.BPF_TAG_SIZE]), nil -} - -// Iterate allows iterating a BPF program while keeping track of -// various offsets. -// -// Modifying the instruction slice will lead to undefined behaviour. -func (insns Instructions) Iterate() *InstructionIterator { - return &InstructionIterator{insns: insns} -} - -// InstructionIterator iterates over a BPF program. -type InstructionIterator struct { - insns Instructions - // The instruction in question. - Ins *Instruction - // The index of the instruction in the original instruction slice. - Index int - // The offset of the instruction in raw BPF instructions. This accounts - // for double-wide instructions. - Offset RawInstructionOffset -} - -// Next returns true as long as there are any instructions remaining. 
-func (iter *InstructionIterator) Next() bool { - if len(iter.insns) == 0 { - return false - } - - if iter.Ins != nil { - iter.Index++ - iter.Offset += RawInstructionOffset(iter.Ins.OpCode.rawInstructions()) - } - iter.Ins = &iter.insns[0] - iter.insns = iter.insns[1:] - return true -} - -type bpfInstruction struct { - OpCode OpCode - Registers bpfRegisters - Offset int16 - Constant int32 -} - -type bpfRegisters uint8 - -func newBPFRegisters(dst, src Register, bo binary.ByteOrder) (bpfRegisters, error) { - switch bo { - case binary.LittleEndian: - return bpfRegisters((src << 4) | (dst & 0xF)), nil - case binary.BigEndian: - return bpfRegisters((dst << 4) | (src & 0xF)), nil - default: - return 0, fmt.Errorf("unrecognized ByteOrder %T", bo) - } -} - -func (r bpfRegisters) Unmarshal(bo binary.ByteOrder) (dst, src Register, err error) { - switch bo { - case binary.LittleEndian: - return Register(r & 0xF), Register(r >> 4), nil - case binary.BigEndian: - return Register(r >> 4), Register(r & 0xf), nil - default: - return 0, 0, fmt.Errorf("unrecognized ByteOrder %T", bo) - } -} - -type unreferencedSymbolError struct { - symbol string -} - -func (use *unreferencedSymbolError) Error() string { - return fmt.Sprintf("unreferenced symbol %s", use.symbol) -} - -// IsUnreferencedSymbol returns true if err was caused by -// an unreferenced symbol. -func IsUnreferencedSymbol(err error) bool { - _, ok := err.(*unreferencedSymbolError) - return ok -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/jump.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/jump.go deleted file mode 100644 index 7757179de6..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/jump.go +++ /dev/null @@ -1,109 +0,0 @@ -package asm - -//go:generate stringer -output jump_string.go -type=JumpOp - -// JumpOp affect control flow. -// -// msb lsb -// +----+-+---+ -// |OP |s|cls| -// +----+-+---+ -type JumpOp uint8 - -const jumpMask OpCode = aluMask - -const ( - // InvalidJumpOp is returned by getters when invoked - // on non branch OpCodes - InvalidJumpOp JumpOp = 0xff - // Ja jumps by offset unconditionally - Ja JumpOp = 0x00 - // JEq jumps by offset if r == imm - JEq JumpOp = 0x10 - // JGT jumps by offset if r > imm - JGT JumpOp = 0x20 - // JGE jumps by offset if r >= imm - JGE JumpOp = 0x30 - // JSet jumps by offset if r & imm - JSet JumpOp = 0x40 - // JNE jumps by offset if r != imm - JNE JumpOp = 0x50 - // JSGT jumps by offset if signed r > signed imm - JSGT JumpOp = 0x60 - // JSGE jumps by offset if signed r >= signed imm - JSGE JumpOp = 0x70 - // Call builtin or user defined function from imm - Call JumpOp = 0x80 - // Exit ends execution, with value in r0 - Exit JumpOp = 0x90 - // JLT jumps by offset if r < imm - JLT JumpOp = 0xa0 - // JLE jumps by offset if r <= imm - JLE JumpOp = 0xb0 - // JSLT jumps by offset if signed r < signed imm - JSLT JumpOp = 0xc0 - // JSLE jumps by offset if signed r <= signed imm - JSLE JumpOp = 0xd0 -) - -// Return emits an exit instruction. -// -// Requires a return value in R0. -func Return() Instruction { - return Instruction{ - OpCode: OpCode(JumpClass).SetJumpOp(Exit), - } -} - -// Op returns the OpCode for a given jump source. -func (op JumpOp) Op(source Source) OpCode { - return OpCode(JumpClass).SetJumpOp(op).SetSource(source) -} - -// Imm compares dst to value, and adjusts PC by offset if the condition is fulfilled. 
-func (op JumpOp) Imm(dst Register, value int32, label string) Instruction { - if op == Exit || op == Call || op == Ja { - return Instruction{OpCode: InvalidOpCode} - } - - return Instruction{ - OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(ImmSource), - Dst: dst, - Offset: -1, - Constant: int64(value), - Reference: label, - } -} - -// Reg compares dst to src, and adjusts PC by offset if the condition is fulfilled. -func (op JumpOp) Reg(dst, src Register, label string) Instruction { - if op == Exit || op == Call || op == Ja { - return Instruction{OpCode: InvalidOpCode} - } - - return Instruction{ - OpCode: OpCode(JumpClass).SetJumpOp(op).SetSource(RegSource), - Dst: dst, - Src: src, - Offset: -1, - Reference: label, - } -} - -// Label adjusts PC to the address of the label. -func (op JumpOp) Label(label string) Instruction { - if op == Call { - return Instruction{ - OpCode: OpCode(JumpClass).SetJumpOp(Call), - Src: PseudoCall, - Constant: -1, - Reference: label, - } - } - - return Instruction{ - OpCode: OpCode(JumpClass).SetJumpOp(op), - Offset: -1, - Reference: label, - } -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/jump_string.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/jump_string.go deleted file mode 100644 index 85a4aaffa5..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/jump_string.go +++ /dev/null @@ -1,53 +0,0 @@ -// Code generated by "stringer -output jump_string.go -type=JumpOp"; DO NOT EDIT. - -package asm - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[InvalidJumpOp-255] - _ = x[Ja-0] - _ = x[JEq-16] - _ = x[JGT-32] - _ = x[JGE-48] - _ = x[JSet-64] - _ = x[JNE-80] - _ = x[JSGT-96] - _ = x[JSGE-112] - _ = x[Call-128] - _ = x[Exit-144] - _ = x[JLT-160] - _ = x[JLE-176] - _ = x[JSLT-192] - _ = x[JSLE-208] -} - -const _JumpOp_name = "JaJEqJGTJGEJSetJNEJSGTJSGECallExitJLTJLEJSLTJSLEInvalidJumpOp" - -var _JumpOp_map = map[JumpOp]string{ - 0: _JumpOp_name[0:2], - 16: _JumpOp_name[2:5], - 32: _JumpOp_name[5:8], - 48: _JumpOp_name[8:11], - 64: _JumpOp_name[11:15], - 80: _JumpOp_name[15:18], - 96: _JumpOp_name[18:22], - 112: _JumpOp_name[22:26], - 128: _JumpOp_name[26:30], - 144: _JumpOp_name[30:34], - 160: _JumpOp_name[34:37], - 176: _JumpOp_name[37:40], - 192: _JumpOp_name[40:44], - 208: _JumpOp_name[44:48], - 255: _JumpOp_name[48:61], -} - -func (i JumpOp) String() string { - if str, ok := _JumpOp_map[i]; ok { - return str - } - return "JumpOp(" + strconv.FormatInt(int64(i), 10) + ")" -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/load_store.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/load_store.go deleted file mode 100644 index 85ed286b02..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/load_store.go +++ /dev/null @@ -1,204 +0,0 @@ -package asm - -//go:generate stringer -output load_store_string.go -type=Mode,Size - -// Mode for load and store operations -// -// msb lsb -// +---+--+---+ -// |MDE|sz|cls| -// +---+--+---+ -type Mode uint8 - -const modeMask OpCode = 0xe0 - -const ( - // InvalidMode is returned by getters when invoked - // on non load / store OpCodes - InvalidMode Mode = 0xff - // ImmMode - immediate value - ImmMode Mode = 0x00 - // AbsMode - immediate value + offset - AbsMode Mode = 0x20 - // IndMode - indirect (imm+src) - IndMode Mode = 0x40 - // MemMode - load from memory - MemMode Mode = 0x60 - // XAddMode - add atomically 
across processors. - XAddMode Mode = 0xc0 -) - -// Size of load and store operations -// -// msb lsb -// +---+--+---+ -// |mde|SZ|cls| -// +---+--+---+ -type Size uint8 - -const sizeMask OpCode = 0x18 - -const ( - // InvalidSize is returned by getters when invoked - // on non load / store OpCodes - InvalidSize Size = 0xff - // DWord - double word; 64 bits - DWord Size = 0x18 - // Word - word; 32 bits - Word Size = 0x00 - // Half - half-word; 16 bits - Half Size = 0x08 - // Byte - byte; 8 bits - Byte Size = 0x10 -) - -// Sizeof returns the size in bytes. -func (s Size) Sizeof() int { - switch s { - case DWord: - return 8 - case Word: - return 4 - case Half: - return 2 - case Byte: - return 1 - default: - return -1 - } -} - -// LoadMemOp returns the OpCode to load a value of given size from memory. -func LoadMemOp(size Size) OpCode { - return OpCode(LdXClass).SetMode(MemMode).SetSize(size) -} - -// LoadMem emits `dst = *(size *)(src + offset)`. -func LoadMem(dst, src Register, offset int16, size Size) Instruction { - return Instruction{ - OpCode: LoadMemOp(size), - Dst: dst, - Src: src, - Offset: offset, - } -} - -// LoadImmOp returns the OpCode to load an immediate of given size. -// -// As of kernel 4.20, only DWord size is accepted. -func LoadImmOp(size Size) OpCode { - return OpCode(LdClass).SetMode(ImmMode).SetSize(size) -} - -// LoadImm emits `dst = (size)value`. -// -// As of kernel 4.20, only DWord size is accepted. -func LoadImm(dst Register, value int64, size Size) Instruction { - return Instruction{ - OpCode: LoadImmOp(size), - Dst: dst, - Constant: value, - } -} - -// LoadMapPtr stores a pointer to a map in dst. -func LoadMapPtr(dst Register, fd int) Instruction { - if fd < 0 { - return Instruction{OpCode: InvalidOpCode} - } - - return Instruction{ - OpCode: LoadImmOp(DWord), - Dst: dst, - Src: PseudoMapFD, - Constant: int64(uint32(fd)), - } -} - -// LoadMapValue stores a pointer to the value at a certain offset of a map. -func LoadMapValue(dst Register, fd int, offset uint32) Instruction { - if fd < 0 { - return Instruction{OpCode: InvalidOpCode} - } - - fdAndOffset := (uint64(offset) << 32) | uint64(uint32(fd)) - return Instruction{ - OpCode: LoadImmOp(DWord), - Dst: dst, - Src: PseudoMapValue, - Constant: int64(fdAndOffset), - } -} - -// LoadIndOp returns the OpCode for loading a value of given size from an sk_buff. -func LoadIndOp(size Size) OpCode { - return OpCode(LdClass).SetMode(IndMode).SetSize(size) -} - -// LoadInd emits `dst = ntoh(*(size *)(((sk_buff *)R6)->data + src + offset))`. -func LoadInd(dst, src Register, offset int32, size Size) Instruction { - return Instruction{ - OpCode: LoadIndOp(size), - Dst: dst, - Src: src, - Constant: int64(offset), - } -} - -// LoadAbsOp returns the OpCode for loading a value of given size from an sk_buff. -func LoadAbsOp(size Size) OpCode { - return OpCode(LdClass).SetMode(AbsMode).SetSize(size) -} - -// LoadAbs emits `r0 = ntoh(*(size *)(((sk_buff *)R6)->data + offset))`. -func LoadAbs(offset int32, size Size) Instruction { - return Instruction{ - OpCode: LoadAbsOp(size), - Dst: R0, - Constant: int64(offset), - } -} - -// StoreMemOp returns the OpCode for storing a register of given size in memory. 
-func StoreMemOp(size Size) OpCode { - return OpCode(StXClass).SetMode(MemMode).SetSize(size) -} - -// StoreMem emits `*(size *)(dst + offset) = src` -func StoreMem(dst Register, offset int16, src Register, size Size) Instruction { - return Instruction{ - OpCode: StoreMemOp(size), - Dst: dst, - Src: src, - Offset: offset, - } -} - -// StoreImmOp returns the OpCode for storing an immediate of given size in memory. -func StoreImmOp(size Size) OpCode { - return OpCode(StClass).SetMode(MemMode).SetSize(size) -} - -// StoreImm emits `*(size *)(dst + offset) = value`. -func StoreImm(dst Register, offset int16, value int64, size Size) Instruction { - return Instruction{ - OpCode: StoreImmOp(size), - Dst: dst, - Offset: offset, - Constant: value, - } -} - -// StoreXAddOp returns the OpCode to atomically add a register to a value in memory. -func StoreXAddOp(size Size) OpCode { - return OpCode(StXClass).SetMode(XAddMode).SetSize(size) -} - -// StoreXAdd atomically adds src to *dst. -func StoreXAdd(dst, src Register, size Size) Instruction { - return Instruction{ - OpCode: StoreXAddOp(size), - Dst: dst, - Src: src, - } -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/load_store_string.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/load_store_string.go deleted file mode 100644 index 76d29a0756..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/load_store_string.go +++ /dev/null @@ -1,80 +0,0 @@ -// Code generated by "stringer -output load_store_string.go -type=Mode,Size"; DO NOT EDIT. - -package asm - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[InvalidMode-255] - _ = x[ImmMode-0] - _ = x[AbsMode-32] - _ = x[IndMode-64] - _ = x[MemMode-96] - _ = x[XAddMode-192] -} - -const ( - _Mode_name_0 = "ImmMode" - _Mode_name_1 = "AbsMode" - _Mode_name_2 = "IndMode" - _Mode_name_3 = "MemMode" - _Mode_name_4 = "XAddMode" - _Mode_name_5 = "InvalidMode" -) - -func (i Mode) String() string { - switch { - case i == 0: - return _Mode_name_0 - case i == 32: - return _Mode_name_1 - case i == 64: - return _Mode_name_2 - case i == 96: - return _Mode_name_3 - case i == 192: - return _Mode_name_4 - case i == 255: - return _Mode_name_5 - default: - return "Mode(" + strconv.FormatInt(int64(i), 10) + ")" - } -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[InvalidSize-255] - _ = x[DWord-24] - _ = x[Word-0] - _ = x[Half-8] - _ = x[Byte-16] -} - -const ( - _Size_name_0 = "Word" - _Size_name_1 = "Half" - _Size_name_2 = "Byte" - _Size_name_3 = "DWord" - _Size_name_4 = "InvalidSize" -) - -func (i Size) String() string { - switch { - case i == 0: - return _Size_name_0 - case i == 8: - return _Size_name_1 - case i == 16: - return _Size_name_2 - case i == 24: - return _Size_name_3 - case i == 255: - return _Size_name_4 - default: - return "Size(" + strconv.FormatInt(int64(i), 10) + ")" - } -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/opcode.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/opcode.go deleted file mode 100644 index 6edc3cf591..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/opcode.go +++ /dev/null @@ -1,237 +0,0 @@ -package asm - -import ( - "fmt" - "strings" -) - -//go:generate stringer -output opcode_string.go -type=Class - -type encoding int - -const ( - unknownEncoding encoding = iota - loadOrStore - jumpOrALU -) - -// Class of operations -// -// msb lsb -// +---+--+---+ -// | ?? |CLS| -// +---+--+---+ -type Class uint8 - -const classMask OpCode = 0x07 - -const ( - // LdClass load memory - LdClass Class = 0x00 - // LdXClass load memory from constant - LdXClass Class = 0x01 - // StClass load register from memory - StClass Class = 0x02 - // StXClass load register from constant - StXClass Class = 0x03 - // ALUClass arithmetic operators - ALUClass Class = 0x04 - // JumpClass jump operators - JumpClass Class = 0x05 - // ALU64Class arithmetic in 64 bit mode - ALU64Class Class = 0x07 -) - -func (cls Class) encoding() encoding { - switch cls { - case LdClass, LdXClass, StClass, StXClass: - return loadOrStore - case ALU64Class, ALUClass, JumpClass: - return jumpOrALU - default: - return unknownEncoding - } -} - -// OpCode is a packed eBPF opcode. -// -// Its encoding is defined by a Class value: -// -// msb lsb -// +----+-+---+ -// | ???? |CLS| -// +----+-+---+ -type OpCode uint8 - -// InvalidOpCode is returned by setters on OpCode -const InvalidOpCode OpCode = 0xff - -// rawInstructions returns the number of BPF instructions required -// to encode this opcode. -func (op OpCode) rawInstructions() int { - if op.IsDWordLoad() { - return 2 - } - return 1 -} - -func (op OpCode) IsDWordLoad() bool { - return op == LoadImmOp(DWord) -} - -// Class returns the class of operation. -func (op OpCode) Class() Class { - return Class(op & classMask) -} - -// Mode returns the mode for load and store operations. -func (op OpCode) Mode() Mode { - if op.Class().encoding() != loadOrStore { - return InvalidMode - } - return Mode(op & modeMask) -} - -// Size returns the size for load and store operations. -func (op OpCode) Size() Size { - if op.Class().encoding() != loadOrStore { - return InvalidSize - } - return Size(op & sizeMask) -} - -// Source returns the source for branch and ALU operations. -func (op OpCode) Source() Source { - if op.Class().encoding() != jumpOrALU || op.ALUOp() == Swap { - return InvalidSource - } - return Source(op & sourceMask) -} - -// ALUOp returns the ALUOp. -func (op OpCode) ALUOp() ALUOp { - if op.Class().encoding() != jumpOrALU { - return InvalidALUOp - } - return ALUOp(op & aluMask) -} - -// Endianness returns the Endianness for a byte swap instruction. -func (op OpCode) Endianness() Endianness { - if op.ALUOp() != Swap { - return InvalidEndian - } - return Endianness(op & endianMask) -} - -// JumpOp returns the JumpOp. 
-func (op OpCode) JumpOp() JumpOp { - if op.Class().encoding() != jumpOrALU { - return InvalidJumpOp - } - return JumpOp(op & jumpMask) -} - -// SetMode sets the mode on load and store operations. -// -// Returns InvalidOpCode if op is of the wrong class. -func (op OpCode) SetMode(mode Mode) OpCode { - if op.Class().encoding() != loadOrStore || !valid(OpCode(mode), modeMask) { - return InvalidOpCode - } - return (op & ^modeMask) | OpCode(mode) -} - -// SetSize sets the size on load and store operations. -// -// Returns InvalidOpCode if op is of the wrong class. -func (op OpCode) SetSize(size Size) OpCode { - if op.Class().encoding() != loadOrStore || !valid(OpCode(size), sizeMask) { - return InvalidOpCode - } - return (op & ^sizeMask) | OpCode(size) -} - -// SetSource sets the source on jump and ALU operations. -// -// Returns InvalidOpCode if op is of the wrong class. -func (op OpCode) SetSource(source Source) OpCode { - if op.Class().encoding() != jumpOrALU || !valid(OpCode(source), sourceMask) { - return InvalidOpCode - } - return (op & ^sourceMask) | OpCode(source) -} - -// SetALUOp sets the ALUOp on ALU operations. -// -// Returns InvalidOpCode if op is of the wrong class. -func (op OpCode) SetALUOp(alu ALUOp) OpCode { - class := op.Class() - if (class != ALUClass && class != ALU64Class) || !valid(OpCode(alu), aluMask) { - return InvalidOpCode - } - return (op & ^aluMask) | OpCode(alu) -} - -// SetJumpOp sets the JumpOp on jump operations. -// -// Returns InvalidOpCode if op is of the wrong class. -func (op OpCode) SetJumpOp(jump JumpOp) OpCode { - if op.Class() != JumpClass || !valid(OpCode(jump), jumpMask) { - return InvalidOpCode - } - return (op & ^jumpMask) | OpCode(jump) -} - -func (op OpCode) String() string { - var f strings.Builder - - switch class := op.Class(); class { - case LdClass, LdXClass, StClass, StXClass: - f.WriteString(strings.TrimSuffix(class.String(), "Class")) - - mode := op.Mode() - f.WriteString(strings.TrimSuffix(mode.String(), "Mode")) - - switch op.Size() { - case DWord: - f.WriteString("DW") - case Word: - f.WriteString("W") - case Half: - f.WriteString("H") - case Byte: - f.WriteString("B") - } - - case ALU64Class, ALUClass: - f.WriteString(op.ALUOp().String()) - - if op.ALUOp() == Swap { - // Width for Endian is controlled by Constant - f.WriteString(op.Endianness().String()) - } else { - if class == ALUClass { - f.WriteString("32") - } - - f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) - } - - case JumpClass: - f.WriteString(op.JumpOp().String()) - if jop := op.JumpOp(); jop != Exit && jop != Call { - f.WriteString(strings.TrimSuffix(op.Source().String(), "Source")) - } - - default: - fmt.Fprintf(&f, "OpCode(%#x)", uint8(op)) - } - - return f.String() -} - -// valid returns true if all bits in value are covered by mask. -func valid(value, mask OpCode) bool { - return value & ^mask == 0 -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/opcode_string.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/opcode_string.go deleted file mode 100644 index 079ce1db0b..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/opcode_string.go +++ /dev/null @@ -1,38 +0,0 @@ -// Code generated by "stringer -output opcode_string.go -type=Class"; DO NOT EDIT. - -package asm - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[LdClass-0] - _ = x[LdXClass-1] - _ = x[StClass-2] - _ = x[StXClass-3] - _ = x[ALUClass-4] - _ = x[JumpClass-5] - _ = x[ALU64Class-7] -} - -const ( - _Class_name_0 = "LdClassLdXClassStClassStXClassALUClassJumpClass" - _Class_name_1 = "ALU64Class" -) - -var ( - _Class_index_0 = [...]uint8{0, 7, 15, 22, 30, 38, 47} -) - -func (i Class) String() string { - switch { - case 0 <= i && i <= 5: - return _Class_name_0[_Class_index_0[i]:_Class_index_0[i+1]] - case i == 7: - return _Class_name_1 - default: - return "Class(" + strconv.FormatInt(int64(i), 10) + ")" - } -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/asm/register.go b/src/runtime/vendor/github.com/cilium/ebpf/asm/register.go deleted file mode 100644 index 76cb44bffc..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/asm/register.go +++ /dev/null @@ -1,49 +0,0 @@ -package asm - -import ( - "fmt" -) - -// Register is the source or destination of most operations. -type Register uint8 - -// R0 contains return values. -const R0 Register = 0 - -// Registers for function arguments. -const ( - R1 Register = R0 + 1 + iota - R2 - R3 - R4 - R5 -) - -// Callee saved registers preserved by function calls. -const ( - R6 Register = R5 + 1 + iota - R7 - R8 - R9 -) - -// Read-only frame pointer to access stack. -const ( - R10 Register = R9 + 1 - RFP = R10 -) - -// Pseudo registers used by 64bit loads and jumps -const ( - PseudoMapFD = R1 // BPF_PSEUDO_MAP_FD - PseudoMapValue = R2 // BPF_PSEUDO_MAP_VALUE - PseudoCall = R1 // BPF_PSEUDO_CALL -) - -func (r Register) String() string { - v := uint8(r) - if v == 10 { - return "rfp" - } - return fmt.Sprintf("r%d", v) -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/collection.go b/src/runtime/vendor/github.com/cilium/ebpf/collection.go deleted file mode 100644 index 17cc69492e..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/collection.go +++ /dev/null @@ -1,616 +0,0 @@ -package ebpf - -import ( - "errors" - "fmt" - "io" - "math" - "reflect" - "strings" - - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/btf" -) - -// CollectionOptions control loading a collection into the kernel. -// -// Maps and Programs are passed to NewMapWithOptions and NewProgramsWithOptions. -type CollectionOptions struct { - Maps MapOptions - Programs ProgramOptions -} - -// CollectionSpec describes a collection. -type CollectionSpec struct { - Maps map[string]*MapSpec - Programs map[string]*ProgramSpec -} - -// Copy returns a recursive copy of the spec. -func (cs *CollectionSpec) Copy() *CollectionSpec { - if cs == nil { - return nil - } - - cpy := CollectionSpec{ - Maps: make(map[string]*MapSpec, len(cs.Maps)), - Programs: make(map[string]*ProgramSpec, len(cs.Programs)), - } - - for name, spec := range cs.Maps { - cpy.Maps[name] = spec.Copy() - } - - for name, spec := range cs.Programs { - cpy.Programs[name] = spec.Copy() - } - - return &cpy -} - -// RewriteMaps replaces all references to specific maps. -// -// Use this function to use pre-existing maps instead of creating new ones -// when calling NewCollection. Any named maps are removed from CollectionSpec.Maps. -// -// Returns an error if a named map isn't used in at least one program. 
-func (cs *CollectionSpec) RewriteMaps(maps map[string]*Map) error { - for symbol, m := range maps { - // have we seen a program that uses this symbol / map - seen := false - fd := m.FD() - for progName, progSpec := range cs.Programs { - err := progSpec.Instructions.RewriteMapPtr(symbol, fd) - - switch { - case err == nil: - seen = true - - case asm.IsUnreferencedSymbol(err): - // Not all programs need to use the map - - default: - return fmt.Errorf("program %s: %w", progName, err) - } - } - - if !seen { - return fmt.Errorf("map %s not referenced by any programs", symbol) - } - - // Prevent NewCollection from creating rewritten maps - delete(cs.Maps, symbol) - } - - return nil -} - -// RewriteConstants replaces the value of multiple constants. -// -// The constant must be defined like so in the C program: -// -// volatile const type foobar; -// volatile const type foobar = default; -// -// Replacement values must be of the same length as the C sizeof(type). -// If necessary, they are marshalled according to the same rules as -// map values. -// -// From Linux 5.5 the verifier will use constants to eliminate dead code. -// -// Returns an error if a constant doesn't exist. -func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error { - rodata := cs.Maps[".rodata"] - if rodata == nil { - return errors.New("missing .rodata section") - } - - if rodata.BTF == nil { - return errors.New(".rodata section has no BTF") - } - - if n := len(rodata.Contents); n != 1 { - return fmt.Errorf("expected one key in .rodata, found %d", n) - } - - kv := rodata.Contents[0] - value, ok := kv.Value.([]byte) - if !ok { - return fmt.Errorf("first value in .rodata is %T not []byte", kv.Value) - } - - buf := make([]byte, len(value)) - copy(buf, value) - - err := patchValue(buf, btf.MapValue(rodata.BTF), consts) - if err != nil { - return err - } - - rodata.Contents[0] = MapKV{kv.Key, buf} - return nil -} - -// Assign the contents of a CollectionSpec to a struct. -// -// This function is a short-cut to manually checking the presence -// of maps and programs in a collection spec. Consider using bpf2go if this -// sounds useful. -// -// The argument to must be a pointer to a struct. A field of the -// struct is updated with values from Programs or Maps if it -// has an `ebpf` tag and its type is *ProgramSpec or *MapSpec. -// The tag gives the name of the program or map as found in -// the CollectionSpec. -// -// struct { -// Foo *ebpf.ProgramSpec `ebpf:"xdp_foo"` -// Bar *ebpf.MapSpec `ebpf:"bar_map"` -// Ignored int -// } -// -// Returns an error if any of the fields can't be found, or -// if the same map or program is assigned multiple times. -func (cs *CollectionSpec) Assign(to interface{}) error { - valueOf := func(typ reflect.Type, name string) (reflect.Value, error) { - switch typ { - case reflect.TypeOf((*ProgramSpec)(nil)): - p := cs.Programs[name] - if p == nil { - return reflect.Value{}, fmt.Errorf("missing program %q", name) - } - return reflect.ValueOf(p), nil - case reflect.TypeOf((*MapSpec)(nil)): - m := cs.Maps[name] - if m == nil { - return reflect.Value{}, fmt.Errorf("missing map %q", name) - } - return reflect.ValueOf(m), nil - default: - return reflect.Value{}, fmt.Errorf("unsupported type %s", typ) - } - } - - return assignValues(to, valueOf) -} - -// LoadAndAssign maps and programs into the kernel and assign them to a struct. -// -// This function is a short-cut to manually checking the presence -// of maps and programs in a collection spec. 
Consider using bpf2go if this -// sounds useful. -// -// The argument to must be a pointer to a struct. A field of the -// struct is updated with values from Programs or Maps if it -// has an `ebpf` tag and its type is *Program or *Map. -// The tag gives the name of the program or map as found in -// the CollectionSpec. -// -// struct { -// Foo *ebpf.Program `ebpf:"xdp_foo"` -// Bar *ebpf.Map `ebpf:"bar_map"` -// Ignored int -// } -// -// opts may be nil. -// -// Returns an error if any of the fields can't be found, or -// if the same map or program is assigned multiple times. -func (cs *CollectionSpec) LoadAndAssign(to interface{}, opts *CollectionOptions) error { - if opts == nil { - opts = &CollectionOptions{} - } - - loadMap, loadProgram, done, cleanup := lazyLoadCollection(cs, opts) - defer cleanup() - - valueOf := func(typ reflect.Type, name string) (reflect.Value, error) { - switch typ { - case reflect.TypeOf((*Program)(nil)): - p, err := loadProgram(name) - if err != nil { - return reflect.Value{}, err - } - return reflect.ValueOf(p), nil - case reflect.TypeOf((*Map)(nil)): - m, err := loadMap(name) - if err != nil { - return reflect.Value{}, err - } - return reflect.ValueOf(m), nil - default: - return reflect.Value{}, fmt.Errorf("unsupported type %s", typ) - } - } - - if err := assignValues(to, valueOf); err != nil { - return err - } - - done() - return nil -} - -// Collection is a collection of Programs and Maps associated -// with their symbols -type Collection struct { - Programs map[string]*Program - Maps map[string]*Map -} - -// NewCollection creates a Collection from a specification. -func NewCollection(spec *CollectionSpec) (*Collection, error) { - return NewCollectionWithOptions(spec, CollectionOptions{}) -} - -// NewCollectionWithOptions creates a Collection from a specification. 
-func NewCollectionWithOptions(spec *CollectionSpec, opts CollectionOptions) (*Collection, error) { - loadMap, loadProgram, done, cleanup := lazyLoadCollection(spec, &opts) - defer cleanup() - - for mapName := range spec.Maps { - _, err := loadMap(mapName) - if err != nil { - return nil, err - } - } - - for progName := range spec.Programs { - _, err := loadProgram(progName) - if err != nil { - return nil, err - } - } - - maps, progs := done() - return &Collection{ - progs, - maps, - }, nil -} - -type handleCache struct { - btfHandles map[*btf.Spec]*btf.Handle - btfSpecs map[io.ReaderAt]*btf.Spec -} - -func newHandleCache() *handleCache { - return &handleCache{ - btfHandles: make(map[*btf.Spec]*btf.Handle), - btfSpecs: make(map[io.ReaderAt]*btf.Spec), - } -} - -func (hc handleCache) btfHandle(spec *btf.Spec) (*btf.Handle, error) { - if hc.btfHandles[spec] != nil { - return hc.btfHandles[spec], nil - } - - handle, err := btf.NewHandle(spec) - if err != nil { - return nil, err - } - - hc.btfHandles[spec] = handle - return handle, nil -} - -func (hc handleCache) btfSpec(rd io.ReaderAt) (*btf.Spec, error) { - if hc.btfSpecs[rd] != nil { - return hc.btfSpecs[rd], nil - } - - spec, err := btf.LoadSpecFromReader(rd) - if err != nil { - return nil, err - } - - hc.btfSpecs[rd] = spec - return spec, nil -} - -func (hc handleCache) close() { - for _, handle := range hc.btfHandles { - handle.Close() - } - hc.btfHandles = nil - hc.btfSpecs = nil -} - -func lazyLoadCollection(coll *CollectionSpec, opts *CollectionOptions) ( - loadMap func(string) (*Map, error), - loadProgram func(string) (*Program, error), - done func() (map[string]*Map, map[string]*Program), - cleanup func(), -) { - var ( - maps = make(map[string]*Map) - progs = make(map[string]*Program) - handles = newHandleCache() - skipMapsAndProgs = false - ) - - cleanup = func() { - handles.close() - - if skipMapsAndProgs { - return - } - - for _, m := range maps { - m.Close() - } - - for _, p := range progs { - p.Close() - } - } - - done = func() (map[string]*Map, map[string]*Program) { - skipMapsAndProgs = true - return maps, progs - } - - loadMap = func(mapName string) (*Map, error) { - if m := maps[mapName]; m != nil { - return m, nil - } - - mapSpec := coll.Maps[mapName] - if mapSpec == nil { - return nil, fmt.Errorf("missing map %s", mapName) - } - - m, err := newMapWithOptions(mapSpec, opts.Maps, handles) - if err != nil { - return nil, fmt.Errorf("map %s: %w", mapName, err) - } - - maps[mapName] = m - return m, nil - } - - loadProgram = func(progName string) (*Program, error) { - if prog := progs[progName]; prog != nil { - return prog, nil - } - - progSpec := coll.Programs[progName] - if progSpec == nil { - return nil, fmt.Errorf("unknown program %s", progName) - } - - progSpec = progSpec.Copy() - - // Rewrite any reference to a valid map. 
- for i := range progSpec.Instructions { - ins := &progSpec.Instructions[i] - - if !ins.IsLoadFromMap() || ins.Reference == "" { - continue - } - - if uint32(ins.Constant) != math.MaxUint32 { - // Don't overwrite maps already rewritten, users can - // rewrite programs in the spec themselves - continue - } - - m, err := loadMap(ins.Reference) - if err != nil { - return nil, fmt.Errorf("program %s: %w", progName, err) - } - - fd := m.FD() - if fd < 0 { - return nil, fmt.Errorf("map %s: %w", ins.Reference, internal.ErrClosedFd) - } - if err := ins.RewriteMapPtr(m.FD()); err != nil { - return nil, fmt.Errorf("progam %s: map %s: %w", progName, ins.Reference, err) - } - } - - prog, err := newProgramWithOptions(progSpec, opts.Programs, handles) - if err != nil { - return nil, fmt.Errorf("program %s: %w", progName, err) - } - - progs[progName] = prog - return prog, nil - } - - return -} - -// LoadCollection parses an object file and converts it to a collection. -func LoadCollection(file string) (*Collection, error) { - spec, err := LoadCollectionSpec(file) - if err != nil { - return nil, err - } - return NewCollection(spec) -} - -// Close frees all maps and programs associated with the collection. -// -// The collection mustn't be used afterwards. -func (coll *Collection) Close() { - for _, prog := range coll.Programs { - prog.Close() - } - for _, m := range coll.Maps { - m.Close() - } -} - -// DetachMap removes the named map from the Collection. -// -// This means that a later call to Close() will not affect this map. -// -// Returns nil if no map of that name exists. -func (coll *Collection) DetachMap(name string) *Map { - m := coll.Maps[name] - delete(coll.Maps, name) - return m -} - -// DetachProgram removes the named program from the Collection. -// -// This means that a later call to Close() will not affect this program. -// -// Returns nil if no program of that name exists. -func (coll *Collection) DetachProgram(name string) *Program { - p := coll.Programs[name] - delete(coll.Programs, name) - return p -} - -// Assign the contents of a collection to a struct. -// -// Deprecated: use CollectionSpec.Assign instead. It provides the same -// functionality but creates only the maps and programs requested. 
-func (coll *Collection) Assign(to interface{}) error { - assignedMaps := make(map[string]struct{}) - assignedPrograms := make(map[string]struct{}) - valueOf := func(typ reflect.Type, name string) (reflect.Value, error) { - switch typ { - case reflect.TypeOf((*Program)(nil)): - p := coll.Programs[name] - if p == nil { - return reflect.Value{}, fmt.Errorf("missing program %q", name) - } - assignedPrograms[name] = struct{}{} - return reflect.ValueOf(p), nil - case reflect.TypeOf((*Map)(nil)): - m := coll.Maps[name] - if m == nil { - return reflect.Value{}, fmt.Errorf("missing map %q", name) - } - assignedMaps[name] = struct{}{} - return reflect.ValueOf(m), nil - default: - return reflect.Value{}, fmt.Errorf("unsupported type %s", typ) - } - } - - if err := assignValues(to, valueOf); err != nil { - return err - } - - for name := range assignedPrograms { - coll.DetachProgram(name) - } - - for name := range assignedMaps { - coll.DetachMap(name) - } - - return nil -} - -func assignValues(to interface{}, valueOf func(reflect.Type, string) (reflect.Value, error)) error { - type structField struct { - reflect.StructField - value reflect.Value - } - - var ( - fields []structField - visitedTypes = make(map[reflect.Type]bool) - flattenStruct func(reflect.Value) error - ) - - flattenStruct = func(structVal reflect.Value) error { - structType := structVal.Type() - if structType.Kind() != reflect.Struct { - return fmt.Errorf("%s is not a struct", structType) - } - - if visitedTypes[structType] { - return fmt.Errorf("recursion on type %s", structType) - } - - for i := 0; i < structType.NumField(); i++ { - field := structField{structType.Field(i), structVal.Field(i)} - - name := field.Tag.Get("ebpf") - if name != "" { - fields = append(fields, field) - continue - } - - var err error - switch field.Type.Kind() { - case reflect.Ptr: - if field.Type.Elem().Kind() != reflect.Struct { - continue - } - - if field.value.IsNil() { - return fmt.Errorf("nil pointer to %s", structType) - } - - err = flattenStruct(field.value.Elem()) - - case reflect.Struct: - err = flattenStruct(field.value) - - default: - continue - } - - if err != nil { - return fmt.Errorf("field %s: %w", field.Name, err) - } - } - - return nil - } - - toValue := reflect.ValueOf(to) - if toValue.Type().Kind() != reflect.Ptr { - return fmt.Errorf("%T is not a pointer to struct", to) - } - - if toValue.IsNil() { - return fmt.Errorf("nil pointer to %T", to) - } - - if err := flattenStruct(toValue.Elem()); err != nil { - return err - } - - type elem struct { - // Either *Map or *Program - typ reflect.Type - name string - } - - assignedTo := make(map[elem]string) - for _, field := range fields { - name := field.Tag.Get("ebpf") - if strings.Contains(name, ",") { - return fmt.Errorf("field %s: ebpf tag contains a comma", field.Name) - } - - e := elem{field.Type, name} - if assignedField := assignedTo[e]; assignedField != "" { - return fmt.Errorf("field %s: %q was already assigned to %s", field.Name, name, assignedField) - } - - value, err := valueOf(field.Type, name) - if err != nil { - return fmt.Errorf("field %s: %w", field.Name, err) - } - - if !field.value.CanSet() { - return fmt.Errorf("field %s: can't set value", field.Name) - } - - field.value.Set(value) - assignedTo[e] = field.Name - } - - return nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/doc.go b/src/runtime/vendor/github.com/cilium/ebpf/doc.go deleted file mode 100644 index f7f34da8f4..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/doc.go +++ /dev/null @@ -1,16 
+0,0 @@ -// Package ebpf is a toolkit for working with eBPF programs. -// -// eBPF programs are small snippets of code which are executed directly -// in a VM in the Linux kernel, which makes them very fast and flexible. -// Many Linux subsystems now accept eBPF programs. This makes it possible -// to implement highly application specific logic inside the kernel, -// without having to modify the actual kernel itself. -// -// This package is designed for long-running processes which -// want to use eBPF to implement part of their application logic. It has no -// run-time dependencies outside of the library and the Linux kernel itself. -// eBPF code should be compiled ahead of time using clang, and shipped with -// your application as any other resource. -// -// Use the link subpackage to attach a loaded program to a hook in the kernel. -package ebpf diff --git a/src/runtime/vendor/github.com/cilium/ebpf/elf_reader.go b/src/runtime/vendor/github.com/cilium/ebpf/elf_reader.go deleted file mode 100644 index c2afbc36a5..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/elf_reader.go +++ /dev/null @@ -1,953 +0,0 @@ -package ebpf - -import ( - "bufio" - "bytes" - "debug/elf" - "encoding/binary" - "errors" - "fmt" - "io" - "math" - "os" - "strings" - - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/btf" - "github.com/cilium/ebpf/internal/unix" -) - -// elfCode is a convenience to reduce the amount of arguments that have to -// be passed around explicitly. You should treat it's contents as immutable. -type elfCode struct { - *internal.SafeELFFile - sections map[elf.SectionIndex]*elfSection - license string - version uint32 - btf *btf.Spec -} - -// LoadCollectionSpec parses an ELF file into a CollectionSpec. -func LoadCollectionSpec(file string) (*CollectionSpec, error) { - f, err := os.Open(file) - if err != nil { - return nil, err - } - defer f.Close() - - spec, err := LoadCollectionSpecFromReader(f) - if err != nil { - return nil, fmt.Errorf("file %s: %w", file, err) - } - return spec, nil -} - -// LoadCollectionSpecFromReader parses an ELF file into a CollectionSpec. -func LoadCollectionSpecFromReader(rd io.ReaderAt) (*CollectionSpec, error) { - f, err := internal.NewSafeELFFile(rd) - if err != nil { - return nil, err - } - defer f.Close() - - var ( - licenseSection *elf.Section - versionSection *elf.Section - sections = make(map[elf.SectionIndex]*elfSection) - relSections = make(map[elf.SectionIndex]*elf.Section) - ) - - // This is the target of relocations generated by inline assembly. - sections[elf.SHN_UNDEF] = newElfSection(new(elf.Section), undefSection) - - // Collect all the sections we're interested in. This includes relocations - // which we parse later. 
- for i, sec := range f.Sections { - idx := elf.SectionIndex(i) - - switch { - case strings.HasPrefix(sec.Name, "license"): - licenseSection = sec - case strings.HasPrefix(sec.Name, "version"): - versionSection = sec - case strings.HasPrefix(sec.Name, "maps"): - sections[idx] = newElfSection(sec, mapSection) - case sec.Name == ".maps": - sections[idx] = newElfSection(sec, btfMapSection) - case sec.Name == ".bss" || sec.Name == ".data" || strings.HasPrefix(sec.Name, ".rodata"): - sections[idx] = newElfSection(sec, dataSection) - case sec.Type == elf.SHT_REL: - // Store relocations under the section index of the target - relSections[elf.SectionIndex(sec.Info)] = sec - case sec.Type == elf.SHT_PROGBITS && (sec.Flags&elf.SHF_EXECINSTR) != 0 && sec.Size > 0: - sections[idx] = newElfSection(sec, programSection) - } - } - - license, err := loadLicense(licenseSection) - if err != nil { - return nil, fmt.Errorf("load license: %w", err) - } - - version, err := loadVersion(versionSection, f.ByteOrder) - if err != nil { - return nil, fmt.Errorf("load version: %w", err) - } - - btfSpec, err := btf.LoadSpecFromReader(rd) - if err != nil && !errors.Is(err, btf.ErrNotFound) { - return nil, fmt.Errorf("load BTF: %w", err) - } - - // Assign symbols to all the sections we're interested in. - symbols, err := f.Symbols() - if err != nil { - return nil, fmt.Errorf("load symbols: %v", err) - } - - for _, symbol := range symbols { - idx := symbol.Section - symType := elf.ST_TYPE(symbol.Info) - - section := sections[idx] - if section == nil { - continue - } - - // Older versions of LLVM don't tag symbols correctly, so keep - // all NOTYPE ones. - keep := symType == elf.STT_NOTYPE - switch section.kind { - case mapSection, btfMapSection, dataSection: - keep = keep || symType == elf.STT_OBJECT - case programSection: - keep = keep || symType == elf.STT_FUNC - } - if !keep || symbol.Name == "" { - continue - } - - section.symbols[symbol.Value] = symbol - } - - ec := &elfCode{ - SafeELFFile: f, - sections: sections, - license: license, - version: version, - btf: btfSpec, - } - - // Go through relocation sections, and parse the ones for sections we're - // interested in. Make sure that relocations point at valid sections. - for idx, relSection := range relSections { - section := sections[idx] - if section == nil { - continue - } - - rels, err := ec.loadRelocations(relSection, symbols) - if err != nil { - return nil, fmt.Errorf("relocation for section %q: %w", section.Name, err) - } - - for _, rel := range rels { - target := sections[rel.Section] - if target == nil { - return nil, fmt.Errorf("section %q: reference to %q in section %s: %w", section.Name, rel.Name, rel.Section, ErrNotSupported) - } - - if target.Flags&elf.SHF_STRINGS > 0 { - return nil, fmt.Errorf("section %q: string is not stack allocated: %w", section.Name, ErrNotSupported) - } - - target.references++ - } - - section.relocations = rels - } - - // Collect all the various ways to define maps. - maps := make(map[string]*MapSpec) - if err := ec.loadMaps(maps); err != nil { - return nil, fmt.Errorf("load maps: %w", err) - } - - if err := ec.loadBTFMaps(maps); err != nil { - return nil, fmt.Errorf("load BTF maps: %w", err) - } - - if err := ec.loadDataSections(maps); err != nil { - return nil, fmt.Errorf("load data sections: %w", err) - } - - // Finally, collect programs and link them. 
- progs, err := ec.loadPrograms() - if err != nil { - return nil, fmt.Errorf("load programs: %w", err) - } - - return &CollectionSpec{maps, progs}, nil -} - -func loadLicense(sec *elf.Section) (string, error) { - if sec == nil { - return "", nil - } - - data, err := sec.Data() - if err != nil { - return "", fmt.Errorf("section %s: %v", sec.Name, err) - } - return string(bytes.TrimRight(data, "\000")), nil -} - -func loadVersion(sec *elf.Section, bo binary.ByteOrder) (uint32, error) { - if sec == nil { - return 0, nil - } - - var version uint32 - if err := binary.Read(sec.Open(), bo, &version); err != nil { - return 0, fmt.Errorf("section %s: %v", sec.Name, err) - } - return version, nil -} - -type elfSectionKind int - -const ( - undefSection elfSectionKind = iota - mapSection - btfMapSection - programSection - dataSection -) - -type elfSection struct { - *elf.Section - kind elfSectionKind - // Offset from the start of the section to a symbol - symbols map[uint64]elf.Symbol - // Offset from the start of the section to a relocation, which points at - // a symbol in another section. - relocations map[uint64]elf.Symbol - // The number of relocations pointing at this section. - references int -} - -func newElfSection(section *elf.Section, kind elfSectionKind) *elfSection { - return &elfSection{ - section, - kind, - make(map[uint64]elf.Symbol), - make(map[uint64]elf.Symbol), - 0, - } -} - -func (ec *elfCode) loadPrograms() (map[string]*ProgramSpec, error) { - var ( - progs []*ProgramSpec - libs []*ProgramSpec - ) - - for _, sec := range ec.sections { - if sec.kind != programSection { - continue - } - - if len(sec.symbols) == 0 { - return nil, fmt.Errorf("section %v: missing symbols", sec.Name) - } - - funcSym, ok := sec.symbols[0] - if !ok { - return nil, fmt.Errorf("section %v: no label at start", sec.Name) - } - - insns, length, err := ec.loadInstructions(sec) - if err != nil { - return nil, fmt.Errorf("program %s: %w", funcSym.Name, err) - } - - progType, attachType, progFlags, attachTo := getProgType(sec.Name) - - spec := &ProgramSpec{ - Name: funcSym.Name, - Type: progType, - Flags: progFlags, - AttachType: attachType, - AttachTo: attachTo, - License: ec.license, - KernelVersion: ec.version, - Instructions: insns, - ByteOrder: ec.ByteOrder, - } - - if ec.btf != nil { - spec.BTF, err = ec.btf.Program(sec.Name, length) - if err != nil && !errors.Is(err, btf.ErrNoExtendedInfo) { - return nil, fmt.Errorf("program %s: %w", funcSym.Name, err) - } - } - - if spec.Type == UnspecifiedProgram { - // There is no single name we can use for "library" sections, - // since they may contain multiple functions. We'll decode the - // labels they contain later on, and then link sections that way. 
- libs = append(libs, spec) - } else { - progs = append(progs, spec) - } - } - - res := make(map[string]*ProgramSpec, len(progs)) - for _, prog := range progs { - err := link(prog, libs) - if err != nil { - return nil, fmt.Errorf("program %s: %w", prog.Name, err) - } - res[prog.Name] = prog - } - - return res, nil -} - -func (ec *elfCode) loadInstructions(section *elfSection) (asm.Instructions, uint64, error) { - var ( - r = bufio.NewReader(section.Open()) - insns asm.Instructions - offset uint64 - ) - for { - var ins asm.Instruction - n, err := ins.Unmarshal(r, ec.ByteOrder) - if err == io.EOF { - return insns, offset, nil - } - if err != nil { - return nil, 0, fmt.Errorf("offset %d: %w", offset, err) - } - - ins.Symbol = section.symbols[offset].Name - - if rel, ok := section.relocations[offset]; ok { - if err = ec.relocateInstruction(&ins, rel); err != nil { - return nil, 0, fmt.Errorf("offset %d: relocate instruction: %w", offset, err) - } - } - - insns = append(insns, ins) - offset += n - } -} - -func (ec *elfCode) relocateInstruction(ins *asm.Instruction, rel elf.Symbol) error { - var ( - typ = elf.ST_TYPE(rel.Info) - bind = elf.ST_BIND(rel.Info) - name = rel.Name - ) - - target := ec.sections[rel.Section] - - switch target.kind { - case mapSection, btfMapSection: - if bind != elf.STB_GLOBAL { - return fmt.Errorf("possible erroneous static qualifier on map definition: found reference to %q", name) - } - - if typ != elf.STT_OBJECT && typ != elf.STT_NOTYPE { - // STT_NOTYPE is generated on clang < 8 which doesn't tag - // relocations appropriately. - return fmt.Errorf("map load: incorrect relocation type %v", typ) - } - - ins.Src = asm.PseudoMapFD - - // Mark the instruction as needing an update when creating the - // collection. - if err := ins.RewriteMapPtr(-1); err != nil { - return err - } - - case dataSection: - var offset uint32 - switch typ { - case elf.STT_SECTION: - if bind != elf.STB_LOCAL { - return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind) - } - - // This is really a reference to a static symbol, which clang doesn't - // emit a symbol table entry for. Instead it encodes the offset in - // the instruction itself. - offset = uint32(uint64(ins.Constant)) - - case elf.STT_OBJECT: - if bind != elf.STB_GLOBAL { - return fmt.Errorf("direct load: %s: unsupported relocation %s", name, bind) - } - - offset = uint32(rel.Value) - - default: - return fmt.Errorf("incorrect relocation type %v for direct map load", typ) - } - - // We rely on using the name of the data section as the reference. It - // would be nicer to keep the real name in case of an STT_OBJECT, but - // it's not clear how to encode that into Instruction. - name = target.Name - - // The kernel expects the offset in the second basic BPF instruction. - ins.Constant = int64(uint64(offset) << 32) - ins.Src = asm.PseudoMapValue - - // Mark the instruction as needing an update when creating the - // collection. 
- if err := ins.RewriteMapPtr(-1); err != nil { - return err - } - - case programSection: - if ins.OpCode.JumpOp() != asm.Call { - return fmt.Errorf("not a call instruction: %s", ins) - } - - if ins.Src != asm.PseudoCall { - return fmt.Errorf("call: %s: incorrect source register", name) - } - - switch typ { - case elf.STT_NOTYPE, elf.STT_FUNC: - if bind != elf.STB_GLOBAL { - return fmt.Errorf("call: %s: unsupported binding: %s", name, bind) - } - - case elf.STT_SECTION: - if bind != elf.STB_LOCAL { - return fmt.Errorf("call: %s: unsupported binding: %s", name, bind) - } - - // The function we want to call is in the indicated section, - // at the offset encoded in the instruction itself. Reverse - // the calculation to find the real function we're looking for. - // A value of -1 references the first instruction in the section. - offset := int64(int32(ins.Constant)+1) * asm.InstructionSize - if offset < 0 { - return fmt.Errorf("call: %s: invalid offset %d", name, offset) - } - - sym, ok := target.symbols[uint64(offset)] - if !ok { - return fmt.Errorf("call: %s: no symbol at offset %d", name, offset) - } - - ins.Constant = -1 - name = sym.Name - - default: - return fmt.Errorf("call: %s: invalid symbol type %s", name, typ) - } - - case undefSection: - if bind != elf.STB_GLOBAL { - return fmt.Errorf("asm relocation: %s: unsupported binding: %s", name, bind) - } - - if typ != elf.STT_NOTYPE { - return fmt.Errorf("asm relocation: %s: unsupported type %s", name, typ) - } - - // There is nothing to do here but set ins.Reference. - - default: - return fmt.Errorf("relocation to %q: %w", target.Name, ErrNotSupported) - } - - ins.Reference = name - return nil -} - -func (ec *elfCode) loadMaps(maps map[string]*MapSpec) error { - for _, sec := range ec.sections { - if sec.kind != mapSection { - continue - } - - nSym := len(sec.symbols) - if nSym == 0 { - return fmt.Errorf("section %v: no symbols", sec.Name) - } - - if sec.Size%uint64(nSym) != 0 { - return fmt.Errorf("section %v: map descriptors are not of equal size", sec.Name) - } - - var ( - r = bufio.NewReader(sec.Open()) - size = sec.Size / uint64(nSym) - ) - for i, offset := 0, uint64(0); i < nSym; i, offset = i+1, offset+size { - mapSym, ok := sec.symbols[offset] - if !ok { - return fmt.Errorf("section %s: missing symbol for map at offset %d", sec.Name, offset) - } - - mapName := mapSym.Name - if maps[mapName] != nil { - return fmt.Errorf("section %v: map %v already exists", sec.Name, mapSym) - } - - lr := io.LimitReader(r, int64(size)) - - spec := MapSpec{ - Name: SanitizeName(mapName, -1), - } - switch { - case binary.Read(lr, ec.ByteOrder, &spec.Type) != nil: - return fmt.Errorf("map %s: missing type", mapName) - case binary.Read(lr, ec.ByteOrder, &spec.KeySize) != nil: - return fmt.Errorf("map %s: missing key size", mapName) - case binary.Read(lr, ec.ByteOrder, &spec.ValueSize) != nil: - return fmt.Errorf("map %s: missing value size", mapName) - case binary.Read(lr, ec.ByteOrder, &spec.MaxEntries) != nil: - return fmt.Errorf("map %s: missing max entries", mapName) - case binary.Read(lr, ec.ByteOrder, &spec.Flags) != nil: - return fmt.Errorf("map %s: missing flags", mapName) - } - - if _, err := io.Copy(internal.DiscardZeroes{}, lr); err != nil { - return fmt.Errorf("map %s: unknown and non-zero fields in definition", mapName) - } - - if err := spec.clampPerfEventArraySize(); err != nil { - return fmt.Errorf("map %s: %w", mapName, err) - } - - maps[mapName] = &spec - } - } - - return nil -} - -func (ec *elfCode) loadBTFMaps(maps 
map[string]*MapSpec) error { - for _, sec := range ec.sections { - if sec.kind != btfMapSection { - continue - } - - if ec.btf == nil { - return fmt.Errorf("missing BTF") - } - - _, err := io.Copy(internal.DiscardZeroes{}, bufio.NewReader(sec.Open())) - if err != nil { - return fmt.Errorf("section %v: initializing BTF map definitions: %w", sec.Name, internal.ErrNotSupported) - } - - var ds btf.Datasec - if err := ec.btf.FindType(sec.Name, &ds); err != nil { - return fmt.Errorf("cannot find section '%s' in BTF: %w", sec.Name, err) - } - - for _, vs := range ds.Vars { - v, ok := vs.Type.(*btf.Var) - if !ok { - return fmt.Errorf("section %v: unexpected type %s", sec.Name, vs.Type) - } - name := string(v.Name) - - if maps[name] != nil { - return fmt.Errorf("section %v: map %s already exists", sec.Name, name) - } - - mapStruct, ok := v.Type.(*btf.Struct) - if !ok { - return fmt.Errorf("expected struct, got %s", v.Type) - } - - mapSpec, err := mapSpecFromBTF(name, mapStruct, false, ec.btf) - if err != nil { - return fmt.Errorf("map %v: %w", name, err) - } - - if err := mapSpec.clampPerfEventArraySize(); err != nil { - return fmt.Errorf("map %v: %w", name, err) - } - - maps[name] = mapSpec - } - } - - return nil -} - -// mapSpecFromBTF produces a MapSpec based on a btf.Struct def representing -// a BTF map definition. The name and spec arguments will be copied to the -// resulting MapSpec, and inner must be true on any resursive invocations. -func mapSpecFromBTF(name string, def *btf.Struct, inner bool, spec *btf.Spec) (*MapSpec, error) { - - var ( - key, value btf.Type - keySize, valueSize uint32 - mapType, flags, maxEntries uint32 - pinType PinType - innerMapSpec *MapSpec - err error - ) - - for i, member := range def.Members { - switch member.Name { - case "type": - mapType, err = uintFromBTF(member.Type) - if err != nil { - return nil, fmt.Errorf("can't get type: %w", err) - } - - case "map_flags": - flags, err = uintFromBTF(member.Type) - if err != nil { - return nil, fmt.Errorf("can't get BTF map flags: %w", err) - } - - case "max_entries": - maxEntries, err = uintFromBTF(member.Type) - if err != nil { - return nil, fmt.Errorf("can't get BTF map max entries: %w", err) - } - - case "key": - if keySize != 0 { - return nil, errors.New("both key and key_size given") - } - - pk, ok := member.Type.(*btf.Pointer) - if !ok { - return nil, fmt.Errorf("key type is not a pointer: %T", member.Type) - } - - key = pk.Target - - size, err := btf.Sizeof(pk.Target) - if err != nil { - return nil, fmt.Errorf("can't get size of BTF key: %w", err) - } - - keySize = uint32(size) - - case "value": - if valueSize != 0 { - return nil, errors.New("both value and value_size given") - } - - vk, ok := member.Type.(*btf.Pointer) - if !ok { - return nil, fmt.Errorf("value type is not a pointer: %T", member.Type) - } - - value = vk.Target - - size, err := btf.Sizeof(vk.Target) - if err != nil { - return nil, fmt.Errorf("can't get size of BTF value: %w", err) - } - - valueSize = uint32(size) - - case "key_size": - // Key needs to be nil and keySize needs to be 0 for key_size to be - // considered a valid member. - if key != nil || keySize != 0 { - return nil, errors.New("both key and key_size given") - } - - keySize, err = uintFromBTF(member.Type) - if err != nil { - return nil, fmt.Errorf("can't get BTF key size: %w", err) - } - - case "value_size": - // Value needs to be nil and valueSize needs to be 0 for value_size to be - // considered a valid member. 
- if value != nil || valueSize != 0 { - return nil, errors.New("both value and value_size given") - } - - valueSize, err = uintFromBTF(member.Type) - if err != nil { - return nil, fmt.Errorf("can't get BTF value size: %w", err) - } - - case "pinning": - if inner { - return nil, errors.New("inner maps can't be pinned") - } - - pinning, err := uintFromBTF(member.Type) - if err != nil { - return nil, fmt.Errorf("can't get pinning: %w", err) - } - - pinType = PinType(pinning) - - case "values": - // The 'values' field in BTF map definitions is used for declaring map - // value types that are references to other BPF objects, like other maps - // or programs. It is always expected to be an array of pointers. - if i != len(def.Members)-1 { - return nil, errors.New("'values' must be the last member in a BTF map definition") - } - - if valueSize != 0 && valueSize != 4 { - return nil, errors.New("value_size must be 0 or 4") - } - valueSize = 4 - - valueType, err := resolveBTFArrayMacro(member.Type) - if err != nil { - return nil, fmt.Errorf("can't resolve type of member 'values': %w", err) - } - - switch t := valueType.(type) { - case *btf.Struct: - // The values member pointing to an array of structs means we're expecting - // a map-in-map declaration. - if MapType(mapType) != ArrayOfMaps && MapType(mapType) != HashOfMaps { - return nil, errors.New("outer map needs to be an array or a hash of maps") - } - if inner { - return nil, fmt.Errorf("nested inner maps are not supported") - } - - // This inner map spec is used as a map template, but it needs to be - // created as a traditional map before it can be used to do so. - // libbpf names the inner map template '.inner', but we - // opted for _inner to simplify validation logic. (dots only supported - // on kernels 5.2 and up) - // Pass the BTF spec from the parent object, since both parent and - // child must be created from the same BTF blob (on kernels that support BTF). - innerMapSpec, err = mapSpecFromBTF(name+"_inner", t, true, spec) - if err != nil { - return nil, fmt.Errorf("can't parse BTF map definition of inner map: %w", err) - } - - default: - return nil, fmt.Errorf("unsupported value type %q in 'values' field", t) - } - - default: - return nil, fmt.Errorf("unrecognized field %s in BTF map definition", member.Name) - } - } - - bm := btf.NewMap(spec, key, value) - - return &MapSpec{ - Name: SanitizeName(name, -1), - Type: MapType(mapType), - KeySize: keySize, - ValueSize: valueSize, - MaxEntries: maxEntries, - Flags: flags, - BTF: &bm, - Pinning: pinType, - InnerMap: innerMapSpec, - }, nil -} - -// uintFromBTF resolves the __uint macro, which is a pointer to a sized -// array, e.g. for int (*foo)[10], this function will return 10. -func uintFromBTF(typ btf.Type) (uint32, error) { - ptr, ok := typ.(*btf.Pointer) - if !ok { - return 0, fmt.Errorf("not a pointer: %v", typ) - } - - arr, ok := ptr.Target.(*btf.Array) - if !ok { - return 0, fmt.Errorf("not a pointer to array: %v", typ) - } - - return arr.Nelems, nil -} - -// resolveBTFArrayMacro resolves the __array macro, which declares an array -// of pointers to a given type. This function returns the target Type of -// the pointers in the array. 
-func resolveBTFArrayMacro(typ btf.Type) (btf.Type, error) { - arr, ok := typ.(*btf.Array) - if !ok { - return nil, fmt.Errorf("not an array: %v", typ) - } - - ptr, ok := arr.Type.(*btf.Pointer) - if !ok { - return nil, fmt.Errorf("not an array of pointers: %v", typ) - } - - return ptr.Target, nil -} - -func (ec *elfCode) loadDataSections(maps map[string]*MapSpec) error { - for _, sec := range ec.sections { - if sec.kind != dataSection { - continue - } - - if sec.references == 0 { - // Prune data sections which are not referenced by any - // instructions. - continue - } - - if ec.btf == nil { - return errors.New("data sections require BTF, make sure all consts are marked as static") - } - - btfMap, err := ec.btf.Datasec(sec.Name) - if err != nil { - return err - } - - data, err := sec.Data() - if err != nil { - return fmt.Errorf("data section %s: can't get contents: %w", sec.Name, err) - } - - if uint64(len(data)) > math.MaxUint32 { - return fmt.Errorf("data section %s: contents exceed maximum size", sec.Name) - } - - mapSpec := &MapSpec{ - Name: SanitizeName(sec.Name, -1), - Type: Array, - KeySize: 4, - ValueSize: uint32(len(data)), - MaxEntries: 1, - Contents: []MapKV{{uint32(0), data}}, - BTF: btfMap, - } - - switch sec.Name { - case ".rodata": - mapSpec.Flags = unix.BPF_F_RDONLY_PROG - mapSpec.Freeze = true - case ".bss": - // The kernel already zero-initializes the map - mapSpec.Contents = nil - } - - maps[sec.Name] = mapSpec - } - return nil -} - -func getProgType(sectionName string) (ProgramType, AttachType, uint32, string) { - types := map[string]struct { - progType ProgramType - attachType AttachType - progFlags uint32 - }{ - // From https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/tools/lib/bpf/libbpf.c - "socket": {SocketFilter, AttachNone, 0}, - "seccomp": {SocketFilter, AttachNone, 0}, - "kprobe/": {Kprobe, AttachNone, 0}, - "uprobe/": {Kprobe, AttachNone, 0}, - "kretprobe/": {Kprobe, AttachNone, 0}, - "uretprobe/": {Kprobe, AttachNone, 0}, - "tracepoint/": {TracePoint, AttachNone, 0}, - "raw_tracepoint/": {RawTracepoint, AttachNone, 0}, - "raw_tp/": {RawTracepoint, AttachNone, 0}, - "tp_btf/": {Tracing, AttachTraceRawTp, 0}, - "xdp": {XDP, AttachNone, 0}, - "perf_event": {PerfEvent, AttachNone, 0}, - "lwt_in": {LWTIn, AttachNone, 0}, - "lwt_out": {LWTOut, AttachNone, 0}, - "lwt_xmit": {LWTXmit, AttachNone, 0}, - "lwt_seg6local": {LWTSeg6Local, AttachNone, 0}, - "sockops": {SockOps, AttachCGroupSockOps, 0}, - "sk_skb/stream_parser": {SkSKB, AttachSkSKBStreamParser, 0}, - "sk_skb/stream_verdict": {SkSKB, AttachSkSKBStreamParser, 0}, - "sk_msg": {SkMsg, AttachSkSKBStreamVerdict, 0}, - "lirc_mode2": {LircMode2, AttachLircMode2, 0}, - "flow_dissector": {FlowDissector, AttachFlowDissector, 0}, - "iter/": {Tracing, AttachTraceIter, 0}, - "fentry/": {Tracing, AttachTraceFEntry, 0}, - "fmod_ret/": {Tracing, AttachModifyReturn, 0}, - "fexit/": {Tracing, AttachTraceFExit, 0}, - "fentry.s/": {Tracing, AttachTraceFEntry, unix.BPF_F_SLEEPABLE}, - "fmod_ret.s/": {Tracing, AttachModifyReturn, unix.BPF_F_SLEEPABLE}, - "fexit.s/": {Tracing, AttachTraceFExit, unix.BPF_F_SLEEPABLE}, - "sk_lookup/": {SkLookup, AttachSkLookup, 0}, - "lsm/": {LSM, AttachLSMMac, 0}, - "lsm.s/": {LSM, AttachLSMMac, unix.BPF_F_SLEEPABLE}, - - "cgroup_skb/ingress": {CGroupSKB, AttachCGroupInetIngress, 0}, - "cgroup_skb/egress": {CGroupSKB, AttachCGroupInetEgress, 0}, - "cgroup/dev": {CGroupDevice, AttachCGroupDevice, 0}, - "cgroup/skb": {CGroupSKB, AttachNone, 0}, - "cgroup/sock": {CGroupSock, 
AttachCGroupInetSockCreate, 0}, - "cgroup/post_bind4": {CGroupSock, AttachCGroupInet4PostBind, 0}, - "cgroup/post_bind6": {CGroupSock, AttachCGroupInet6PostBind, 0}, - "cgroup/bind4": {CGroupSockAddr, AttachCGroupInet4Bind, 0}, - "cgroup/bind6": {CGroupSockAddr, AttachCGroupInet6Bind, 0}, - "cgroup/connect4": {CGroupSockAddr, AttachCGroupInet4Connect, 0}, - "cgroup/connect6": {CGroupSockAddr, AttachCGroupInet6Connect, 0}, - "cgroup/sendmsg4": {CGroupSockAddr, AttachCGroupUDP4Sendmsg, 0}, - "cgroup/sendmsg6": {CGroupSockAddr, AttachCGroupUDP6Sendmsg, 0}, - "cgroup/recvmsg4": {CGroupSockAddr, AttachCGroupUDP4Recvmsg, 0}, - "cgroup/recvmsg6": {CGroupSockAddr, AttachCGroupUDP6Recvmsg, 0}, - "cgroup/sysctl": {CGroupSysctl, AttachCGroupSysctl, 0}, - "cgroup/getsockopt": {CGroupSockopt, AttachCGroupGetsockopt, 0}, - "cgroup/setsockopt": {CGroupSockopt, AttachCGroupSetsockopt, 0}, - "classifier": {SchedCLS, AttachNone, 0}, - "action": {SchedACT, AttachNone, 0}, - } - - for prefix, t := range types { - if !strings.HasPrefix(sectionName, prefix) { - continue - } - - if !strings.HasSuffix(prefix, "/") { - return t.progType, t.attachType, t.progFlags, "" - } - - return t.progType, t.attachType, t.progFlags, sectionName[len(prefix):] - } - - return UnspecifiedProgram, AttachNone, 0, "" -} - -func (ec *elfCode) loadRelocations(sec *elf.Section, symbols []elf.Symbol) (map[uint64]elf.Symbol, error) { - rels := make(map[uint64]elf.Symbol) - - if sec.Entsize < 16 { - return nil, fmt.Errorf("section %s: relocations are less than 16 bytes", sec.Name) - } - - r := bufio.NewReader(sec.Open()) - for off := uint64(0); off < sec.Size; off += sec.Entsize { - ent := io.LimitReader(r, int64(sec.Entsize)) - - var rel elf.Rel64 - if binary.Read(ent, ec.ByteOrder, &rel) != nil { - return nil, fmt.Errorf("can't parse relocation at offset %v", off) - } - - symNo := int(elf.R_SYM64(rel.Info) - 1) - if symNo >= len(symbols) { - return nil, fmt.Errorf("offset %d: symbol %d doesn't exist", off, symNo) - } - - symbol := symbols[symNo] - rels[rel.Off] = symbol - } - - return rels, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/elf_reader_fuzz.go b/src/runtime/vendor/github.com/cilium/ebpf/elf_reader_fuzz.go deleted file mode 100644 index d46d135f2f..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/elf_reader_fuzz.go +++ /dev/null @@ -1,21 +0,0 @@ -// +build gofuzz - -// Use with https://github.com/dvyukov/go-fuzz - -package ebpf - -import "bytes" - -func FuzzLoadCollectionSpec(data []byte) int { - spec, err := LoadCollectionSpecFromReader(bytes.NewReader(data)) - if err != nil { - if spec != nil { - panic("spec is not nil") - } - return 0 - } - if spec == nil { - panic("spec is nil") - } - return 1 -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/go.mod b/src/runtime/vendor/github.com/cilium/ebpf/go.mod deleted file mode 100644 index df8139621c..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/go.mod +++ /dev/null @@ -1,9 +0,0 @@ -module github.com/cilium/ebpf - -go 1.15 - -require ( - github.com/frankban/quicktest v1.11.3 - github.com/google/go-cmp v0.5.4 - golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c -) diff --git a/src/runtime/vendor/github.com/cilium/ebpf/go.sum b/src/runtime/vendor/github.com/cilium/ebpf/go.sum deleted file mode 100644 index a5039262aa..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/go.sum +++ /dev/null @@ -1,13 +0,0 @@ -github.com/frankban/quicktest v1.11.3 h1:8sXhOn0uLys67V8EsXLc6eszDs8VXWxL3iRvebPhedY= -github.com/frankban/quicktest 
v1.11.3/go.mod h1:wRf/ReqHper53s+kmmSZizM8NamnL3IM0I9ntUbOk+k= -github.com/google/go-cmp v0.5.4 h1:L8R9j+yAqZuZjsqh/z+F1NCffTKKLShY6zXTItVIZ8M= -github.com/google/go-cmp v0.5.4/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/kr/pretty v0.2.1 h1:Fmg33tUaq4/8ym9TJN1x7sLJnHVwhP33CNkpYV/7rwI= -github.com/kr/pretty v0.2.1/go.mod h1:ipq/a2n7PKx3OHsz4KJII5eveXtPO4qwEXGdVfWzfnI= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c h1:VwygUrnw9jn88c4u8GD3rZQbqrP/tgas88tPUbBxQrk= -golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/src/runtime/vendor/github.com/cilium/ebpf/info.go b/src/runtime/vendor/github.com/cilium/ebpf/info.go deleted file mode 100644 index b95131ef57..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/info.go +++ /dev/null @@ -1,239 +0,0 @@ -package ebpf - -import ( - "bufio" - "encoding/hex" - "errors" - "fmt" - "io" - "os" - "strings" - "syscall" - "time" - - "github.com/cilium/ebpf/internal" -) - -// MapInfo describes a map. -type MapInfo struct { - Type MapType - id MapID - KeySize uint32 - ValueSize uint32 - MaxEntries uint32 - Flags uint32 - // Name as supplied by user space at load time. - Name string -} - -func newMapInfoFromFd(fd *internal.FD) (*MapInfo, error) { - info, err := bpfGetMapInfoByFD(fd) - if errors.Is(err, syscall.EINVAL) { - return newMapInfoFromProc(fd) - } - if err != nil { - return nil, err - } - - return &MapInfo{ - MapType(info.map_type), - MapID(info.id), - info.key_size, - info.value_size, - info.max_entries, - info.map_flags, - // name is available from 4.15. - internal.CString(info.name[:]), - }, nil -} - -func newMapInfoFromProc(fd *internal.FD) (*MapInfo, error) { - var mi MapInfo - err := scanFdInfo(fd, map[string]interface{}{ - "map_type": &mi.Type, - "key_size": &mi.KeySize, - "value_size": &mi.ValueSize, - "max_entries": &mi.MaxEntries, - "map_flags": &mi.Flags, - }) - if err != nil { - return nil, err - } - return &mi, nil -} - -// ID returns the map ID. -// -// Available from 4.13. -// -// The bool return value indicates whether this optional field is available. -func (mi *MapInfo) ID() (MapID, bool) { - return mi.id, mi.id > 0 -} - -// programStats holds statistics of a program. -type programStats struct { - // Total accumulated runtime of the program ins ns. - runtime time.Duration - // Total number of times the program was called. - runCount uint64 -} - -// ProgramInfo describes a program. -type ProgramInfo struct { - Type ProgramType - id ProgramID - // Truncated hash of the BPF bytecode. - Tag string - // Name as supplied by user space at load time. - Name string - - stats *programStats -} - -func newProgramInfoFromFd(fd *internal.FD) (*ProgramInfo, error) { - info, err := bpfGetProgInfoByFD(fd) - if errors.Is(err, syscall.EINVAL) { - return newProgramInfoFromProc(fd) - } - if err != nil { - return nil, err - } - - return &ProgramInfo{ - Type: ProgramType(info.prog_type), - id: ProgramID(info.id), - // tag is available if the kernel supports BPF_PROG_GET_INFO_BY_FD. 
- Tag: hex.EncodeToString(info.tag[:]), - // name is available from 4.15. - Name: internal.CString(info.name[:]), - stats: &programStats{ - runtime: time.Duration(info.run_time_ns), - runCount: info.run_cnt, - }, - }, nil -} - -func newProgramInfoFromProc(fd *internal.FD) (*ProgramInfo, error) { - var info ProgramInfo - err := scanFdInfo(fd, map[string]interface{}{ - "prog_type": &info.Type, - "prog_tag": &info.Tag, - }) - if errors.Is(err, errMissingFields) { - return nil, &internal.UnsupportedFeatureError{ - Name: "reading program info from /proc/self/fdinfo", - MinimumVersion: internal.Version{4, 10, 0}, - } - } - if err != nil { - return nil, err - } - - return &info, nil -} - -// ID returns the program ID. -// -// Available from 4.13. -// -// The bool return value indicates whether this optional field is available. -func (pi *ProgramInfo) ID() (ProgramID, bool) { - return pi.id, pi.id > 0 -} - -// RunCount returns the total number of times the program was called. -// -// Can return 0 if the collection of statistics is not enabled. See EnableStats(). -// The bool return value indicates whether this optional field is available. -func (pi *ProgramInfo) RunCount() (uint64, bool) { - if pi.stats != nil { - return pi.stats.runCount, true - } - return 0, false -} - -// Runtime returns the total accumulated runtime of the program. -// -// Can return 0 if the collection of statistics is not enabled. See EnableStats(). -// The bool return value indicates whether this optional field is available. -func (pi *ProgramInfo) Runtime() (time.Duration, bool) { - if pi.stats != nil { - return pi.stats.runtime, true - } - return time.Duration(0), false -} - -func scanFdInfo(fd *internal.FD, fields map[string]interface{}) error { - raw, err := fd.Value() - if err != nil { - return err - } - - fh, err := os.Open(fmt.Sprintf("/proc/self/fdinfo/%d", raw)) - if err != nil { - return err - } - defer fh.Close() - - if err := scanFdInfoReader(fh, fields); err != nil { - return fmt.Errorf("%s: %w", fh.Name(), err) - } - return nil -} - -var errMissingFields = errors.New("missing fields") - -func scanFdInfoReader(r io.Reader, fields map[string]interface{}) error { - var ( - scanner = bufio.NewScanner(r) - scanned int - ) - - for scanner.Scan() { - parts := strings.SplitN(scanner.Text(), "\t", 2) - if len(parts) != 2 { - continue - } - - name := strings.TrimSuffix(parts[0], ":") - field, ok := fields[string(name)] - if !ok { - continue - } - - if n, err := fmt.Sscanln(parts[1], field); err != nil || n != 1 { - return fmt.Errorf("can't parse field %s: %v", name, err) - } - - scanned++ - } - - if err := scanner.Err(); err != nil { - return err - } - - if scanned != len(fields) { - return errMissingFields - } - - return nil -} - -// EnableStats starts the measuring of the runtime -// and run counts of eBPF programs. -// -// Collecting statistics can have an impact on the performance. -// -// Requires at least 5.8. 
-func EnableStats(which uint32) (io.Closer, error) { - attr := internal.BPFEnableStatsAttr{ - StatsType: which, - } - - fd, err := internal.BPFEnableStats(&attr) - if err != nil { - return nil, err - } - return fd, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf.go deleted file mode 100644 index 5da9e11921..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf.go +++ /dev/null @@ -1,799 +0,0 @@ -package btf - -import ( - "bytes" - "debug/elf" - "encoding/binary" - "errors" - "fmt" - "io" - "io/ioutil" - "math" - "os" - "reflect" - "sync" - "unsafe" - - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/unix" -) - -const btfMagic = 0xeB9F - -// Errors returned by BTF functions. -var ( - ErrNotSupported = internal.ErrNotSupported - ErrNotFound = errors.New("not found") - ErrNoExtendedInfo = errors.New("no extended info") -) - -// Spec represents decoded BTF. -type Spec struct { - rawTypes []rawType - strings stringTable - types []Type - namedTypes map[string][]namedType - funcInfos map[string]extInfo - lineInfos map[string]extInfo - coreRelos map[string]coreRelos - byteOrder binary.ByteOrder -} - -type btfHeader struct { - Magic uint16 - Version uint8 - Flags uint8 - HdrLen uint32 - - TypeOff uint32 - TypeLen uint32 - StringOff uint32 - StringLen uint32 -} - -// LoadSpecFromReader reads BTF sections from an ELF. -// -// Returns ErrNotFound if the reader contains no BTF. -func LoadSpecFromReader(rd io.ReaderAt) (*Spec, error) { - file, err := internal.NewSafeELFFile(rd) - if err != nil { - return nil, err - } - defer file.Close() - - btfSection, btfExtSection, sectionSizes, err := findBtfSections(file) - if err != nil { - return nil, err - } - - if btfSection == nil { - return nil, fmt.Errorf("btf: %w", ErrNotFound) - } - - symbols, err := file.Symbols() - if err != nil { - return nil, fmt.Errorf("can't read symbols: %v", err) - } - - variableOffsets := make(map[variable]uint32) - for _, symbol := range symbols { - if idx := symbol.Section; idx >= elf.SHN_LORESERVE && idx <= elf.SHN_HIRESERVE { - // Ignore things like SHN_ABS - continue - } - - if int(symbol.Section) >= len(file.Sections) { - return nil, fmt.Errorf("symbol %s: invalid section %d", symbol.Name, symbol.Section) - } - - secName := file.Sections[symbol.Section].Name - if _, ok := sectionSizes[secName]; !ok { - continue - } - - if symbol.Value > math.MaxUint32 { - return nil, fmt.Errorf("section %s: symbol %s: size exceeds maximum", secName, symbol.Name) - } - - variableOffsets[variable{secName, symbol.Name}] = uint32(symbol.Value) - } - - spec, err := loadNakedSpec(btfSection.Open(), file.ByteOrder, sectionSizes, variableOffsets) - if err != nil { - return nil, err - } - - if btfExtSection == nil { - return spec, nil - } - - spec.funcInfos, spec.lineInfos, spec.coreRelos, err = parseExtInfos(btfExtSection.Open(), file.ByteOrder, spec.strings) - if err != nil { - return nil, fmt.Errorf("can't read ext info: %w", err) - } - - return spec, nil -} - -func findBtfSections(file *internal.SafeELFFile) (*elf.Section, *elf.Section, map[string]uint32, error) { - var ( - btfSection *elf.Section - btfExtSection *elf.Section - sectionSizes = make(map[string]uint32) - ) - - for _, sec := range file.Sections { - switch sec.Name { - case ".BTF": - btfSection = sec - case ".BTF.ext": - btfExtSection = sec - default: - if sec.Type != elf.SHT_PROGBITS && sec.Type != elf.SHT_NOBITS { - break - } - - if sec.Size > 
math.MaxUint32 { - return nil, nil, nil, fmt.Errorf("section %s exceeds maximum size", sec.Name) - } - - sectionSizes[sec.Name] = uint32(sec.Size) - } - } - return btfSection, btfExtSection, sectionSizes, nil -} - -func loadSpecFromVmlinux(rd io.ReaderAt) (*Spec, error) { - file, err := internal.NewSafeELFFile(rd) - if err != nil { - return nil, err - } - defer file.Close() - - btfSection, _, _, err := findBtfSections(file) - if err != nil { - return nil, fmt.Errorf(".BTF ELF section: %s", err) - } - if btfSection == nil { - return nil, fmt.Errorf("unable to find .BTF ELF section") - } - return loadNakedSpec(btfSection.Open(), file.ByteOrder, nil, nil) -} - -func loadNakedSpec(btf io.ReadSeeker, bo binary.ByteOrder, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) (*Spec, error) { - rawTypes, rawStrings, err := parseBTF(btf, bo) - if err != nil { - return nil, err - } - - err = fixupDatasec(rawTypes, rawStrings, sectionSizes, variableOffsets) - if err != nil { - return nil, err - } - - types, typesByName, err := inflateRawTypes(rawTypes, rawStrings) - if err != nil { - return nil, err - } - - return &Spec{ - rawTypes: rawTypes, - namedTypes: typesByName, - types: types, - strings: rawStrings, - byteOrder: bo, - }, nil -} - -var kernelBTF struct { - sync.Mutex - *Spec -} - -// LoadKernelSpec returns the current kernel's BTF information. -// -// Requires a >= 5.5 kernel with CONFIG_DEBUG_INFO_BTF enabled. Returns -// ErrNotSupported if BTF is not enabled. -func LoadKernelSpec() (*Spec, error) { - kernelBTF.Lock() - defer kernelBTF.Unlock() - - if kernelBTF.Spec != nil { - return kernelBTF.Spec, nil - } - - var err error - kernelBTF.Spec, err = loadKernelSpec() - return kernelBTF.Spec, err -} - -func loadKernelSpec() (*Spec, error) { - release, err := unix.KernelRelease() - if err != nil { - return nil, fmt.Errorf("can't read kernel release number: %w", err) - } - - fh, err := os.Open("/sys/kernel/btf/vmlinux") - if err == nil { - defer fh.Close() - - return loadNakedSpec(fh, internal.NativeEndian, nil, nil) - } - - // use same list of locations as libbpf - // https://github.com/libbpf/libbpf/blob/9a3a42608dbe3731256a5682a125ac1e23bced8f/src/btf.c#L3114-L3122 - locations := []string{ - "/boot/vmlinux-%s", - "/lib/modules/%s/vmlinux-%[1]s", - "/lib/modules/%s/build/vmlinux", - "/usr/lib/modules/%s/kernel/vmlinux", - "/usr/lib/debug/boot/vmlinux-%s", - "/usr/lib/debug/boot/vmlinux-%s.debug", - "/usr/lib/debug/lib/modules/%s/vmlinux", - } - - for _, loc := range locations { - path := fmt.Sprintf(loc, release) - - fh, err := os.Open(path) - if err != nil { - continue - } - defer fh.Close() - - return loadSpecFromVmlinux(fh) - } - - return nil, fmt.Errorf("no BTF for kernel version %s: %w", release, internal.ErrNotSupported) -} - -func parseBTF(btf io.ReadSeeker, bo binary.ByteOrder) ([]rawType, stringTable, error) { - rawBTF, err := ioutil.ReadAll(btf) - if err != nil { - return nil, nil, fmt.Errorf("can't read BTF: %v", err) - } - - rd := bytes.NewReader(rawBTF) - - var header btfHeader - if err := binary.Read(rd, bo, &header); err != nil { - return nil, nil, fmt.Errorf("can't read header: %v", err) - } - - if header.Magic != btfMagic { - return nil, nil, fmt.Errorf("incorrect magic value %v", header.Magic) - } - - if header.Version != 1 { - return nil, nil, fmt.Errorf("unexpected version %v", header.Version) - } - - if header.Flags != 0 { - return nil, nil, fmt.Errorf("unsupported flags %v", header.Flags) - } - - remainder := int64(header.HdrLen) - 
int64(binary.Size(&header)) - if remainder < 0 { - return nil, nil, errors.New("header is too short") - } - - if _, err := io.CopyN(internal.DiscardZeroes{}, rd, remainder); err != nil { - return nil, nil, fmt.Errorf("header padding: %v", err) - } - - if _, err := rd.Seek(int64(header.HdrLen+header.StringOff), io.SeekStart); err != nil { - return nil, nil, fmt.Errorf("can't seek to start of string section: %v", err) - } - - rawStrings, err := readStringTable(io.LimitReader(rd, int64(header.StringLen))) - if err != nil { - return nil, nil, fmt.Errorf("can't read type names: %w", err) - } - - if _, err := rd.Seek(int64(header.HdrLen+header.TypeOff), io.SeekStart); err != nil { - return nil, nil, fmt.Errorf("can't seek to start of type section: %v", err) - } - - rawTypes, err := readTypes(io.LimitReader(rd, int64(header.TypeLen)), bo) - if err != nil { - return nil, nil, fmt.Errorf("can't read types: %w", err) - } - - return rawTypes, rawStrings, nil -} - -type variable struct { - section string - name string -} - -func fixupDatasec(rawTypes []rawType, rawStrings stringTable, sectionSizes map[string]uint32, variableOffsets map[variable]uint32) error { - for i, rawType := range rawTypes { - if rawType.Kind() != kindDatasec { - continue - } - - name, err := rawStrings.Lookup(rawType.NameOff) - if err != nil { - return err - } - - if name == ".kconfig" || name == ".ksyms" { - return fmt.Errorf("reference to %s: %w", name, ErrNotSupported) - } - - if rawTypes[i].SizeType != 0 { - continue - } - - size, ok := sectionSizes[name] - if !ok { - return fmt.Errorf("data section %s: missing size", name) - } - - rawTypes[i].SizeType = size - - secinfos := rawType.data.([]btfVarSecinfo) - for j, secInfo := range secinfos { - id := int(secInfo.Type - 1) - if id >= len(rawTypes) { - return fmt.Errorf("data section %s: invalid type id %d for variable %d", name, id, j) - } - - varName, err := rawStrings.Lookup(rawTypes[id].NameOff) - if err != nil { - return fmt.Errorf("data section %s: can't get name for type %d: %w", name, id, err) - } - - offset, ok := variableOffsets[variable{name, varName}] - if !ok { - return fmt.Errorf("data section %s: missing offset for variable %s", name, varName) - } - - secinfos[j].Offset = offset - } - } - - return nil -} - -type marshalOpts struct { - ByteOrder binary.ByteOrder - StripFuncLinkage bool -} - -func (s *Spec) marshal(opts marshalOpts) ([]byte, error) { - var ( - buf bytes.Buffer - header = new(btfHeader) - headerLen = binary.Size(header) - ) - - // Reserve space for the header. We have to write it last since - // we don't know the size of the type section yet. - _, _ = buf.Write(make([]byte, headerLen)) - - // Write type section, just after the header. - for _, raw := range s.rawTypes { - switch { - case opts.StripFuncLinkage && raw.Kind() == kindFunc: - raw.SetLinkage(StaticFunc) - } - - if err := raw.Marshal(&buf, opts.ByteOrder); err != nil { - return nil, fmt.Errorf("can't marshal BTF: %w", err) - } - } - - typeLen := uint32(buf.Len() - headerLen) - - // Write string section after type section. - _, _ = buf.Write(s.strings) - - // Fill out the header, and write it out. 
- header = &btfHeader{ - Magic: btfMagic, - Version: 1, - Flags: 0, - HdrLen: uint32(headerLen), - TypeOff: 0, - TypeLen: typeLen, - StringOff: typeLen, - StringLen: uint32(len(s.strings)), - } - - raw := buf.Bytes() - err := binary.Write(sliceWriter(raw[:headerLen]), opts.ByteOrder, header) - if err != nil { - return nil, fmt.Errorf("can't write header: %v", err) - } - - return raw, nil -} - -type sliceWriter []byte - -func (sw sliceWriter) Write(p []byte) (int, error) { - if len(p) != len(sw) { - return 0, errors.New("size doesn't match") - } - - return copy(sw, p), nil -} - -// Program finds the BTF for a specific section. -// -// Length is the number of bytes in the raw BPF instruction stream. -// -// Returns an error which may wrap ErrNoExtendedInfo if the Spec doesn't -// contain extended BTF info. -func (s *Spec) Program(name string, length uint64) (*Program, error) { - if length == 0 { - return nil, errors.New("length musn't be zero") - } - - if s.funcInfos == nil && s.lineInfos == nil && s.coreRelos == nil { - return nil, fmt.Errorf("BTF for section %s: %w", name, ErrNoExtendedInfo) - } - - funcInfos, funcOK := s.funcInfos[name] - lineInfos, lineOK := s.lineInfos[name] - relos, coreOK := s.coreRelos[name] - - if !funcOK && !lineOK && !coreOK { - return nil, fmt.Errorf("no extended BTF info for section %s", name) - } - - return &Program{s, length, funcInfos, lineInfos, relos}, nil -} - -// Datasec returns the BTF required to create maps which represent data sections. -func (s *Spec) Datasec(name string) (*Map, error) { - var datasec Datasec - if err := s.FindType(name, &datasec); err != nil { - return nil, fmt.Errorf("data section %s: can't get BTF: %w", name, err) - } - - m := NewMap(s, &Void{}, &datasec) - return &m, nil -} - -// FindType searches for a type with a specific name. -// -// hint determines the type of the returned Type. -// -// Returns an error wrapping ErrNotFound if no matching -// type exists in spec. -func (s *Spec) FindType(name string, typ Type) error { - var ( - wanted = reflect.TypeOf(typ) - candidate Type - ) - - for _, typ := range s.namedTypes[essentialName(name)] { - if reflect.TypeOf(typ) != wanted { - continue - } - - // Match against the full name, not just the essential one. - if typ.name() != name { - continue - } - - if candidate != nil { - return fmt.Errorf("type %s: multiple candidates for %T", name, typ) - } - - candidate = typ - } - - if candidate == nil { - return fmt.Errorf("type %s: %w", name, ErrNotFound) - } - - cpy, _ := copyType(candidate, nil) - value := reflect.Indirect(reflect.ValueOf(cpy)) - reflect.Indirect(reflect.ValueOf(typ)).Set(value) - return nil -} - -// Handle is a reference to BTF loaded into the kernel. -type Handle struct { - fd *internal.FD -} - -// NewHandle loads BTF into the kernel. -// -// Returns ErrNotSupported if BTF is not supported. 
-func NewHandle(spec *Spec) (*Handle, error) { - if err := haveBTF(); err != nil { - return nil, err - } - - if spec.byteOrder != internal.NativeEndian { - return nil, fmt.Errorf("can't load %s BTF on %s", spec.byteOrder, internal.NativeEndian) - } - - btf, err := spec.marshal(marshalOpts{ - ByteOrder: internal.NativeEndian, - StripFuncLinkage: haveFuncLinkage() != nil, - }) - if err != nil { - return nil, fmt.Errorf("can't marshal BTF: %w", err) - } - - if uint64(len(btf)) > math.MaxUint32 { - return nil, errors.New("BTF exceeds the maximum size") - } - - attr := &bpfLoadBTFAttr{ - btf: internal.NewSlicePointer(btf), - btfSize: uint32(len(btf)), - } - - fd, err := bpfLoadBTF(attr) - if err != nil { - logBuf := make([]byte, 64*1024) - attr.logBuf = internal.NewSlicePointer(logBuf) - attr.btfLogSize = uint32(len(logBuf)) - attr.btfLogLevel = 1 - _, logErr := bpfLoadBTF(attr) - return nil, internal.ErrorWithLog(err, logBuf, logErr) - } - - return &Handle{fd}, nil -} - -// Close destroys the handle. -// -// Subsequent calls to FD will return an invalid value. -func (h *Handle) Close() error { - return h.fd.Close() -} - -// FD returns the file descriptor for the handle. -func (h *Handle) FD() int { - value, err := h.fd.Value() - if err != nil { - return -1 - } - - return int(value) -} - -// Map is the BTF for a map. -type Map struct { - spec *Spec - key, value Type -} - -// NewMap returns a new Map containing the given values. -// The key and value arguments are initialized to Void if nil values are given. -func NewMap(spec *Spec, key Type, value Type) Map { - if key == nil { - key = &Void{} - } - if value == nil { - value = &Void{} - } - - return Map{ - spec: spec, - key: key, - value: value, - } -} - -// MapSpec should be a method on Map, but is a free function -// to hide it from users of the ebpf package. -func MapSpec(m *Map) *Spec { - return m.spec -} - -// MapKey should be a method on Map, but is a free function -// to hide it from users of the ebpf package. -func MapKey(m *Map) Type { - return m.key -} - -// MapValue should be a method on Map, but is a free function -// to hide it from users of the ebpf package. -func MapValue(m *Map) Type { - return m.value -} - -// Program is the BTF information for a stream of instructions. -type Program struct { - spec *Spec - length uint64 - funcInfos, lineInfos extInfo - coreRelos coreRelos -} - -// ProgramSpec returns the Spec needed for loading function and line infos into the kernel. -// -// This is a free function instead of a method to hide it from users -// of package ebpf. -func ProgramSpec(s *Program) *Spec { - return s.spec -} - -// ProgramAppend the information from other to the Program. -// -// This is a free function instead of a method to hide it from users -// of package ebpf. -func ProgramAppend(s, other *Program) error { - funcInfos, err := s.funcInfos.append(other.funcInfos, s.length) - if err != nil { - return fmt.Errorf("func infos: %w", err) - } - - lineInfos, err := s.lineInfos.append(other.lineInfos, s.length) - if err != nil { - return fmt.Errorf("line infos: %w", err) - } - - s.funcInfos = funcInfos - s.lineInfos = lineInfos - s.coreRelos = s.coreRelos.append(other.coreRelos, s.length) - s.length += other.length - return nil -} - -// ProgramFuncInfos returns the binary form of BTF function infos. -// -// This is a free function instead of a method to hide it from users -// of package ebpf. 
-func ProgramFuncInfos(s *Program) (recordSize uint32, bytes []byte, err error) { - bytes, err = s.funcInfos.MarshalBinary() - if err != nil { - return 0, nil, err - } - - return s.funcInfos.recordSize, bytes, nil -} - -// ProgramLineInfos returns the binary form of BTF line infos. -// -// This is a free function instead of a method to hide it from users -// of package ebpf. -func ProgramLineInfos(s *Program) (recordSize uint32, bytes []byte, err error) { - bytes, err = s.lineInfos.MarshalBinary() - if err != nil { - return 0, nil, err - } - - return s.lineInfos.recordSize, bytes, nil -} - -// ProgramFixups returns the changes required to adjust the program to the target. -// -// This is a free function instead of a method to hide it from users -// of package ebpf. -func ProgramFixups(s *Program, target *Spec) (COREFixups, error) { - if len(s.coreRelos) == 0 { - return nil, nil - } - - if target == nil { - var err error - target, err = LoadKernelSpec() - if err != nil { - return nil, err - } - } - - return coreRelocate(s.spec, target, s.coreRelos) -} - -type bpfLoadBTFAttr struct { - btf internal.Pointer - logBuf internal.Pointer - btfSize uint32 - btfLogSize uint32 - btfLogLevel uint32 -} - -func bpfLoadBTF(attr *bpfLoadBTFAttr) (*internal.FD, error) { - fd, err := internal.BPF(internal.BPF_BTF_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - if err != nil { - return nil, err - } - - return internal.NewFD(uint32(fd)), nil -} - -func marshalBTF(types interface{}, strings []byte, bo binary.ByteOrder) []byte { - const minHeaderLength = 24 - - typesLen := uint32(binary.Size(types)) - header := btfHeader{ - Magic: btfMagic, - Version: 1, - HdrLen: minHeaderLength, - TypeOff: 0, - TypeLen: typesLen, - StringOff: typesLen, - StringLen: uint32(len(strings)), - } - - buf := new(bytes.Buffer) - _ = binary.Write(buf, bo, &header) - _ = binary.Write(buf, bo, types) - buf.Write(strings) - - return buf.Bytes() -} - -var haveBTF = internal.FeatureTest("BTF", "5.1", func() error { - var ( - types struct { - Integer btfType - Var btfType - btfVar struct{ Linkage uint32 } - } - strings = []byte{0, 'a', 0} - ) - - // We use a BTF_KIND_VAR here, to make sure that - // the kernel understands BTF at least as well as we - // do. BTF_KIND_VAR was introduced ~5.1. - types.Integer.SetKind(kindPointer) - types.Var.NameOff = 1 - types.Var.SetKind(kindVar) - types.Var.SizeType = 1 - - btf := marshalBTF(&types, strings, internal.NativeEndian) - - fd, err := bpfLoadBTF(&bpfLoadBTFAttr{ - btf: internal.NewSlicePointer(btf), - btfSize: uint32(len(btf)), - }) - if errors.Is(err, unix.EINVAL) || errors.Is(err, unix.EPERM) { - // Treat both EINVAL and EPERM as not supported: loading the program - // might still succeed without BTF. 
- return internal.ErrNotSupported - } - if err != nil { - return err - } - - fd.Close() - return nil -}) - -var haveFuncLinkage = internal.FeatureTest("BTF func linkage", "5.6", func() error { - if err := haveBTF(); err != nil { - return err - } - - var ( - types struct { - FuncProto btfType - Func btfType - } - strings = []byte{0, 'a', 0} - ) - - types.FuncProto.SetKind(kindFuncProto) - types.Func.SetKind(kindFunc) - types.Func.SizeType = 1 // aka FuncProto - types.Func.NameOff = 1 - types.Func.SetLinkage(GlobalFunc) - - btf := marshalBTF(&types, strings, internal.NativeEndian) - - fd, err := bpfLoadBTF(&bpfLoadBTFAttr{ - btf: internal.NewSlicePointer(btf), - btfSize: uint32(len(btf)), - }) - if errors.Is(err, unix.EINVAL) { - return internal.ErrNotSupported - } - if err != nil { - return err - } - - fd.Close() - return nil -}) diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types.go deleted file mode 100644 index a5ef945120..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types.go +++ /dev/null @@ -1,282 +0,0 @@ -package btf - -import ( - "encoding/binary" - "fmt" - "io" -) - -//go:generate stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage - -// btfKind describes a Type. -type btfKind uint8 - -// Equivalents of the BTF_KIND_* constants. -const ( - kindUnknown btfKind = iota - kindInt - kindPointer - kindArray - kindStruct - kindUnion - kindEnum - kindForward - kindTypedef - kindVolatile - kindConst - kindRestrict - // Added ~4.20 - kindFunc - kindFuncProto - // Added ~5.1 - kindVar - kindDatasec -) - -// FuncLinkage describes BTF function linkage metadata. -type FuncLinkage int - -// Equivalent of enum btf_func_linkage. -const ( - StaticFunc FuncLinkage = iota // static - GlobalFunc // global - ExternFunc // extern -) - -// VarLinkage describes BTF variable linkage metadata. -type VarLinkage int - -const ( - StaticVar VarLinkage = iota // static - GlobalVar // global - ExternVar // extern -) - -const ( - btfTypeKindShift = 24 - btfTypeKindLen = 4 - btfTypeVlenShift = 0 - btfTypeVlenMask = 16 - btfTypeKindFlagShift = 31 - btfTypeKindFlagMask = 1 -) - -// btfType is equivalent to struct btf_type in Documentation/bpf/btf.rst. -type btfType struct { - NameOff uint32 - /* "info" bits arrangement - * bits 0-15: vlen (e.g. # of struct's members), linkage - * bits 16-23: unused - * bits 24-27: kind (e.g. int, ptr, array...etc) - * bits 28-30: unused - * bit 31: kind_flag, currently used by - * struct, union and fwd - */ - Info uint32 - /* "size" is used by INT, ENUM, STRUCT and UNION. - * "size" tells the size of the type it is describing. - * - * "type" is used by PTR, TYPEDEF, VOLATILE, CONST, RESTRICT, - * FUNC and FUNC_PROTO. - * "type" is a type_id referring to another type. 
- */ - SizeType uint32 -} - -func (k btfKind) String() string { - switch k { - case kindUnknown: - return "Unknown" - case kindInt: - return "Integer" - case kindPointer: - return "Pointer" - case kindArray: - return "Array" - case kindStruct: - return "Struct" - case kindUnion: - return "Union" - case kindEnum: - return "Enumeration" - case kindForward: - return "Forward" - case kindTypedef: - return "Typedef" - case kindVolatile: - return "Volatile" - case kindConst: - return "Const" - case kindRestrict: - return "Restrict" - case kindFunc: - return "Function" - case kindFuncProto: - return "Function Proto" - case kindVar: - return "Variable" - case kindDatasec: - return "Section" - default: - return fmt.Sprintf("Unknown (%d)", k) - } -} - -func mask(len uint32) uint32 { - return (1 << len) - 1 -} - -func (bt *btfType) info(len, shift uint32) uint32 { - return (bt.Info >> shift) & mask(len) -} - -func (bt *btfType) setInfo(value, len, shift uint32) { - bt.Info &^= mask(len) << shift - bt.Info |= (value & mask(len)) << shift -} - -func (bt *btfType) Kind() btfKind { - return btfKind(bt.info(btfTypeKindLen, btfTypeKindShift)) -} - -func (bt *btfType) SetKind(kind btfKind) { - bt.setInfo(uint32(kind), btfTypeKindLen, btfTypeKindShift) -} - -func (bt *btfType) Vlen() int { - return int(bt.info(btfTypeVlenMask, btfTypeVlenShift)) -} - -func (bt *btfType) SetVlen(vlen int) { - bt.setInfo(uint32(vlen), btfTypeVlenMask, btfTypeVlenShift) -} - -func (bt *btfType) KindFlag() bool { - return bt.info(btfTypeKindFlagMask, btfTypeKindFlagShift) == 1 -} - -func (bt *btfType) Linkage() FuncLinkage { - return FuncLinkage(bt.info(btfTypeVlenMask, btfTypeVlenShift)) -} - -func (bt *btfType) SetLinkage(linkage FuncLinkage) { - bt.setInfo(uint32(linkage), btfTypeVlenMask, btfTypeVlenShift) -} - -func (bt *btfType) Type() TypeID { - // TODO: Panic here if wrong kind? - return TypeID(bt.SizeType) -} - -func (bt *btfType) Size() uint32 { - // TODO: Panic here if wrong kind? 
- return bt.SizeType -} - -type rawType struct { - btfType - data interface{} -} - -func (rt *rawType) Marshal(w io.Writer, bo binary.ByteOrder) error { - if err := binary.Write(w, bo, &rt.btfType); err != nil { - return err - } - - if rt.data == nil { - return nil - } - - return binary.Write(w, bo, rt.data) -} - -type btfArray struct { - Type TypeID - IndexType TypeID - Nelems uint32 -} - -type btfMember struct { - NameOff uint32 - Type TypeID - Offset uint32 -} - -type btfVarSecinfo struct { - Type TypeID - Offset uint32 - Size uint32 -} - -type btfVariable struct { - Linkage uint32 -} - -type btfEnum struct { - NameOff uint32 - Val int32 -} - -type btfParam struct { - NameOff uint32 - Type TypeID -} - -func readTypes(r io.Reader, bo binary.ByteOrder) ([]rawType, error) { - var ( - header btfType - types []rawType - ) - - for id := TypeID(1); ; id++ { - if err := binary.Read(r, bo, &header); err == io.EOF { - return types, nil - } else if err != nil { - return nil, fmt.Errorf("can't read type info for id %v: %v", id, err) - } - - var data interface{} - switch header.Kind() { - case kindInt: - data = new(uint32) - case kindPointer: - case kindArray: - data = new(btfArray) - case kindStruct: - fallthrough - case kindUnion: - data = make([]btfMember, header.Vlen()) - case kindEnum: - data = make([]btfEnum, header.Vlen()) - case kindForward: - case kindTypedef: - case kindVolatile: - case kindConst: - case kindRestrict: - case kindFunc: - case kindFuncProto: - data = make([]btfParam, header.Vlen()) - case kindVar: - data = new(btfVariable) - case kindDatasec: - data = make([]btfVarSecinfo, header.Vlen()) - default: - return nil, fmt.Errorf("type id %v: unknown kind: %v", id, header.Kind()) - } - - if data == nil { - types = append(types, rawType{header, nil}) - continue - } - - if err := binary.Read(r, bo, data); err != nil { - return nil, fmt.Errorf("type id %d: kind %v: can't read %T: %v", id, header.Kind(), data, err) - } - - types = append(types, rawType{header, data}) - } -} - -func intEncoding(raw uint32) (IntEncoding, uint32, byte) { - return IntEncoding((raw & 0x0f000000) >> 24), (raw & 0x00ff0000) >> 16, byte(raw & 0x000000ff) -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types_string.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types_string.go deleted file mode 100644 index 0e0c17d68b..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/btf_types_string.go +++ /dev/null @@ -1,44 +0,0 @@ -// Code generated by "stringer -linecomment -output=btf_types_string.go -type=FuncLinkage,VarLinkage"; DO NOT EDIT. - -package btf - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[StaticFunc-0] - _ = x[GlobalFunc-1] - _ = x[ExternFunc-2] -} - -const _FuncLinkage_name = "staticglobalextern" - -var _FuncLinkage_index = [...]uint8{0, 6, 12, 18} - -func (i FuncLinkage) String() string { - if i < 0 || i >= FuncLinkage(len(_FuncLinkage_index)-1) { - return "FuncLinkage(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _FuncLinkage_name[_FuncLinkage_index[i]:_FuncLinkage_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[StaticVar-0] - _ = x[GlobalVar-1] - _ = x[ExternVar-2] -} - -const _VarLinkage_name = "staticglobalextern" - -var _VarLinkage_index = [...]uint8{0, 6, 12, 18} - -func (i VarLinkage) String() string { - if i < 0 || i >= VarLinkage(len(_VarLinkage_index)-1) { - return "VarLinkage(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _VarLinkage_name[_VarLinkage_index[i]:_VarLinkage_index[i+1]] -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/core.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/core.go deleted file mode 100644 index 7c888f602d..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/core.go +++ /dev/null @@ -1,887 +0,0 @@ -package btf - -import ( - "errors" - "fmt" - "math" - "reflect" - "sort" - "strconv" - "strings" - - "github.com/cilium/ebpf/asm" -) - -// Code in this file is derived from libbpf, which is available under a BSD -// 2-Clause license. - -// COREFixup is the result of computing a CO-RE relocation for a target. -type COREFixup struct { - Kind COREKind - Local uint32 - Target uint32 - Poison bool -} - -func (f COREFixup) equal(other COREFixup) bool { - return f.Local == other.Local && f.Target == other.Target -} - -func (f COREFixup) String() string { - if f.Poison { - return fmt.Sprintf("%s=poison", f.Kind) - } - return fmt.Sprintf("%s=%d->%d", f.Kind, f.Local, f.Target) -} - -func (f COREFixup) apply(ins *asm.Instruction) error { - if f.Poison { - return errors.New("can't poison individual instruction") - } - - switch class := ins.OpCode.Class(); class { - case asm.LdXClass, asm.StClass, asm.StXClass: - if want := int16(f.Local); want != ins.Offset { - return fmt.Errorf("invalid offset %d, expected %d", ins.Offset, want) - } - - if f.Target > math.MaxInt16 { - return fmt.Errorf("offset %d exceeds MaxInt16", f.Target) - } - - ins.Offset = int16(f.Target) - - case asm.LdClass: - if !ins.IsConstantLoad(asm.DWord) { - return fmt.Errorf("not a dword-sized immediate load") - } - - if want := int64(f.Local); want != ins.Constant { - return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, want) - } - - ins.Constant = int64(f.Target) - - case asm.ALUClass: - if ins.OpCode.ALUOp() == asm.Swap { - return fmt.Errorf("relocation against swap") - } - - fallthrough - - case asm.ALU64Class: - if src := ins.OpCode.Source(); src != asm.ImmSource { - return fmt.Errorf("invalid source %s", src) - } - - if want := int64(f.Local); want != ins.Constant { - return fmt.Errorf("invalid immediate %d, expected %d", ins.Constant, want) - } - - if f.Target > math.MaxInt32 { - return fmt.Errorf("immediate %d exceeds MaxInt32", f.Target) - } - - ins.Constant = int64(f.Target) - - default: - return fmt.Errorf("invalid class %s", class) - } - - return nil -} - -func (f COREFixup) isNonExistant() bool { - return f.Kind.checksForExistence() && f.Target == 0 -} - -type COREFixups map[uint64]COREFixup - -// Apply a set of CO-RE relocations to a BPF program. 
-func (fs COREFixups) Apply(insns asm.Instructions) (asm.Instructions, error) { - if len(fs) == 0 { - cpy := make(asm.Instructions, len(insns)) - copy(cpy, insns) - return insns, nil - } - - cpy := make(asm.Instructions, 0, len(insns)) - iter := insns.Iterate() - for iter.Next() { - fixup, ok := fs[iter.Offset.Bytes()] - if !ok { - cpy = append(cpy, *iter.Ins) - continue - } - - ins := *iter.Ins - if fixup.Poison { - const badRelo = asm.BuiltinFunc(0xbad2310) - - cpy = append(cpy, badRelo.Call()) - if ins.OpCode.IsDWordLoad() { - // 64 bit constant loads occupy two raw bpf instructions, so - // we need to add another instruction as padding. - cpy = append(cpy, badRelo.Call()) - } - - continue - } - - if err := fixup.apply(&ins); err != nil { - return nil, fmt.Errorf("instruction %d, offset %d: %s: %w", iter.Index, iter.Offset.Bytes(), fixup.Kind, err) - } - - cpy = append(cpy, ins) - } - - return cpy, nil -} - -// COREKind is the type of CO-RE relocation -type COREKind uint32 - -const ( - reloFieldByteOffset COREKind = iota /* field byte offset */ - reloFieldByteSize /* field size in bytes */ - reloFieldExists /* field existence in target kernel */ - reloFieldSigned /* field signedness (0 - unsigned, 1 - signed) */ - reloFieldLShiftU64 /* bitfield-specific left bitshift */ - reloFieldRShiftU64 /* bitfield-specific right bitshift */ - reloTypeIDLocal /* type ID in local BPF object */ - reloTypeIDTarget /* type ID in target kernel */ - reloTypeExists /* type existence in target kernel */ - reloTypeSize /* type size in bytes */ - reloEnumvalExists /* enum value existence in target kernel */ - reloEnumvalValue /* enum value integer value */ -) - -func (k COREKind) String() string { - switch k { - case reloFieldByteOffset: - return "byte_off" - case reloFieldByteSize: - return "byte_sz" - case reloFieldExists: - return "field_exists" - case reloFieldSigned: - return "signed" - case reloFieldLShiftU64: - return "lshift_u64" - case reloFieldRShiftU64: - return "rshift_u64" - case reloTypeIDLocal: - return "local_type_id" - case reloTypeIDTarget: - return "target_type_id" - case reloTypeExists: - return "type_exists" - case reloTypeSize: - return "type_size" - case reloEnumvalExists: - return "enumval_exists" - case reloEnumvalValue: - return "enumval_value" - default: - return "unknown" - } -} - -func (k COREKind) checksForExistence() bool { - return k == reloEnumvalExists || k == reloTypeExists || k == reloFieldExists -} - -func coreRelocate(local, target *Spec, relos coreRelos) (COREFixups, error) { - if local.byteOrder != target.byteOrder { - return nil, fmt.Errorf("can't relocate %s against %s", local.byteOrder, target.byteOrder) - } - - var ids []TypeID - relosByID := make(map[TypeID]coreRelos) - result := make(COREFixups, len(relos)) - for _, relo := range relos { - if relo.kind == reloTypeIDLocal { - // Filtering out reloTypeIDLocal here makes our lives a lot easier - // down the line, since it doesn't have a target at all. - if len(relo.accessor) > 1 || relo.accessor[0] != 0 { - return nil, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) - } - - result[uint64(relo.insnOff)] = COREFixup{ - relo.kind, - uint32(relo.typeID), - uint32(relo.typeID), - false, - } - continue - } - - relos, ok := relosByID[relo.typeID] - if !ok { - ids = append(ids, relo.typeID) - } - relosByID[relo.typeID] = append(relos, relo) - } - - // Ensure we work on relocations in a deterministic order. 
- sort.Slice(ids, func(i, j int) bool { - return ids[i] < ids[j] - }) - - for _, id := range ids { - if int(id) >= len(local.types) { - return nil, fmt.Errorf("invalid type id %d", id) - } - - localType := local.types[id] - named, ok := localType.(namedType) - if !ok || named.name() == "" { - return nil, fmt.Errorf("relocate unnamed or anonymous type %s: %w", localType, ErrNotSupported) - } - - relos := relosByID[id] - targets := target.namedTypes[named.essentialName()] - fixups, err := coreCalculateFixups(localType, targets, relos) - if err != nil { - return nil, fmt.Errorf("relocate %s: %w", localType, err) - } - - for i, relo := range relos { - result[uint64(relo.insnOff)] = fixups[i] - } - } - - return result, nil -} - -var errAmbiguousRelocation = errors.New("ambiguous relocation") -var errImpossibleRelocation = errors.New("impossible relocation") - -// coreCalculateFixups calculates the fixups for the given relocations using -// the "best" target. -// -// The best target is determined by scoring: the less poisoning we have to do -// the better the target is. -func coreCalculateFixups(local Type, targets []namedType, relos coreRelos) ([]COREFixup, error) { - localID := local.ID() - local, err := copyType(local, skipQualifierAndTypedef) - if err != nil { - return nil, err - } - - bestScore := len(relos) - var bestFixups []COREFixup - for i := range targets { - targetID := targets[i].ID() - target, err := copyType(targets[i], skipQualifierAndTypedef) - if err != nil { - return nil, err - } - - score := 0 // lower is better - fixups := make([]COREFixup, 0, len(relos)) - for _, relo := range relos { - fixup, err := coreCalculateFixup(local, localID, target, targetID, relo) - if err != nil { - return nil, fmt.Errorf("target %s: %w", target, err) - } - if fixup.Poison || fixup.isNonExistant() { - score++ - } - fixups = append(fixups, fixup) - } - - if score > bestScore { - // We have a better target already, ignore this one. - continue - } - - if score < bestScore { - // This is the best target yet, use it. - bestScore = score - bestFixups = fixups - continue - } - - // Some other target has the same score as the current one. Make sure - // the fixups agree with each other. - for i, fixup := range bestFixups { - if !fixup.equal(fixups[i]) { - return nil, fmt.Errorf("%s: multiple types match: %w", fixup.Kind, errAmbiguousRelocation) - } - } - } - - if bestFixups == nil { - // Nothing at all matched, probably because there are no suitable - // targets at all. Poison everything! - bestFixups = make([]COREFixup, len(relos)) - for i, relo := range relos { - bestFixups[i] = COREFixup{Kind: relo.kind, Poison: true} - } - } - - return bestFixups, nil -} - -// coreCalculateFixup calculates the fixup for a single local type, target type -// and relocation. 
-func coreCalculateFixup(local Type, localID TypeID, target Type, targetID TypeID, relo coreRelo) (COREFixup, error) { - fixup := func(local, target uint32) (COREFixup, error) { - return COREFixup{relo.kind, local, target, false}, nil - } - poison := func() (COREFixup, error) { - if relo.kind.checksForExistence() { - return fixup(1, 0) - } - return COREFixup{relo.kind, 0, 0, true}, nil - } - zero := COREFixup{} - - switch relo.kind { - case reloTypeIDTarget, reloTypeSize, reloTypeExists: - if len(relo.accessor) > 1 || relo.accessor[0] != 0 { - return zero, fmt.Errorf("%s: unexpected accessor %v", relo.kind, relo.accessor) - } - - err := coreAreTypesCompatible(local, target) - if errors.Is(err, errImpossibleRelocation) { - return poison() - } - if err != nil { - return zero, fmt.Errorf("relocation %s: %w", relo.kind, err) - } - - switch relo.kind { - case reloTypeExists: - return fixup(1, 1) - - case reloTypeIDTarget: - return fixup(uint32(localID), uint32(targetID)) - - case reloTypeSize: - localSize, err := Sizeof(local) - if err != nil { - return zero, err - } - - targetSize, err := Sizeof(target) - if err != nil { - return zero, err - } - - return fixup(uint32(localSize), uint32(targetSize)) - } - - case reloEnumvalValue, reloEnumvalExists: - localValue, targetValue, err := coreFindEnumValue(local, relo.accessor, target) - if errors.Is(err, errImpossibleRelocation) { - return poison() - } - if err != nil { - return zero, fmt.Errorf("relocation %s: %w", relo.kind, err) - } - - switch relo.kind { - case reloEnumvalExists: - return fixup(1, 1) - - case reloEnumvalValue: - return fixup(uint32(localValue.Value), uint32(targetValue.Value)) - } - - case reloFieldByteOffset, reloFieldByteSize, reloFieldExists: - if _, ok := target.(*Fwd); ok { - // We can't relocate fields using a forward declaration, so - // skip it. If a non-forward declaration is present in the BTF - // we'll find it in one of the other iterations. - return poison() - } - - localField, targetField, err := coreFindField(local, relo.accessor, target) - if errors.Is(err, errImpossibleRelocation) { - return poison() - } - if err != nil { - return zero, fmt.Errorf("target %s: %w", target, err) - } - - switch relo.kind { - case reloFieldExists: - return fixup(1, 1) - - case reloFieldByteOffset: - return fixup(localField.offset/8, targetField.offset/8) - - case reloFieldByteSize: - localSize, err := Sizeof(localField.Type) - if err != nil { - return zero, err - } - - targetSize, err := Sizeof(targetField.Type) - if err != nil { - return zero, err - } - - return fixup(uint32(localSize), uint32(targetSize)) - - } - } - - return zero, fmt.Errorf("relocation %s: %w", relo.kind, ErrNotSupported) -} - -/* coreAccessor contains a path through a struct. It contains at least one index. - * - * The interpretation depends on the kind of the relocation. The following is - * taken from struct bpf_core_relo in libbpf_internal.h: - * - * - for field-based relocations, string encodes an accessed field using - * a sequence of field and array indices, separated by colon (:). It's - * conceptually very close to LLVM's getelementptr ([0]) instruction's - * arguments for identifying offset to a field. - * - for type-based relocations, strings is expected to be just "0"; - * - for enum value-based relocations, string contains an index of enum - * value within its enum type; - * - * Example to provide a better feel. 
- * - * struct sample { - * int a; - * struct { - * int b[10]; - * }; - * }; - * - * struct sample s = ...; - * int x = &s->a; // encoded as "0:0" (a is field #0) - * int y = &s->b[5]; // encoded as "0:1:0:5" (anon struct is field #1, - * // b is field #0 inside anon struct, accessing elem #5) - * int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - */ -type coreAccessor []int - -func parseCoreAccessor(accessor string) (coreAccessor, error) { - if accessor == "" { - return nil, fmt.Errorf("empty accessor") - } - - var result coreAccessor - parts := strings.Split(accessor, ":") - for _, part := range parts { - // 31 bits to avoid overflowing int on 32 bit platforms. - index, err := strconv.ParseUint(part, 10, 31) - if err != nil { - return nil, fmt.Errorf("accessor index %q: %s", part, err) - } - - result = append(result, int(index)) - } - - return result, nil -} - -func (ca coreAccessor) String() string { - strs := make([]string, 0, len(ca)) - for _, i := range ca { - strs = append(strs, strconv.Itoa(i)) - } - return strings.Join(strs, ":") -} - -func (ca coreAccessor) enumValue(t Type) (*EnumValue, error) { - e, ok := t.(*Enum) - if !ok { - return nil, fmt.Errorf("not an enum: %s", t) - } - - if len(ca) > 1 { - return nil, fmt.Errorf("invalid accessor %s for enum", ca) - } - - i := ca[0] - if i >= len(e.Values) { - return nil, fmt.Errorf("invalid index %d for %s", i, e) - } - - return &e.Values[i], nil -} - -type coreField struct { - Type Type - offset uint32 -} - -func adjustOffset(base uint32, t Type, n int) (uint32, error) { - size, err := Sizeof(t) - if err != nil { - return 0, err - } - - return base + (uint32(n) * uint32(size) * 8), nil -} - -// coreFindField descends into the local type using the accessor and tries to -// find an equivalent field in target at each step. -// -// Returns the field and the offset of the field from the start of -// target in bits. -func coreFindField(local Type, localAcc coreAccessor, target Type) (_, _ coreField, _ error) { - // The first index is used to offset a pointer of the base type like - // when accessing an array. - localOffset, err := adjustOffset(0, local, localAcc[0]) - if err != nil { - return coreField{}, coreField{}, err - } - - targetOffset, err := adjustOffset(0, target, localAcc[0]) - if err != nil { - return coreField{}, coreField{}, err - } - - if err := coreAreMembersCompatible(local, target); err != nil { - return coreField{}, coreField{}, fmt.Errorf("fields: %w", err) - } - - var localMaybeFlex, targetMaybeFlex bool - for _, acc := range localAcc[1:] { - switch localType := local.(type) { - case composite: - // For composite types acc is used to find the field in the local type, - // and then we try to find a field in target with the same name. - localMembers := localType.members() - if acc >= len(localMembers) { - return coreField{}, coreField{}, fmt.Errorf("invalid accessor %d for %s", acc, local) - } - - localMember := localMembers[acc] - if localMember.Name == "" { - _, ok := localMember.Type.(composite) - if !ok { - return coreField{}, coreField{}, fmt.Errorf("unnamed field with type %s: %s", localMember.Type, ErrNotSupported) - } - - // This is an anonymous struct or union, ignore it. 
- local = localMember.Type - localOffset += localMember.Offset - localMaybeFlex = false - continue - } - - targetType, ok := target.(composite) - if !ok { - return coreField{}, coreField{}, fmt.Errorf("target not composite: %w", errImpossibleRelocation) - } - - targetMember, last, err := coreFindMember(targetType, localMember.Name) - if err != nil { - return coreField{}, coreField{}, err - } - - if targetMember.BitfieldSize > 0 { - return coreField{}, coreField{}, fmt.Errorf("field %q is a bitfield: %w", targetMember.Name, ErrNotSupported) - } - - local = localMember.Type - localMaybeFlex = acc == len(localMembers)-1 - localOffset += localMember.Offset - target = targetMember.Type - targetMaybeFlex = last - targetOffset += targetMember.Offset - - case *Array: - // For arrays, acc is the index in the target. - targetType, ok := target.(*Array) - if !ok { - return coreField{}, coreField{}, fmt.Errorf("target not array: %w", errImpossibleRelocation) - } - - if localType.Nelems == 0 && !localMaybeFlex { - return coreField{}, coreField{}, fmt.Errorf("local type has invalid flexible array") - } - if targetType.Nelems == 0 && !targetMaybeFlex { - return coreField{}, coreField{}, fmt.Errorf("target type has invalid flexible array") - } - - if localType.Nelems > 0 && acc >= int(localType.Nelems) { - return coreField{}, coreField{}, fmt.Errorf("invalid access of %s at index %d", localType, acc) - } - if targetType.Nelems > 0 && acc >= int(targetType.Nelems) { - return coreField{}, coreField{}, fmt.Errorf("out of bounds access of target: %w", errImpossibleRelocation) - } - - local = localType.Type - localMaybeFlex = false - localOffset, err = adjustOffset(localOffset, local, acc) - if err != nil { - return coreField{}, coreField{}, err - } - - target = targetType.Type - targetMaybeFlex = false - targetOffset, err = adjustOffset(targetOffset, target, acc) - if err != nil { - return coreField{}, coreField{}, err - } - - default: - return coreField{}, coreField{}, fmt.Errorf("relocate field of %T: %w", localType, ErrNotSupported) - } - - if err := coreAreMembersCompatible(local, target); err != nil { - return coreField{}, coreField{}, err - } - } - - return coreField{local, localOffset}, coreField{target, targetOffset}, nil -} - -// coreFindMember finds a member in a composite type while handling anonymous -// structs and unions. -func coreFindMember(typ composite, name Name) (Member, bool, error) { - if name == "" { - return Member{}, false, errors.New("can't search for anonymous member") - } - - type offsetTarget struct { - composite - offset uint32 - } - - targets := []offsetTarget{{typ, 0}} - visited := make(map[composite]bool) - - for i := 0; i < len(targets); i++ { - target := targets[i] - - // Only visit targets once to prevent infinite recursion. - if visited[target] { - continue - } - if len(visited) >= maxTypeDepth { - // This check is different than libbpf, which restricts the entire - // path to BPF_CORE_SPEC_MAX_LEN items. - return Member{}, false, fmt.Errorf("type is nested too deep") - } - visited[target] = true - - members := target.members() - for j, member := range members { - if member.Name == name { - // NB: This is safe because member is a copy. - member.Offset += target.offset - return member, j == len(members)-1, nil - } - - // The names don't match, but this member could be an anonymous struct - // or union. 
- if member.Name != "" { - continue - } - - comp, ok := member.Type.(composite) - if !ok { - return Member{}, false, fmt.Errorf("anonymous non-composite type %T not allowed", member.Type) - } - - targets = append(targets, offsetTarget{comp, target.offset + member.Offset}) - } - } - - return Member{}, false, fmt.Errorf("no matching member: %w", errImpossibleRelocation) -} - -// coreFindEnumValue follows localAcc to find the equivalent enum value in target. -func coreFindEnumValue(local Type, localAcc coreAccessor, target Type) (localValue, targetValue *EnumValue, _ error) { - localValue, err := localAcc.enumValue(local) - if err != nil { - return nil, nil, err - } - - targetEnum, ok := target.(*Enum) - if !ok { - return nil, nil, errImpossibleRelocation - } - - localName := localValue.Name.essentialName() - for i, targetValue := range targetEnum.Values { - if targetValue.Name.essentialName() != localName { - continue - } - - return localValue, &targetEnum.Values[i], nil - } - - return nil, nil, errImpossibleRelocation -} - -/* The comment below is from bpf_core_types_are_compat in libbpf.c: - * - * Check local and target types for compatibility. This check is used for - * type-based CO-RE relocations and follow slightly different rules than - * field-based relocations. This function assumes that root types were already - * checked for name match. Beyond that initial root-level name check, names - * are completely ignored. Compatibility rules are as follows: - * - any two STRUCTs/UNIONs/FWDs/ENUMs/INTs are considered compatible, but - * kind should match for local and target types (i.e., STRUCT is not - * compatible with UNION); - * - for ENUMs, the size is ignored; - * - for INT, size and signedness are ignored; - * - for ARRAY, dimensionality is ignored, element types are checked for - * compatibility recursively; - * - CONST/VOLATILE/RESTRICT modifiers are ignored; - * - TYPEDEFs/PTRs are compatible if types they pointing to are compatible; - * - FUNC_PROTOs are compatible if they have compatible signature: same - * number of input args and compatible return and argument types. - * These rules are not set in stone and probably will be adjusted as we get - * more experience with using BPF CO-RE relocations. - * - * Returns errImpossibleRelocation if types are not compatible. 
- */ -func coreAreTypesCompatible(localType Type, targetType Type) error { - var ( - localTs, targetTs typeDeque - l, t = &localType, &targetType - depth = 0 - ) - - for ; l != nil && t != nil; l, t = localTs.shift(), targetTs.shift() { - if depth >= maxTypeDepth { - return errors.New("types are nested too deep") - } - - localType = *l - targetType = *t - - if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { - return fmt.Errorf("type mismatch: %w", errImpossibleRelocation) - } - - switch lv := (localType).(type) { - case *Void, *Struct, *Union, *Enum, *Fwd: - // Nothing to do here - - case *Int: - tv := targetType.(*Int) - if lv.isBitfield() || tv.isBitfield() { - return fmt.Errorf("bitfield: %w", errImpossibleRelocation) - } - - case *Pointer, *Array: - depth++ - localType.walk(&localTs) - targetType.walk(&targetTs) - - case *FuncProto: - tv := targetType.(*FuncProto) - if len(lv.Params) != len(tv.Params) { - return fmt.Errorf("function param mismatch: %w", errImpossibleRelocation) - } - - depth++ - localType.walk(&localTs) - targetType.walk(&targetTs) - - default: - return fmt.Errorf("unsupported type %T", localType) - } - } - - if l != nil { - return fmt.Errorf("dangling local type %T", *l) - } - - if t != nil { - return fmt.Errorf("dangling target type %T", *t) - } - - return nil -} - -/* coreAreMembersCompatible checks two types for field-based relocation compatibility. - * - * The comment below is from bpf_core_fields_are_compat in libbpf.c: - * - * Check two types for compatibility for the purpose of field access - * relocation. const/volatile/restrict and typedefs are skipped to ensure we - * are relocating semantically compatible entities: - * - any two STRUCTs/UNIONs are compatible and can be mixed; - * - any two FWDs are compatible, if their names match (modulo flavor suffix); - * - any two PTRs are always compatible; - * - for ENUMs, names should be the same (ignoring flavor suffix) or at - * least one of enums should be anonymous; - * - for ENUMs, check sizes, names are ignored; - * - for INT, size and signedness are ignored; - * - for ARRAY, dimensionality is ignored, element types are checked for - * compatibility recursively; - * [ NB: coreAreMembersCompatible doesn't recurse, this check is done - * by coreFindField. ] - * - everything else shouldn't be ever a target of relocation. - * These rules are not set in stone and probably will be adjusted as we get - * more experience with using BPF CO-RE relocations. - * - * Returns errImpossibleRelocation if the members are not compatible. 
- */ -func coreAreMembersCompatible(localType Type, targetType Type) error { - doNamesMatch := func(a, b string) error { - if a == "" || b == "" { - // allow anonymous and named type to match - return nil - } - - if essentialName(a) == essentialName(b) { - return nil - } - - return fmt.Errorf("names don't match: %w", errImpossibleRelocation) - } - - _, lok := localType.(composite) - _, tok := targetType.(composite) - if lok && tok { - return nil - } - - if reflect.TypeOf(localType) != reflect.TypeOf(targetType) { - return fmt.Errorf("type mismatch: %w", errImpossibleRelocation) - } - - switch lv := localType.(type) { - case *Array, *Pointer: - return nil - - case *Enum: - tv := targetType.(*Enum) - return doNamesMatch(lv.name(), tv.name()) - - case *Fwd: - tv := targetType.(*Fwd) - return doNamesMatch(lv.name(), tv.name()) - - case *Int: - tv := targetType.(*Int) - if lv.isBitfield() || tv.isBitfield() { - return fmt.Errorf("bitfield: %w", errImpossibleRelocation) - } - return nil - - default: - return fmt.Errorf("type %s: %w", localType, ErrNotSupported) - } -} - -func skipQualifierAndTypedef(typ Type) (Type, error) { - result := typ - for depth := 0; depth <= maxTypeDepth; depth++ { - switch v := (result).(type) { - case qualifier: - result = v.qualify() - case *Typedef: - result = v.Type - default: - return result, nil - } - } - return nil, errors.New("exceeded type depth") -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/doc.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/doc.go deleted file mode 100644 index ad2576cb23..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/doc.go +++ /dev/null @@ -1,8 +0,0 @@ -// Package btf handles data encoded according to the BPF Type Format. -// -// The canonical documentation lives in the Linux kernel repository and is -// available at https://www.kernel.org/doc/html/latest/bpf/btf.html -// -// The API is very much unstable. You should only use this via the main -// ebpf library. 
-package btf diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/ext_info.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/ext_info.go deleted file mode 100644 index beba1bce69..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/ext_info.go +++ /dev/null @@ -1,303 +0,0 @@ -package btf - -import ( - "bufio" - "bytes" - "encoding/binary" - "errors" - "fmt" - "io" - "io/ioutil" - - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/internal" -) - -type btfExtHeader struct { - Magic uint16 - Version uint8 - Flags uint8 - HdrLen uint32 - - FuncInfoOff uint32 - FuncInfoLen uint32 - LineInfoOff uint32 - LineInfoLen uint32 -} - -type btfExtCoreHeader struct { - CoreReloOff uint32 - CoreReloLen uint32 -} - -func parseExtInfos(r io.ReadSeeker, bo binary.ByteOrder, strings stringTable) (funcInfo, lineInfo map[string]extInfo, relos map[string]coreRelos, err error) { - var header btfExtHeader - var coreHeader btfExtCoreHeader - if err := binary.Read(r, bo, &header); err != nil { - return nil, nil, nil, fmt.Errorf("can't read header: %v", err) - } - - if header.Magic != btfMagic { - return nil, nil, nil, fmt.Errorf("incorrect magic value %v", header.Magic) - } - - if header.Version != 1 { - return nil, nil, nil, fmt.Errorf("unexpected version %v", header.Version) - } - - if header.Flags != 0 { - return nil, nil, nil, fmt.Errorf("unsupported flags %v", header.Flags) - } - - remainder := int64(header.HdrLen) - int64(binary.Size(&header)) - if remainder < 0 { - return nil, nil, nil, errors.New("header is too short") - } - - coreHdrSize := int64(binary.Size(&coreHeader)) - if remainder >= coreHdrSize { - if err := binary.Read(r, bo, &coreHeader); err != nil { - return nil, nil, nil, fmt.Errorf("can't read CO-RE relocation header: %v", err) - } - remainder -= coreHdrSize - } - - // Of course, the .BTF.ext header has different semantics than the - // .BTF ext header. We need to ignore non-null values. 
- _, err = io.CopyN(ioutil.Discard, r, remainder) - if err != nil { - return nil, nil, nil, fmt.Errorf("header padding: %v", err) - } - - if _, err := r.Seek(int64(header.HdrLen+header.FuncInfoOff), io.SeekStart); err != nil { - return nil, nil, nil, fmt.Errorf("can't seek to function info section: %v", err) - } - - buf := bufio.NewReader(io.LimitReader(r, int64(header.FuncInfoLen))) - funcInfo, err = parseExtInfo(buf, bo, strings) - if err != nil { - return nil, nil, nil, fmt.Errorf("function info: %w", err) - } - - if _, err := r.Seek(int64(header.HdrLen+header.LineInfoOff), io.SeekStart); err != nil { - return nil, nil, nil, fmt.Errorf("can't seek to line info section: %v", err) - } - - buf = bufio.NewReader(io.LimitReader(r, int64(header.LineInfoLen))) - lineInfo, err = parseExtInfo(buf, bo, strings) - if err != nil { - return nil, nil, nil, fmt.Errorf("line info: %w", err) - } - - if coreHeader.CoreReloOff > 0 && coreHeader.CoreReloLen > 0 { - if _, err := r.Seek(int64(header.HdrLen+coreHeader.CoreReloOff), io.SeekStart); err != nil { - return nil, nil, nil, fmt.Errorf("can't seek to CO-RE relocation section: %v", err) - } - - relos, err = parseExtInfoRelos(io.LimitReader(r, int64(coreHeader.CoreReloLen)), bo, strings) - if err != nil { - return nil, nil, nil, fmt.Errorf("CO-RE relocation info: %w", err) - } - } - - return funcInfo, lineInfo, relos, nil -} - -type btfExtInfoSec struct { - SecNameOff uint32 - NumInfo uint32 -} - -type extInfoRecord struct { - InsnOff uint64 - Opaque []byte -} - -type extInfo struct { - recordSize uint32 - records []extInfoRecord -} - -func (ei extInfo) append(other extInfo, offset uint64) (extInfo, error) { - if other.recordSize != ei.recordSize { - return extInfo{}, fmt.Errorf("ext_info record size mismatch, want %d (got %d)", ei.recordSize, other.recordSize) - } - - records := make([]extInfoRecord, 0, len(ei.records)+len(other.records)) - records = append(records, ei.records...) - for _, info := range other.records { - records = append(records, extInfoRecord{ - InsnOff: info.InsnOff + offset, - Opaque: info.Opaque, - }) - } - return extInfo{ei.recordSize, records}, nil -} - -func (ei extInfo) MarshalBinary() ([]byte, error) { - if len(ei.records) == 0 { - return nil, nil - } - - buf := bytes.NewBuffer(make([]byte, 0, int(ei.recordSize)*len(ei.records))) - for _, info := range ei.records { - // The kernel expects offsets in number of raw bpf instructions, - // while the ELF tracks it in bytes. 
- insnOff := uint32(info.InsnOff / asm.InstructionSize) - if err := binary.Write(buf, internal.NativeEndian, insnOff); err != nil { - return nil, fmt.Errorf("can't write instruction offset: %v", err) - } - - buf.Write(info.Opaque) - } - - return buf.Bytes(), nil -} - -func parseExtInfo(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]extInfo, error) { - const maxRecordSize = 256 - - var recordSize uint32 - if err := binary.Read(r, bo, &recordSize); err != nil { - return nil, fmt.Errorf("can't read record size: %v", err) - } - - if recordSize < 4 { - // Need at least insnOff - return nil, errors.New("record size too short") - } - if recordSize > maxRecordSize { - return nil, fmt.Errorf("record size %v exceeds %v", recordSize, maxRecordSize) - } - - result := make(map[string]extInfo) - for { - secName, infoHeader, err := parseExtInfoHeader(r, bo, strings) - if errors.Is(err, io.EOF) { - return result, nil - } - - var records []extInfoRecord - for i := uint32(0); i < infoHeader.NumInfo; i++ { - var byteOff uint32 - if err := binary.Read(r, bo, &byteOff); err != nil { - return nil, fmt.Errorf("section %v: can't read extended info offset: %v", secName, err) - } - - buf := make([]byte, int(recordSize-4)) - if _, err := io.ReadFull(r, buf); err != nil { - return nil, fmt.Errorf("section %v: can't read record: %v", secName, err) - } - - if byteOff%asm.InstructionSize != 0 { - return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, byteOff) - } - - records = append(records, extInfoRecord{uint64(byteOff), buf}) - } - - result[secName] = extInfo{ - recordSize, - records, - } - } -} - -// bpfCoreRelo matches `struct bpf_core_relo` from the kernel -type bpfCoreRelo struct { - InsnOff uint32 - TypeID TypeID - AccessStrOff uint32 - Kind COREKind -} - -type coreRelo struct { - insnOff uint32 - typeID TypeID - accessor coreAccessor - kind COREKind -} - -type coreRelos []coreRelo - -// append two slices of extInfoRelo to each other. The InsnOff of b are adjusted -// by offset. -func (r coreRelos) append(other coreRelos, offset uint64) coreRelos { - result := make([]coreRelo, 0, len(r)+len(other)) - result = append(result, r...) 
- for _, relo := range other { - relo.insnOff += uint32(offset) - result = append(result, relo) - } - return result -} - -var extInfoReloSize = binary.Size(bpfCoreRelo{}) - -func parseExtInfoRelos(r io.Reader, bo binary.ByteOrder, strings stringTable) (map[string]coreRelos, error) { - var recordSize uint32 - if err := binary.Read(r, bo, &recordSize); err != nil { - return nil, fmt.Errorf("read record size: %v", err) - } - - if recordSize != uint32(extInfoReloSize) { - return nil, fmt.Errorf("expected record size %d, got %d", extInfoReloSize, recordSize) - } - - result := make(map[string]coreRelos) - for { - secName, infoHeader, err := parseExtInfoHeader(r, bo, strings) - if errors.Is(err, io.EOF) { - return result, nil - } - - var relos coreRelos - for i := uint32(0); i < infoHeader.NumInfo; i++ { - var relo bpfCoreRelo - if err := binary.Read(r, bo, &relo); err != nil { - return nil, fmt.Errorf("section %v: read record: %v", secName, err) - } - - if relo.InsnOff%asm.InstructionSize != 0 { - return nil, fmt.Errorf("section %v: offset %v is not aligned with instruction size", secName, relo.InsnOff) - } - - accessorStr, err := strings.Lookup(relo.AccessStrOff) - if err != nil { - return nil, err - } - - accessor, err := parseCoreAccessor(accessorStr) - if err != nil { - return nil, fmt.Errorf("accessor %q: %s", accessorStr, err) - } - - relos = append(relos, coreRelo{ - relo.InsnOff, - relo.TypeID, - accessor, - relo.Kind, - }) - } - - result[secName] = relos - } -} - -func parseExtInfoHeader(r io.Reader, bo binary.ByteOrder, strings stringTable) (string, *btfExtInfoSec, error) { - var infoHeader btfExtInfoSec - if err := binary.Read(r, bo, &infoHeader); err != nil { - return "", nil, fmt.Errorf("read ext info header: %w", err) - } - - secName, err := strings.Lookup(infoHeader.SecNameOff) - if err != nil { - return "", nil, fmt.Errorf("get section name: %w", err) - } - - if infoHeader.NumInfo == 0 { - return "", nil, fmt.Errorf("section %s has zero records", secName) - } - - return secName, &infoHeader, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/fuzz.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/fuzz.go deleted file mode 100644 index 37e043fd37..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/fuzz.go +++ /dev/null @@ -1,49 +0,0 @@ -// +build gofuzz - -// Use with https://github.com/dvyukov/go-fuzz - -package btf - -import ( - "bytes" - "encoding/binary" - - "github.com/cilium/ebpf/internal" -) - -func FuzzSpec(data []byte) int { - if len(data) < binary.Size(btfHeader{}) { - return -1 - } - - spec, err := loadNakedSpec(bytes.NewReader(data), internal.NativeEndian, nil, nil) - if err != nil { - if spec != nil { - panic("spec is not nil") - } - return 0 - } - if spec == nil { - panic("spec is nil") - } - return 1 -} - -func FuzzExtInfo(data []byte) int { - if len(data) < binary.Size(btfExtHeader{}) { - return -1 - } - - table := stringTable("\x00foo\x00barfoo\x00") - info, err := parseExtInfo(bytes.NewReader(data), internal.NativeEndian, table) - if err != nil { - if info != nil { - panic("info is not nil") - } - return 0 - } - if info == nil { - panic("info is nil") - } - return 1 -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/strings.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/strings.go deleted file mode 100644 index 8782643a04..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/strings.go +++ /dev/null @@ -1,60 +0,0 @@ -package btf - -import ( - "bytes" - 
"errors" - "fmt" - "io" - "io/ioutil" -) - -type stringTable []byte - -func readStringTable(r io.Reader) (stringTable, error) { - contents, err := ioutil.ReadAll(r) - if err != nil { - return nil, fmt.Errorf("can't read string table: %v", err) - } - - if len(contents) < 1 { - return nil, errors.New("string table is empty") - } - - if contents[0] != '\x00' { - return nil, errors.New("first item in string table is non-empty") - } - - if contents[len(contents)-1] != '\x00' { - return nil, errors.New("string table isn't null terminated") - } - - return stringTable(contents), nil -} - -func (st stringTable) Lookup(offset uint32) (string, error) { - if int64(offset) > int64(^uint(0)>>1) { - return "", fmt.Errorf("offset %d overflows int", offset) - } - - pos := int(offset) - if pos >= len(st) { - return "", fmt.Errorf("offset %d is out of bounds", offset) - } - - if pos > 0 && st[pos-1] != '\x00' { - return "", fmt.Errorf("offset %d isn't start of a string", offset) - } - - str := st[pos:] - end := bytes.IndexByte(str, '\x00') - if end == -1 { - return "", fmt.Errorf("offset %d isn't null terminated", offset) - } - - return string(str[:end]), nil -} - -func (st stringTable) LookupName(offset uint32) (Name, error) { - str, err := st.Lookup(offset) - return Name(str), err -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/types.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/types.go deleted file mode 100644 index 62aa31bcd7..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/btf/types.go +++ /dev/null @@ -1,893 +0,0 @@ -package btf - -import ( - "fmt" - "math" - "strings" -) - -const maxTypeDepth = 32 - -// TypeID identifies a type in a BTF section. -type TypeID uint32 - -// ID implements part of the Type interface. -func (tid TypeID) ID() TypeID { - return tid -} - -// Type represents a type described by BTF. -type Type interface { - ID() TypeID - - String() string - - // Make a copy of the type, without copying Type members. - copy() Type - - // Enumerate all nested Types. Repeated calls must visit nested - // types in the same order. - walk(*typeDeque) -} - -// namedType is a type with a name. -// -// Most named types simply embed Name. -type namedType interface { - Type - name() string - essentialName() string -} - -// Name identifies a type. -// -// Anonymous types have an empty name. -type Name string - -func (n Name) name() string { - return string(n) -} - -func (n Name) essentialName() string { - return essentialName(string(n)) -} - -// Void is the unit type of BTF. -type Void struct{} - -func (v *Void) ID() TypeID { return 0 } -func (v *Void) String() string { return "void#0" } -func (v *Void) size() uint32 { return 0 } -func (v *Void) copy() Type { return (*Void)(nil) } -func (v *Void) walk(*typeDeque) {} - -type IntEncoding byte - -const ( - Signed IntEncoding = 1 << iota - Char - Bool -) - -// Int is an integer of a given length. -type Int struct { - TypeID - Name - - // The size of the integer in bytes. - Size uint32 - Encoding IntEncoding - // Offset is the starting bit offset. Currently always 0. 
- // See https://www.kernel.org/doc/html/latest/bpf/btf.html#btf-kind-int - Offset uint32 - Bits byte -} - -var _ namedType = (*Int)(nil) - -func (i *Int) String() string { - var s strings.Builder - - switch { - case i.Encoding&Char != 0: - s.WriteString("char") - case i.Encoding&Bool != 0: - s.WriteString("bool") - default: - if i.Encoding&Signed == 0 { - s.WriteRune('u') - } - s.WriteString("int") - fmt.Fprintf(&s, "%d", i.Size*8) - } - - fmt.Fprintf(&s, "#%d", i.TypeID) - - if i.Bits > 0 { - fmt.Fprintf(&s, "[bits=%d]", i.Bits) - } - - return s.String() -} - -func (i *Int) size() uint32 { return i.Size } -func (i *Int) walk(*typeDeque) {} -func (i *Int) copy() Type { - cpy := *i - return &cpy -} - -func (i *Int) isBitfield() bool { - return i.Offset > 0 -} - -// Pointer is a pointer to another type. -type Pointer struct { - TypeID - Target Type -} - -func (p *Pointer) String() string { - return fmt.Sprintf("pointer#%d[target=#%d]", p.TypeID, p.Target.ID()) -} - -func (p *Pointer) size() uint32 { return 8 } -func (p *Pointer) walk(tdq *typeDeque) { tdq.push(&p.Target) } -func (p *Pointer) copy() Type { - cpy := *p - return &cpy -} - -// Array is an array with a fixed number of elements. -type Array struct { - TypeID - Type Type - Nelems uint32 -} - -func (arr *Array) String() string { - return fmt.Sprintf("array#%d[type=#%d n=%d]", arr.TypeID, arr.Type.ID(), arr.Nelems) -} - -func (arr *Array) walk(tdq *typeDeque) { tdq.push(&arr.Type) } -func (arr *Array) copy() Type { - cpy := *arr - return &cpy -} - -// Struct is a compound type of consecutive members. -type Struct struct { - TypeID - Name - // The size of the struct including padding, in bytes - Size uint32 - Members []Member -} - -func (s *Struct) String() string { - return fmt.Sprintf("struct#%d[%q]", s.TypeID, s.Name) -} - -func (s *Struct) size() uint32 { return s.Size } - -func (s *Struct) walk(tdq *typeDeque) { - for i := range s.Members { - tdq.push(&s.Members[i].Type) - } -} - -func (s *Struct) copy() Type { - cpy := *s - cpy.Members = copyMembers(s.Members) - return &cpy -} - -func (s *Struct) members() []Member { - return s.Members -} - -// Union is a compound type where members occupy the same memory. -type Union struct { - TypeID - Name - // The size of the union including padding, in bytes. - Size uint32 - Members []Member -} - -func (u *Union) String() string { - return fmt.Sprintf("union#%d[%q]", u.TypeID, u.Name) -} - -func (u *Union) size() uint32 { return u.Size } - -func (u *Union) walk(tdq *typeDeque) { - for i := range u.Members { - tdq.push(&u.Members[i].Type) - } -} - -func (u *Union) copy() Type { - cpy := *u - cpy.Members = copyMembers(u.Members) - return &cpy -} - -func (u *Union) members() []Member { - return u.Members -} - -func copyMembers(orig []Member) []Member { - cpy := make([]Member, len(orig)) - copy(cpy, orig) - return cpy -} - -type composite interface { - members() []Member -} - -var ( - _ composite = (*Struct)(nil) - _ composite = (*Union)(nil) -) - -// Member is part of a Struct or Union. -// -// It is not a valid Type. -type Member struct { - Name - Type Type - // Offset is the bit offset of this member - Offset uint32 - BitfieldSize uint32 -} - -// Enum lists possible values. 
-type Enum struct { - TypeID - Name - Values []EnumValue -} - -func (e *Enum) String() string { - return fmt.Sprintf("enum#%d[%q]", e.TypeID, e.Name) -} - -// EnumValue is part of an Enum -// -// Is is not a valid Type -type EnumValue struct { - Name - Value int32 -} - -func (e *Enum) size() uint32 { return 4 } -func (e *Enum) walk(*typeDeque) {} -func (e *Enum) copy() Type { - cpy := *e - cpy.Values = make([]EnumValue, len(e.Values)) - copy(cpy.Values, e.Values) - return &cpy -} - -// FwdKind is the type of forward declaration. -type FwdKind int - -// Valid types of forward declaration. -const ( - FwdStruct FwdKind = iota - FwdUnion -) - -func (fk FwdKind) String() string { - switch fk { - case FwdStruct: - return "struct" - case FwdUnion: - return "union" - default: - return fmt.Sprintf("%T(%d)", fk, int(fk)) - } -} - -// Fwd is a forward declaration of a Type. -type Fwd struct { - TypeID - Name - Kind FwdKind -} - -func (f *Fwd) String() string { - return fmt.Sprintf("fwd#%d[%s %q]", f.TypeID, f.Kind, f.Name) -} - -func (f *Fwd) walk(*typeDeque) {} -func (f *Fwd) copy() Type { - cpy := *f - return &cpy -} - -// Typedef is an alias of a Type. -type Typedef struct { - TypeID - Name - Type Type -} - -func (td *Typedef) String() string { - return fmt.Sprintf("typedef#%d[%q #%d]", td.TypeID, td.Name, td.Type.ID()) -} - -func (td *Typedef) walk(tdq *typeDeque) { tdq.push(&td.Type) } -func (td *Typedef) copy() Type { - cpy := *td - return &cpy -} - -// Volatile is a qualifier. -type Volatile struct { - TypeID - Type Type -} - -func (v *Volatile) String() string { - return fmt.Sprintf("volatile#%d[#%d]", v.TypeID, v.Type.ID()) -} - -func (v *Volatile) qualify() Type { return v.Type } -func (v *Volatile) walk(tdq *typeDeque) { tdq.push(&v.Type) } -func (v *Volatile) copy() Type { - cpy := *v - return &cpy -} - -// Const is a qualifier. -type Const struct { - TypeID - Type Type -} - -func (c *Const) String() string { - return fmt.Sprintf("const#%d[#%d]", c.TypeID, c.Type.ID()) -} - -func (c *Const) qualify() Type { return c.Type } -func (c *Const) walk(tdq *typeDeque) { tdq.push(&c.Type) } -func (c *Const) copy() Type { - cpy := *c - return &cpy -} - -// Restrict is a qualifier. -type Restrict struct { - TypeID - Type Type -} - -func (r *Restrict) String() string { - return fmt.Sprintf("restrict#%d[#%d]", r.TypeID, r.Type.ID()) -} - -func (r *Restrict) qualify() Type { return r.Type } -func (r *Restrict) walk(tdq *typeDeque) { tdq.push(&r.Type) } -func (r *Restrict) copy() Type { - cpy := *r - return &cpy -} - -// Func is a function definition. -type Func struct { - TypeID - Name - Type Type - Linkage FuncLinkage -} - -func (f *Func) String() string { - return fmt.Sprintf("func#%d[%s %q proto=#%d]", f.TypeID, f.Linkage, f.Name, f.Type.ID()) -} - -func (f *Func) walk(tdq *typeDeque) { tdq.push(&f.Type) } -func (f *Func) copy() Type { - cpy := *f - return &cpy -} - -// FuncProto is a function declaration. 
-type FuncProto struct { - TypeID - Return Type - Params []FuncParam -} - -func (fp *FuncProto) String() string { - var s strings.Builder - fmt.Fprintf(&s, "proto#%d[", fp.TypeID) - for _, param := range fp.Params { - fmt.Fprintf(&s, "%q=#%d, ", param.Name, param.Type.ID()) - } - fmt.Fprintf(&s, "return=#%d]", fp.Return.ID()) - return s.String() -} - -func (fp *FuncProto) walk(tdq *typeDeque) { - tdq.push(&fp.Return) - for i := range fp.Params { - tdq.push(&fp.Params[i].Type) - } -} - -func (fp *FuncProto) copy() Type { - cpy := *fp - cpy.Params = make([]FuncParam, len(fp.Params)) - copy(cpy.Params, fp.Params) - return &cpy -} - -type FuncParam struct { - Name - Type Type -} - -// Var is a global variable. -type Var struct { - TypeID - Name - Type Type - Linkage VarLinkage -} - -func (v *Var) String() string { - return fmt.Sprintf("var#%d[%s %q]", v.TypeID, v.Linkage, v.Name) -} - -func (v *Var) walk(tdq *typeDeque) { tdq.push(&v.Type) } -func (v *Var) copy() Type { - cpy := *v - return &cpy -} - -// Datasec is a global program section containing data. -type Datasec struct { - TypeID - Name - Size uint32 - Vars []VarSecinfo -} - -func (ds *Datasec) String() string { - return fmt.Sprintf("section#%d[%q]", ds.TypeID, ds.Name) -} - -func (ds *Datasec) size() uint32 { return ds.Size } - -func (ds *Datasec) walk(tdq *typeDeque) { - for i := range ds.Vars { - tdq.push(&ds.Vars[i].Type) - } -} - -func (ds *Datasec) copy() Type { - cpy := *ds - cpy.Vars = make([]VarSecinfo, len(ds.Vars)) - copy(cpy.Vars, ds.Vars) - return &cpy -} - -// VarSecinfo describes variable in a Datasec -// -// It is not a valid Type. -type VarSecinfo struct { - Type Type - Offset uint32 - Size uint32 -} - -type sizer interface { - size() uint32 -} - -var ( - _ sizer = (*Int)(nil) - _ sizer = (*Pointer)(nil) - _ sizer = (*Struct)(nil) - _ sizer = (*Union)(nil) - _ sizer = (*Enum)(nil) - _ sizer = (*Datasec)(nil) -) - -type qualifier interface { - qualify() Type -} - -var ( - _ qualifier = (*Const)(nil) - _ qualifier = (*Restrict)(nil) - _ qualifier = (*Volatile)(nil) -) - -// Sizeof returns the size of a type in bytes. -// -// Returns an error if the size can't be computed. -func Sizeof(typ Type) (int, error) { - var ( - n = int64(1) - elem int64 - ) - - for i := 0; i < maxTypeDepth; i++ { - switch v := typ.(type) { - case *Array: - if n > 0 && int64(v.Nelems) > math.MaxInt64/n { - return 0, fmt.Errorf("type %s: overflow", typ) - } - - // Arrays may be of zero length, which allows - // n to be zero as well. - n *= int64(v.Nelems) - typ = v.Type - continue - - case sizer: - elem = int64(v.size()) - - case *Typedef: - typ = v.Type - continue - - case qualifier: - typ = v.qualify() - continue - - default: - return 0, fmt.Errorf("unsized type %T", typ) - } - - if n > 0 && elem > math.MaxInt64/n { - return 0, fmt.Errorf("type %s: overflow", typ) - } - - size := n * elem - if int64(int(size)) != size { - return 0, fmt.Errorf("type %s: overflow", typ) - } - - return int(size), nil - } - - return 0, fmt.Errorf("type %s: exceeded type depth", typ) -} - -// copy a Type recursively. -// -// typ may form a cycle. -// -// Returns any errors from transform verbatim. -func copyType(typ Type, transform func(Type) (Type, error)) (Type, error) { - var ( - copies = make(map[Type]Type) - work typeDeque - ) - - for t := &typ; t != nil; t = work.pop() { - // *t is the identity of the type. 
- if cpy := copies[*t]; cpy != nil { - *t = cpy - continue - } - - var cpy Type - if transform != nil { - tf, err := transform(*t) - if err != nil { - return nil, fmt.Errorf("copy %s: %w", typ, err) - } - cpy = tf.copy() - } else { - cpy = (*t).copy() - } - - copies[*t] = cpy - *t = cpy - - // Mark any nested types for copying. - cpy.walk(&work) - } - - return typ, nil -} - -// typeDeque keeps track of pointers to types which still -// need to be visited. -type typeDeque struct { - types []*Type - read, write uint64 - mask uint64 -} - -// push adds a type to the stack. -func (dq *typeDeque) push(t *Type) { - if dq.write-dq.read < uint64(len(dq.types)) { - dq.types[dq.write&dq.mask] = t - dq.write++ - return - } - - new := len(dq.types) * 2 - if new == 0 { - new = 8 - } - - types := make([]*Type, new) - pivot := dq.read & dq.mask - n := copy(types, dq.types[pivot:]) - n += copy(types[n:], dq.types[:pivot]) - types[n] = t - - dq.types = types - dq.mask = uint64(new) - 1 - dq.read, dq.write = 0, uint64(n+1) -} - -// shift returns the first element or null. -func (dq *typeDeque) shift() *Type { - if dq.read == dq.write { - return nil - } - - index := dq.read & dq.mask - t := dq.types[index] - dq.types[index] = nil - dq.read++ - return t -} - -// pop returns the last element or null. -func (dq *typeDeque) pop() *Type { - if dq.read == dq.write { - return nil - } - - dq.write-- - index := dq.write & dq.mask - t := dq.types[index] - dq.types[index] = nil - return t -} - -// all returns all elements. -// -// The deque is empty after calling this method. -func (dq *typeDeque) all() []*Type { - length := dq.write - dq.read - types := make([]*Type, 0, length) - for t := dq.shift(); t != nil; t = dq.shift() { - types = append(types, t) - } - return types -} - -// inflateRawTypes takes a list of raw btf types linked via type IDs, and turns -// it into a graph of Types connected via pointers. -// -// Returns a map of named types (so, where NameOff is non-zero) and a slice of types -// indexed by TypeID. Since BTF ignores compilation units, multiple types may share -// the same name. A Type may form a cyclic graph by pointing at itself. -func inflateRawTypes(rawTypes []rawType, rawStrings stringTable) (types []Type, namedTypes map[string][]namedType, err error) { - type fixupDef struct { - id TypeID - expectedKind btfKind - typ *Type - } - - var fixups []fixupDef - fixup := func(id TypeID, expectedKind btfKind, typ *Type) { - fixups = append(fixups, fixupDef{id, expectedKind, typ}) - } - - convertMembers := func(raw []btfMember, kindFlag bool) ([]Member, error) { - // NB: The fixup below relies on pre-allocating this array to - // work, since otherwise append might re-allocate members. - members := make([]Member, 0, len(raw)) - for i, btfMember := range raw { - name, err := rawStrings.LookupName(btfMember.NameOff) - if err != nil { - return nil, fmt.Errorf("can't get name for member %d: %w", i, err) - } - m := Member{ - Name: name, - Offset: btfMember.Offset, - } - if kindFlag { - m.BitfieldSize = btfMember.Offset >> 24 - m.Offset &= 0xffffff - } - members = append(members, m) - } - for i := range members { - fixup(raw[i].Type, kindUnknown, &members[i].Type) - } - return members, nil - } - - types = make([]Type, 0, len(rawTypes)) - types = append(types, (*Void)(nil)) - namedTypes = make(map[string][]namedType) - - for i, raw := range rawTypes { - var ( - // Void is defined to always be type ID 0, and is thus - // omitted from BTF. 
- id = TypeID(i + 1) - typ Type - ) - - name, err := rawStrings.LookupName(raw.NameOff) - if err != nil { - return nil, nil, fmt.Errorf("get name for type id %d: %w", id, err) - } - - switch raw.Kind() { - case kindInt: - encoding, offset, bits := intEncoding(*raw.data.(*uint32)) - typ = &Int{id, name, raw.Size(), encoding, offset, bits} - - case kindPointer: - ptr := &Pointer{id, nil} - fixup(raw.Type(), kindUnknown, &ptr.Target) - typ = ptr - - case kindArray: - btfArr := raw.data.(*btfArray) - - // IndexType is unused according to btf.rst. - // Don't make it available right now. - arr := &Array{id, nil, btfArr.Nelems} - fixup(btfArr.Type, kindUnknown, &arr.Type) - typ = arr - - case kindStruct: - members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag()) - if err != nil { - return nil, nil, fmt.Errorf("struct %s (id %d): %w", name, id, err) - } - typ = &Struct{id, name, raw.Size(), members} - - case kindUnion: - members, err := convertMembers(raw.data.([]btfMember), raw.KindFlag()) - if err != nil { - return nil, nil, fmt.Errorf("union %s (id %d): %w", name, id, err) - } - typ = &Union{id, name, raw.Size(), members} - - case kindEnum: - rawvals := raw.data.([]btfEnum) - vals := make([]EnumValue, 0, len(rawvals)) - for i, btfVal := range rawvals { - name, err := rawStrings.LookupName(btfVal.NameOff) - if err != nil { - return nil, nil, fmt.Errorf("get name for enum value %d: %s", i, err) - } - vals = append(vals, EnumValue{ - Name: name, - Value: btfVal.Val, - }) - } - typ = &Enum{id, name, vals} - - case kindForward: - if raw.KindFlag() { - typ = &Fwd{id, name, FwdUnion} - } else { - typ = &Fwd{id, name, FwdStruct} - } - - case kindTypedef: - typedef := &Typedef{id, name, nil} - fixup(raw.Type(), kindUnknown, &typedef.Type) - typ = typedef - - case kindVolatile: - volatile := &Volatile{id, nil} - fixup(raw.Type(), kindUnknown, &volatile.Type) - typ = volatile - - case kindConst: - cnst := &Const{id, nil} - fixup(raw.Type(), kindUnknown, &cnst.Type) - typ = cnst - - case kindRestrict: - restrict := &Restrict{id, nil} - fixup(raw.Type(), kindUnknown, &restrict.Type) - typ = restrict - - case kindFunc: - fn := &Func{id, name, nil, raw.Linkage()} - fixup(raw.Type(), kindFuncProto, &fn.Type) - typ = fn - - case kindFuncProto: - rawparams := raw.data.([]btfParam) - params := make([]FuncParam, 0, len(rawparams)) - for i, param := range rawparams { - name, err := rawStrings.LookupName(param.NameOff) - if err != nil { - return nil, nil, fmt.Errorf("get name for func proto parameter %d: %s", i, err) - } - params = append(params, FuncParam{ - Name: name, - }) - } - for i := range params { - fixup(rawparams[i].Type, kindUnknown, ¶ms[i].Type) - } - - fp := &FuncProto{id, nil, params} - fixup(raw.Type(), kindUnknown, &fp.Return) - typ = fp - - case kindVar: - variable := raw.data.(*btfVariable) - v := &Var{id, name, nil, VarLinkage(variable.Linkage)} - fixup(raw.Type(), kindUnknown, &v.Type) - typ = v - - case kindDatasec: - btfVars := raw.data.([]btfVarSecinfo) - vars := make([]VarSecinfo, 0, len(btfVars)) - for _, btfVar := range btfVars { - vars = append(vars, VarSecinfo{ - Offset: btfVar.Offset, - Size: btfVar.Size, - }) - } - for i := range vars { - fixup(btfVars[i].Type, kindVar, &vars[i].Type) - } - typ = &Datasec{id, name, raw.SizeType, vars} - - default: - return nil, nil, fmt.Errorf("type id %d: unknown kind: %v", id, raw.Kind()) - } - - types = append(types, typ) - - if named, ok := typ.(namedType); ok { - if name := essentialName(named.name()); name != "" { - namedTypes[name] 
= append(namedTypes[name], named) - } - } - } - - for _, fixup := range fixups { - i := int(fixup.id) - if i >= len(types) { - return nil, nil, fmt.Errorf("reference to invalid type id: %d", fixup.id) - } - - // Default void (id 0) to unknown - rawKind := kindUnknown - if i > 0 { - rawKind = rawTypes[i-1].Kind() - } - - if expected := fixup.expectedKind; expected != kindUnknown && rawKind != expected { - return nil, nil, fmt.Errorf("expected type id %d to have kind %s, found %s", fixup.id, expected, rawKind) - } - - *fixup.typ = types[i] - } - - return types, namedTypes, nil -} - -// essentialName returns name without a ___ suffix. -func essentialName(name string) string { - lastIdx := strings.LastIndex(name, "___") - if lastIdx > 0 { - return name[:lastIdx] - } - return name -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/cpu.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/cpu.go deleted file mode 100644 index d3424ba434..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/cpu.go +++ /dev/null @@ -1,62 +0,0 @@ -package internal - -import ( - "fmt" - "io/ioutil" - "strings" - "sync" -) - -var sysCPU struct { - once sync.Once - err error - num int -} - -// PossibleCPUs returns the max number of CPUs a system may possibly have -// Logical CPU numbers must be of the form 0-n -func PossibleCPUs() (int, error) { - sysCPU.once.Do(func() { - sysCPU.num, sysCPU.err = parseCPUsFromFile("/sys/devices/system/cpu/possible") - }) - - return sysCPU.num, sysCPU.err -} - -func parseCPUsFromFile(path string) (int, error) { - spec, err := ioutil.ReadFile(path) - if err != nil { - return 0, err - } - - n, err := parseCPUs(string(spec)) - if err != nil { - return 0, fmt.Errorf("can't parse %s: %v", path, err) - } - - return n, nil -} - -// parseCPUs parses the number of cpus from a string produced -// by bitmap_list_string() in the Linux kernel. -// Multiple ranges are rejected, since they can't be unified -// into a single number. -// This is the format of /sys/devices/system/cpu/possible, it -// is not suitable for /sys/devices/system/cpu/online, etc. -func parseCPUs(spec string) (int, error) { - if strings.Trim(spec, "\n") == "0" { - return 1, nil - } - - var low, high int - n, err := fmt.Sscanf(spec, "%d-%d\n", &low, &high) - if n != 2 || err != nil { - return 0, fmt.Errorf("invalid format: %s", spec) - } - if low != 0 { - return 0, fmt.Errorf("CPU spec doesn't start at zero: %s", spec) - } - - // cpus is 0 indexed - return high + 1, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/elf.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/elf.go deleted file mode 100644 index 54a4313130..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/elf.go +++ /dev/null @@ -1,68 +0,0 @@ -package internal - -import ( - "debug/elf" - "fmt" - "io" -) - -type SafeELFFile struct { - *elf.File -} - -// NewSafeELFFile reads an ELF safely. -// -// Any panic during parsing is turned into an error. This is necessary since -// there are a bunch of unfixed bugs in debug/elf. -// -// https://github.com/golang/go/issues?q=is%3Aissue+is%3Aopen+debug%2Felf+in%3Atitle -func NewSafeELFFile(r io.ReaderAt) (safe *SafeELFFile, err error) { - defer func() { - r := recover() - if r == nil { - return - } - - safe = nil - err = fmt.Errorf("reading ELF file panicked: %s", r) - }() - - file, err := elf.NewFile(r) - if err != nil { - return nil, err - } - - return &SafeELFFile{file}, nil -} - -// Symbols is the safe version of elf.File.Symbols. 
-func (se *SafeELFFile) Symbols() (syms []elf.Symbol, err error) { - defer func() { - r := recover() - if r == nil { - return - } - - syms = nil - err = fmt.Errorf("reading ELF symbols panicked: %s", r) - }() - - syms, err = se.File.Symbols() - return -} - -// DynamicSymbols is the safe version of elf.File.DynamicSymbols. -func (se *SafeELFFile) DynamicSymbols() (syms []elf.Symbol, err error) { - defer func() { - r := recover() - if r == nil { - return - } - - syms = nil - err = fmt.Errorf("reading ELF dynamic symbols panicked: %s", r) - }() - - syms, err = se.File.DynamicSymbols() - return -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/endian.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/endian.go deleted file mode 100644 index 6ae99fcd5f..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/endian.go +++ /dev/null @@ -1,29 +0,0 @@ -package internal - -import ( - "encoding/binary" - "unsafe" -) - -// NativeEndian is set to either binary.BigEndian or binary.LittleEndian, -// depending on the host's endianness. -var NativeEndian binary.ByteOrder - -// Clang is set to either "el" or "eb" depending on the host's endianness. -var ClangEndian string - -func init() { - if isBigEndian() { - NativeEndian = binary.BigEndian - ClangEndian = "eb" - } else { - NativeEndian = binary.LittleEndian - ClangEndian = "el" - } -} - -func isBigEndian() (ret bool) { - i := int(0x1) - bs := (*[int(unsafe.Sizeof(i))]byte)(unsafe.Pointer(&i)) - return bs[0] == 0 -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/errors.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/errors.go deleted file mode 100644 index 877bd72ee2..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/errors.go +++ /dev/null @@ -1,51 +0,0 @@ -package internal - -import ( - "bytes" - "errors" - "fmt" - "strings" - - "github.com/cilium/ebpf/internal/unix" -) - -// ErrorWithLog returns an error that includes logs from the -// kernel verifier. -// -// logErr should be the error returned by the syscall that generated -// the log. It is used to check for truncation of the output. -func ErrorWithLog(err error, log []byte, logErr error) error { - logStr := strings.Trim(CString(log), "\t\r\n ") - if errors.Is(logErr, unix.ENOSPC) { - logStr += " (truncated...)" - } - - return &VerifierError{err, logStr} -} - -// VerifierError includes information from the eBPF verifier. -type VerifierError struct { - cause error - log string -} - -func (le *VerifierError) Unwrap() error { - return le.cause -} - -func (le *VerifierError) Error() string { - if le.log == "" { - return le.cause.Error() - } - - return fmt.Sprintf("%s: %s", le.cause, le.log) -} - -// CString turns a NUL / zero terminated byte buffer into a string. 
-func CString(in []byte) string { - inLen := bytes.IndexByte(in, 0) - if inLen == -1 { - return "" - } - return string(in[:inLen]) -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/fd.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/fd.go deleted file mode 100644 index af04955bd5..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/fd.go +++ /dev/null @@ -1,69 +0,0 @@ -package internal - -import ( - "errors" - "fmt" - "os" - "runtime" - "strconv" - - "github.com/cilium/ebpf/internal/unix" -) - -var ErrClosedFd = errors.New("use of closed file descriptor") - -type FD struct { - raw int64 -} - -func NewFD(value uint32) *FD { - fd := &FD{int64(value)} - runtime.SetFinalizer(fd, (*FD).Close) - return fd -} - -func (fd *FD) String() string { - return strconv.FormatInt(fd.raw, 10) -} - -func (fd *FD) Value() (uint32, error) { - if fd.raw < 0 { - return 0, ErrClosedFd - } - - return uint32(fd.raw), nil -} - -func (fd *FD) Close() error { - if fd.raw < 0 { - return nil - } - - value := int(fd.raw) - fd.raw = -1 - - fd.Forget() - return unix.Close(value) -} - -func (fd *FD) Forget() { - runtime.SetFinalizer(fd, nil) -} - -func (fd *FD) Dup() (*FD, error) { - if fd.raw < 0 { - return nil, ErrClosedFd - } - - dup, err := unix.FcntlInt(uintptr(fd.raw), unix.F_DUPFD_CLOEXEC, 0) - if err != nil { - return nil, fmt.Errorf("can't dup fd: %v", err) - } - - return NewFD(uint32(dup)), nil -} - -func (fd *FD) File(name string) *os.File { - fd.Forget() - return os.NewFile(uintptr(fd.raw), name) -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/feature.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/feature.go deleted file mode 100644 index c94a2e1ee0..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/feature.go +++ /dev/null @@ -1,100 +0,0 @@ -package internal - -import ( - "errors" - "fmt" - "sync" -) - -// ErrNotSupported indicates that a feature is not supported by the current kernel. -var ErrNotSupported = errors.New("not supported") - -// UnsupportedFeatureError is returned by FeatureTest() functions. -type UnsupportedFeatureError struct { - // The minimum Linux mainline version required for this feature. - // Used for the error string, and for sanity checking during testing. - MinimumVersion Version - - // The name of the feature that isn't supported. - Name string -} - -func (ufe *UnsupportedFeatureError) Error() string { - if ufe.MinimumVersion.Unspecified() { - return fmt.Sprintf("%s not supported", ufe.Name) - } - return fmt.Sprintf("%s not supported (requires >= %s)", ufe.Name, ufe.MinimumVersion) -} - -// Is indicates that UnsupportedFeatureError is ErrNotSupported. -func (ufe *UnsupportedFeatureError) Is(target error) bool { - return target == ErrNotSupported -} - -type featureTest struct { - sync.RWMutex - successful bool - result error -} - -// FeatureTestFn is used to determine whether the kernel supports -// a certain feature. -// -// The return values have the following semantics: -// -// err == ErrNotSupported: the feature is not available -// err == nil: the feature is available -// err != nil: the test couldn't be executed -type FeatureTestFn func() error - -// FeatureTest wraps a function so that it is run at most once. -// -// name should identify the tested feature, while version must be in the -// form Major.Minor[.Patch]. -// -// Returns an error wrapping ErrNotSupported if the feature is not supported. 
-func FeatureTest(name, version string, fn FeatureTestFn) func() error { - v, err := NewVersion(version) - if err != nil { - return func() error { return err } - } - - ft := new(featureTest) - return func() error { - ft.RLock() - if ft.successful { - defer ft.RUnlock() - return ft.result - } - ft.RUnlock() - ft.Lock() - defer ft.Unlock() - // check one more time on the off - // chance that two go routines - // were able to call into the write - // lock - if ft.successful { - return ft.result - } - err := fn() - switch { - case errors.Is(err, ErrNotSupported): - ft.result = &UnsupportedFeatureError{ - MinimumVersion: v, - Name: name, - } - fallthrough - - case err == nil: - ft.successful = true - - default: - // We couldn't execute the feature test to a point - // where it could make a determination. - // Don't cache the result, just return it. - return fmt.Errorf("detect support for %s: %w", name, err) - } - - return ft.result - } -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/io.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/io.go deleted file mode 100644 index fa7402782d..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/io.go +++ /dev/null @@ -1,16 +0,0 @@ -package internal - -import "errors" - -// DiscardZeroes makes sure that all written bytes are zero -// before discarding them. -type DiscardZeroes struct{} - -func (DiscardZeroes) Write(p []byte) (int, error) { - for _, b := range p { - if b != 0 { - return 0, errors.New("encountered non-zero byte") - } - } - return len(p), nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/pinning.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/pinning.go deleted file mode 100644 index 5329b432d7..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/pinning.go +++ /dev/null @@ -1,44 +0,0 @@ -package internal - -import ( - "errors" - "fmt" - "os" - - "github.com/cilium/ebpf/internal/unix" -) - -func Pin(currentPath, newPath string, fd *FD) error { - if newPath == "" { - return errors.New("given pinning path cannot be empty") - } - if currentPath == newPath { - return nil - } - if currentPath == "" { - return BPFObjPin(newPath, fd) - } - var err error - // Renameat2 is used instead of os.Rename to disallow the new path replacing - // an existing path. - if err = unix.Renameat2(unix.AT_FDCWD, currentPath, unix.AT_FDCWD, newPath, unix.RENAME_NOREPLACE); err == nil { - // Object is now moved to the new pinning path. - return nil - } - if !os.IsNotExist(err) { - return fmt.Errorf("unable to move pinned object to new path %v: %w", newPath, err) - } - // Internal state not in sync with the file system so let's fix it. - return BPFObjPin(newPath, fd) -} - -func Unpin(pinnedPath string) error { - if pinnedPath == "" { - return nil - } - err := os.Remove(pinnedPath) - if err == nil || os.IsNotExist(err) { - return nil - } - return err -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr.go deleted file mode 100644 index f295de72cf..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr.go +++ /dev/null @@ -1,31 +0,0 @@ -package internal - -import ( - "unsafe" - - "github.com/cilium/ebpf/internal/unix" -) - -// NewPointer creates a 64-bit pointer from an unsafe Pointer. -func NewPointer(ptr unsafe.Pointer) Pointer { - return Pointer{ptr: ptr} -} - -// NewSlicePointer creates a 64-bit pointer from a byte slice. 
-func NewSlicePointer(buf []byte) Pointer { - if len(buf) == 0 { - return Pointer{} - } - - return Pointer{ptr: unsafe.Pointer(&buf[0])} -} - -// NewStringPointer creates a 64-bit pointer from a string. -func NewStringPointer(str string) Pointer { - p, err := unix.BytePtrFromString(str) - if err != nil { - return Pointer{} - } - - return Pointer{ptr: unsafe.Pointer(p)} -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_be.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_be.go deleted file mode 100644 index a56fbcc8e0..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_be.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build armbe mips mips64p32 - -package internal - -import ( - "unsafe" -) - -// Pointer wraps an unsafe.Pointer to be 64bit to -// conform to the syscall specification. -type Pointer struct { - pad uint32 - ptr unsafe.Pointer -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_le.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_le.go deleted file mode 100644 index be2ecfca73..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_32_le.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build 386 amd64p32 arm mipsle mips64p32le - -package internal - -import ( - "unsafe" -) - -// Pointer wraps an unsafe.Pointer to be 64bit to -// conform to the syscall specification. -type Pointer struct { - ptr unsafe.Pointer - pad uint32 -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_64.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_64.go deleted file mode 100644 index 69452dceb9..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/ptr_64.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build !386,!amd64p32,!arm,!mipsle,!mips64p32le -// +build !armbe,!mips,!mips64p32 - -package internal - -import ( - "unsafe" -) - -// Pointer wraps an unsafe.Pointer to be 64bit to -// conform to the syscall specification. -type Pointer struct { - ptr unsafe.Pointer -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/syscall.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/syscall.go deleted file mode 100644 index b766e643e0..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/syscall.go +++ /dev/null @@ -1,245 +0,0 @@ -package internal - -import ( - "fmt" - "path/filepath" - "runtime" - "syscall" - "unsafe" - - "github.com/cilium/ebpf/internal/unix" -) - -//go:generate stringer -output syscall_string.go -type=BPFCmd - -// BPFCmd identifies a subcommand of the bpf syscall. -type BPFCmd int - -// Well known BPF commands. -const ( - BPF_MAP_CREATE BPFCmd = iota - BPF_MAP_LOOKUP_ELEM - BPF_MAP_UPDATE_ELEM - BPF_MAP_DELETE_ELEM - BPF_MAP_GET_NEXT_KEY - BPF_PROG_LOAD - BPF_OBJ_PIN - BPF_OBJ_GET - BPF_PROG_ATTACH - BPF_PROG_DETACH - BPF_PROG_TEST_RUN - BPF_PROG_GET_NEXT_ID - BPF_MAP_GET_NEXT_ID - BPF_PROG_GET_FD_BY_ID - BPF_MAP_GET_FD_BY_ID - BPF_OBJ_GET_INFO_BY_FD - BPF_PROG_QUERY - BPF_RAW_TRACEPOINT_OPEN - BPF_BTF_LOAD - BPF_BTF_GET_FD_BY_ID - BPF_TASK_FD_QUERY - BPF_MAP_LOOKUP_AND_DELETE_ELEM - BPF_MAP_FREEZE - BPF_BTF_GET_NEXT_ID - BPF_MAP_LOOKUP_BATCH - BPF_MAP_LOOKUP_AND_DELETE_BATCH - BPF_MAP_UPDATE_BATCH - BPF_MAP_DELETE_BATCH - BPF_LINK_CREATE - BPF_LINK_UPDATE - BPF_LINK_GET_FD_BY_ID - BPF_LINK_GET_NEXT_ID - BPF_ENABLE_STATS - BPF_ITER_CREATE -) - -// BPF wraps SYS_BPF. -// -// Any pointers contained in attr must use the Pointer type from this package. 
-func BPF(cmd BPFCmd, attr unsafe.Pointer, size uintptr) (uintptr, error) { - r1, _, errNo := unix.Syscall(unix.SYS_BPF, uintptr(cmd), uintptr(attr), size) - runtime.KeepAlive(attr) - - var err error - if errNo != 0 { - err = wrappedErrno{errNo} - } - - return r1, err -} - -type BPFProgAttachAttr struct { - TargetFd uint32 - AttachBpfFd uint32 - AttachType uint32 - AttachFlags uint32 - ReplaceBpfFd uint32 -} - -func BPFProgAttach(attr *BPFProgAttachAttr) error { - _, err := BPF(BPF_PROG_ATTACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - return err -} - -type BPFProgDetachAttr struct { - TargetFd uint32 - AttachBpfFd uint32 - AttachType uint32 -} - -func BPFProgDetach(attr *BPFProgDetachAttr) error { - _, err := BPF(BPF_PROG_DETACH, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - return err -} - -type BPFEnableStatsAttr struct { - StatsType uint32 -} - -func BPFEnableStats(attr *BPFEnableStatsAttr) (*FD, error) { - ptr, err := BPF(BPF_ENABLE_STATS, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - if err != nil { - return nil, fmt.Errorf("enable stats: %w", err) - } - return NewFD(uint32(ptr)), nil - -} - -type bpfObjAttr struct { - fileName Pointer - fd uint32 - fileFlags uint32 -} - -const bpfFSType = 0xcafe4a11 - -// BPFObjPin wraps BPF_OBJ_PIN. -func BPFObjPin(fileName string, fd *FD) error { - dirName := filepath.Dir(fileName) - var statfs unix.Statfs_t - if err := unix.Statfs(dirName, &statfs); err != nil { - return err - } - if uint64(statfs.Type) != bpfFSType { - return fmt.Errorf("%s is not on a bpf filesystem", fileName) - } - - value, err := fd.Value() - if err != nil { - return err - } - - attr := bpfObjAttr{ - fileName: NewStringPointer(fileName), - fd: value, - } - _, err = BPF(BPF_OBJ_PIN, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - if err != nil { - return fmt.Errorf("pin object %s: %w", fileName, err) - } - return nil -} - -// BPFObjGet wraps BPF_OBJ_GET. -func BPFObjGet(fileName string, flags uint32) (*FD, error) { - attr := bpfObjAttr{ - fileName: NewStringPointer(fileName), - fileFlags: flags, - } - ptr, err := BPF(BPF_OBJ_GET, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - if err != nil { - return nil, fmt.Errorf("get object %s: %w", fileName, err) - } - return NewFD(uint32(ptr)), nil -} - -type bpfObjGetInfoByFDAttr struct { - fd uint32 - infoLen uint32 - info Pointer -} - -// BPFObjGetInfoByFD wraps BPF_OBJ_GET_INFO_BY_FD. -// -// Available from 4.13. -func BPFObjGetInfoByFD(fd *FD, info unsafe.Pointer, size uintptr) error { - value, err := fd.Value() - if err != nil { - return err - } - - attr := bpfObjGetInfoByFDAttr{ - fd: value, - infoLen: uint32(size), - info: NewPointer(info), - } - _, err = BPF(BPF_OBJ_GET_INFO_BY_FD, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - if err != nil { - return fmt.Errorf("fd %v: %w", fd, err) - } - return nil -} - -// BPFObjName is a null-terminated string made up of -// 'A-Za-z0-9_' characters. -type BPFObjName [unix.BPF_OBJ_NAME_LEN]byte - -// NewBPFObjName truncates the result if it is too long. 
-func NewBPFObjName(name string) BPFObjName { - var result BPFObjName - copy(result[:unix.BPF_OBJ_NAME_LEN-1], name) - return result -} - -type BPFMapCreateAttr struct { - MapType uint32 - KeySize uint32 - ValueSize uint32 - MaxEntries uint32 - Flags uint32 - InnerMapFd uint32 // since 4.12 56f668dfe00d - NumaNode uint32 // since 4.14 96eabe7a40aa - MapName BPFObjName // since 4.15 ad5b177bd73f - MapIfIndex uint32 - BTFFd uint32 - BTFKeyTypeID uint32 - BTFValueTypeID uint32 -} - -func BPFMapCreate(attr *BPFMapCreateAttr) (*FD, error) { - fd, err := BPF(BPF_MAP_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - if err != nil { - return nil, err - } - - return NewFD(uint32(fd)), nil -} - -// wrappedErrno wraps syscall.Errno to prevent direct comparisons with -// syscall.E* or unix.E* constants. -// -// You should never export an error of this type. -type wrappedErrno struct { - syscall.Errno -} - -func (we wrappedErrno) Unwrap() error { - return we.Errno -} - -type syscallError struct { - error - errno syscall.Errno -} - -func SyscallError(err error, errno syscall.Errno) error { - return &syscallError{err, errno} -} - -func (se *syscallError) Is(target error) bool { - return target == se.error -} - -func (se *syscallError) Unwrap() error { - return se.errno -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/syscall_string.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/syscall_string.go deleted file mode 100644 index 85df047797..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/syscall_string.go +++ /dev/null @@ -1,56 +0,0 @@ -// Code generated by "stringer -output syscall_string.go -type=BPFCmd"; DO NOT EDIT. - -package internal - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[BPF_MAP_CREATE-0] - _ = x[BPF_MAP_LOOKUP_ELEM-1] - _ = x[BPF_MAP_UPDATE_ELEM-2] - _ = x[BPF_MAP_DELETE_ELEM-3] - _ = x[BPF_MAP_GET_NEXT_KEY-4] - _ = x[BPF_PROG_LOAD-5] - _ = x[BPF_OBJ_PIN-6] - _ = x[BPF_OBJ_GET-7] - _ = x[BPF_PROG_ATTACH-8] - _ = x[BPF_PROG_DETACH-9] - _ = x[BPF_PROG_TEST_RUN-10] - _ = x[BPF_PROG_GET_NEXT_ID-11] - _ = x[BPF_MAP_GET_NEXT_ID-12] - _ = x[BPF_PROG_GET_FD_BY_ID-13] - _ = x[BPF_MAP_GET_FD_BY_ID-14] - _ = x[BPF_OBJ_GET_INFO_BY_FD-15] - _ = x[BPF_PROG_QUERY-16] - _ = x[BPF_RAW_TRACEPOINT_OPEN-17] - _ = x[BPF_BTF_LOAD-18] - _ = x[BPF_BTF_GET_FD_BY_ID-19] - _ = x[BPF_TASK_FD_QUERY-20] - _ = x[BPF_MAP_LOOKUP_AND_DELETE_ELEM-21] - _ = x[BPF_MAP_FREEZE-22] - _ = x[BPF_BTF_GET_NEXT_ID-23] - _ = x[BPF_MAP_LOOKUP_BATCH-24] - _ = x[BPF_MAP_LOOKUP_AND_DELETE_BATCH-25] - _ = x[BPF_MAP_UPDATE_BATCH-26] - _ = x[BPF_MAP_DELETE_BATCH-27] - _ = x[BPF_LINK_CREATE-28] - _ = x[BPF_LINK_UPDATE-29] - _ = x[BPF_LINK_GET_FD_BY_ID-30] - _ = x[BPF_LINK_GET_NEXT_ID-31] - _ = x[BPF_ENABLE_STATS-32] - _ = x[BPF_ITER_CREATE-33] -} - -const _BPFCmd_name = "BPF_MAP_CREATEBPF_MAP_LOOKUP_ELEMBPF_MAP_UPDATE_ELEMBPF_MAP_DELETE_ELEMBPF_MAP_GET_NEXT_KEYBPF_PROG_LOADBPF_OBJ_PINBPF_OBJ_GETBPF_PROG_ATTACHBPF_PROG_DETACHBPF_PROG_TEST_RUNBPF_PROG_GET_NEXT_IDBPF_MAP_GET_NEXT_IDBPF_PROG_GET_FD_BY_IDBPF_MAP_GET_FD_BY_IDBPF_OBJ_GET_INFO_BY_FDBPF_PROG_QUERYBPF_RAW_TRACEPOINT_OPENBPF_BTF_LOADBPF_BTF_GET_FD_BY_IDBPF_TASK_FD_QUERYBPF_MAP_LOOKUP_AND_DELETE_ELEMBPF_MAP_FREEZEBPF_BTF_GET_NEXT_IDBPF_MAP_LOOKUP_BATCHBPF_MAP_LOOKUP_AND_DELETE_BATCHBPF_MAP_UPDATE_BATCHBPF_MAP_DELETE_BATCHBPF_LINK_CREATEBPF_LINK_UPDATEBPF_LINK_GET_FD_BY_IDBPF_LINK_GET_NEXT_IDBPF_ENABLE_STATSBPF_ITER_CREATE" - -var _BPFCmd_index = [...]uint16{0, 14, 33, 52, 71, 91, 104, 115, 126, 141, 156, 173, 193, 212, 233, 253, 275, 289, 312, 324, 344, 361, 391, 405, 424, 444, 475, 495, 515, 530, 545, 566, 586, 602, 617} - -func (i BPFCmd) String() string { - if i < 0 || i >= BPFCmd(len(_BPFCmd_index)-1) { - return "BPFCmd(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _BPFCmd_name[_BPFCmd_index[i]:_BPFCmd_index[i+1]] -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go deleted file mode 100644 index 0a18eaf0cf..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_linux.go +++ /dev/null @@ -1,204 +0,0 @@ -// +build linux - -package unix - -import ( - "bytes" - "syscall" - - linux "golang.org/x/sys/unix" -) - -const ( - ENOENT = linux.ENOENT - EEXIST = linux.EEXIST - EAGAIN = linux.EAGAIN - ENOSPC = linux.ENOSPC - EINVAL = linux.EINVAL - EPOLLIN = linux.EPOLLIN - EINTR = linux.EINTR - EPERM = linux.EPERM - ESRCH = linux.ESRCH - ENODEV = linux.ENODEV - // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP - ENOTSUPP = syscall.Errno(0x20c) - - EBADF = linux.EBADF - BPF_F_NO_PREALLOC = linux.BPF_F_NO_PREALLOC - BPF_F_NUMA_NODE = linux.BPF_F_NUMA_NODE - BPF_F_RDONLY = linux.BPF_F_RDONLY - BPF_F_WRONLY = linux.BPF_F_WRONLY - BPF_F_RDONLY_PROG = linux.BPF_F_RDONLY_PROG - BPF_F_WRONLY_PROG = linux.BPF_F_WRONLY_PROG - BPF_F_SLEEPABLE = linux.BPF_F_SLEEPABLE - BPF_F_MMAPABLE = linux.BPF_F_MMAPABLE - BPF_F_INNER_MAP = linux.BPF_F_INNER_MAP - BPF_OBJ_NAME_LEN = linux.BPF_OBJ_NAME_LEN - BPF_TAG_SIZE = linux.BPF_TAG_SIZE - SYS_BPF = linux.SYS_BPF - F_DUPFD_CLOEXEC = linux.F_DUPFD_CLOEXEC - EPOLL_CTL_ADD = linux.EPOLL_CTL_ADD - EPOLL_CLOEXEC = linux.EPOLL_CLOEXEC - O_CLOEXEC = linux.O_CLOEXEC - 
O_NONBLOCK = linux.O_NONBLOCK - PROT_READ = linux.PROT_READ - PROT_WRITE = linux.PROT_WRITE - MAP_SHARED = linux.MAP_SHARED - PERF_ATTR_SIZE_VER1 = linux.PERF_ATTR_SIZE_VER1 - PERF_TYPE_SOFTWARE = linux.PERF_TYPE_SOFTWARE - PERF_TYPE_TRACEPOINT = linux.PERF_TYPE_TRACEPOINT - PERF_COUNT_SW_BPF_OUTPUT = linux.PERF_COUNT_SW_BPF_OUTPUT - PERF_EVENT_IOC_DISABLE = linux.PERF_EVENT_IOC_DISABLE - PERF_EVENT_IOC_ENABLE = linux.PERF_EVENT_IOC_ENABLE - PERF_EVENT_IOC_SET_BPF = linux.PERF_EVENT_IOC_SET_BPF - PerfBitWatermark = linux.PerfBitWatermark - PERF_SAMPLE_RAW = linux.PERF_SAMPLE_RAW - PERF_FLAG_FD_CLOEXEC = linux.PERF_FLAG_FD_CLOEXEC - RLIM_INFINITY = linux.RLIM_INFINITY - RLIMIT_MEMLOCK = linux.RLIMIT_MEMLOCK - BPF_STATS_RUN_TIME = linux.BPF_STATS_RUN_TIME - PERF_RECORD_LOST = linux.PERF_RECORD_LOST - PERF_RECORD_SAMPLE = linux.PERF_RECORD_SAMPLE - AT_FDCWD = linux.AT_FDCWD - RENAME_NOREPLACE = linux.RENAME_NOREPLACE -) - -// Statfs_t is a wrapper -type Statfs_t = linux.Statfs_t - -// Rlimit is a wrapper -type Rlimit = linux.Rlimit - -// Setrlimit is a wrapper -func Setrlimit(resource int, rlim *Rlimit) (err error) { - return linux.Setrlimit(resource, rlim) -} - -// Syscall is a wrapper -func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { - return linux.Syscall(trap, a1, a2, a3) -} - -// FcntlInt is a wrapper -func FcntlInt(fd uintptr, cmd, arg int) (int, error) { - return linux.FcntlInt(fd, cmd, arg) -} - -// IoctlSetInt is a wrapper -func IoctlSetInt(fd int, req uint, value int) error { - return linux.IoctlSetInt(fd, req, value) -} - -// Statfs is a wrapper -func Statfs(path string, buf *Statfs_t) (err error) { - return linux.Statfs(path, buf) -} - -// Close is a wrapper -func Close(fd int) (err error) { - return linux.Close(fd) -} - -// EpollEvent is a wrapper -type EpollEvent = linux.EpollEvent - -// EpollWait is a wrapper -func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { - return linux.EpollWait(epfd, events, msec) -} - -// EpollCtl is a wrapper -func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { - return linux.EpollCtl(epfd, op, fd, event) -} - -// Eventfd is a wrapper -func Eventfd(initval uint, flags int) (fd int, err error) { - return linux.Eventfd(initval, flags) -} - -// Write is a wrapper -func Write(fd int, p []byte) (n int, err error) { - return linux.Write(fd, p) -} - -// EpollCreate1 is a wrapper -func EpollCreate1(flag int) (fd int, err error) { - return linux.EpollCreate1(flag) -} - -// PerfEventMmapPage is a wrapper -type PerfEventMmapPage linux.PerfEventMmapPage - -// SetNonblock is a wrapper -func SetNonblock(fd int, nonblocking bool) (err error) { - return linux.SetNonblock(fd, nonblocking) -} - -// Mmap is a wrapper -func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { - return linux.Mmap(fd, offset, length, prot, flags) -} - -// Munmap is a wrapper -func Munmap(b []byte) (err error) { - return linux.Munmap(b) -} - -// PerfEventAttr is a wrapper -type PerfEventAttr = linux.PerfEventAttr - -// PerfEventOpen is a wrapper -func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { - return linux.PerfEventOpen(attr, pid, cpu, groupFd, flags) -} - -// Utsname is a wrapper -type Utsname = linux.Utsname - -// Uname is a wrapper -func Uname(buf *Utsname) (err error) { - return linux.Uname(buf) -} - -// Getpid is a wrapper -func Getpid() int { - return linux.Getpid() -} - -// Gettid is a wrapper -func Gettid() int { - 
return linux.Gettid() -} - -// Tgkill is a wrapper -func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { - return linux.Tgkill(tgid, tid, sig) -} - -// BytePtrFromString is a wrapper -func BytePtrFromString(s string) (*byte, error) { - return linux.BytePtrFromString(s) -} - -// ByteSliceToString is a wrapper -func ByteSliceToString(s []byte) string { - return linux.ByteSliceToString(s) -} - -// Renameat2 is a wrapper -func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { - return linux.Renameat2(olddirfd, oldpath, newdirfd, newpath, flags) -} - -func KernelRelease() (string, error) { - var uname Utsname - err := Uname(&uname) - if err != nil { - return "", err - } - - end := bytes.IndexByte(uname.Release[:], 0) - release := string(uname.Release[:end]) - return release, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_other.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_other.go deleted file mode 100644 index 1b06defc0a..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/unix/types_other.go +++ /dev/null @@ -1,263 +0,0 @@ -// +build !linux - -package unix - -import ( - "fmt" - "runtime" - "syscall" -) - -var errNonLinux = fmt.Errorf("unsupported platform %s/%s", runtime.GOOS, runtime.GOARCH) - -const ( - ENOENT = syscall.ENOENT - EEXIST = syscall.EEXIST - EAGAIN = syscall.EAGAIN - ENOSPC = syscall.ENOSPC - EINVAL = syscall.EINVAL - EINTR = syscall.EINTR - EPERM = syscall.EPERM - ESRCH = syscall.ESRCH - ENODEV = syscall.ENODEV - EBADF = syscall.Errno(0) - // ENOTSUPP is not the same as ENOTSUP or EOPNOTSUP - ENOTSUPP = syscall.Errno(0x20c) - - BPF_F_NO_PREALLOC = 0 - BPF_F_NUMA_NODE = 0 - BPF_F_RDONLY = 0 - BPF_F_WRONLY = 0 - BPF_F_RDONLY_PROG = 0 - BPF_F_WRONLY_PROG = 0 - BPF_F_SLEEPABLE = 0 - BPF_F_MMAPABLE = 0 - BPF_F_INNER_MAP = 0 - BPF_OBJ_NAME_LEN = 0x10 - BPF_TAG_SIZE = 0x8 - SYS_BPF = 321 - F_DUPFD_CLOEXEC = 0x406 - EPOLLIN = 0x1 - EPOLL_CTL_ADD = 0x1 - EPOLL_CLOEXEC = 0x80000 - O_CLOEXEC = 0x80000 - O_NONBLOCK = 0x800 - PROT_READ = 0x1 - PROT_WRITE = 0x2 - MAP_SHARED = 0x1 - PERF_ATTR_SIZE_VER1 = 0 - PERF_TYPE_SOFTWARE = 0x1 - PERF_TYPE_TRACEPOINT = 0 - PERF_COUNT_SW_BPF_OUTPUT = 0xa - PERF_EVENT_IOC_DISABLE = 0 - PERF_EVENT_IOC_ENABLE = 0 - PERF_EVENT_IOC_SET_BPF = 0 - PerfBitWatermark = 0x4000 - PERF_SAMPLE_RAW = 0x400 - PERF_FLAG_FD_CLOEXEC = 0x8 - RLIM_INFINITY = 0x7fffffffffffffff - RLIMIT_MEMLOCK = 8 - BPF_STATS_RUN_TIME = 0 - PERF_RECORD_LOST = 2 - PERF_RECORD_SAMPLE = 9 - AT_FDCWD = -0x2 - RENAME_NOREPLACE = 0x1 -) - -// Statfs_t is a wrapper -type Statfs_t struct { - Type int64 - Bsize int64 - Blocks uint64 - Bfree uint64 - Bavail uint64 - Files uint64 - Ffree uint64 - Fsid [2]int32 - Namelen int64 - Frsize int64 - Flags int64 - Spare [4]int64 -} - -// Rlimit is a wrapper -type Rlimit struct { - Cur uint64 - Max uint64 -} - -// Setrlimit is a wrapper -func Setrlimit(resource int, rlim *Rlimit) (err error) { - return errNonLinux -} - -// Syscall is a wrapper -func Syscall(trap, a1, a2, a3 uintptr) (r1, r2 uintptr, err syscall.Errno) { - return 0, 0, syscall.Errno(1) -} - -// FcntlInt is a wrapper -func FcntlInt(fd uintptr, cmd, arg int) (int, error) { - return -1, errNonLinux -} - -// IoctlSetInt is a wrapper -func IoctlSetInt(fd int, req uint, value int) error { - return errNonLinux -} - -// Statfs is a wrapper -func Statfs(path string, buf *Statfs_t) error { - return errNonLinux -} - -// Close is a wrapper -func Close(fd int) (err error) { - return 
errNonLinux -} - -// EpollEvent is a wrapper -type EpollEvent struct { - Events uint32 - Fd int32 - Pad int32 -} - -// EpollWait is a wrapper -func EpollWait(epfd int, events []EpollEvent, msec int) (n int, err error) { - return 0, errNonLinux -} - -// EpollCtl is a wrapper -func EpollCtl(epfd int, op int, fd int, event *EpollEvent) (err error) { - return errNonLinux -} - -// Eventfd is a wrapper -func Eventfd(initval uint, flags int) (fd int, err error) { - return 0, errNonLinux -} - -// Write is a wrapper -func Write(fd int, p []byte) (n int, err error) { - return 0, errNonLinux -} - -// EpollCreate1 is a wrapper -func EpollCreate1(flag int) (fd int, err error) { - return 0, errNonLinux -} - -// PerfEventMmapPage is a wrapper -type PerfEventMmapPage struct { - Version uint32 - Compat_version uint32 - Lock uint32 - Index uint32 - Offset int64 - Time_enabled uint64 - Time_running uint64 - Capabilities uint64 - Pmc_width uint16 - Time_shift uint16 - Time_mult uint32 - Time_offset uint64 - Time_zero uint64 - Size uint32 - - Data_head uint64 - Data_tail uint64 - Data_offset uint64 - Data_size uint64 - Aux_head uint64 - Aux_tail uint64 - Aux_offset uint64 - Aux_size uint64 -} - -// SetNonblock is a wrapper -func SetNonblock(fd int, nonblocking bool) (err error) { - return errNonLinux -} - -// Mmap is a wrapper -func Mmap(fd int, offset int64, length int, prot int, flags int) (data []byte, err error) { - return []byte{}, errNonLinux -} - -// Munmap is a wrapper -func Munmap(b []byte) (err error) { - return errNonLinux -} - -// PerfEventAttr is a wrapper -type PerfEventAttr struct { - Type uint32 - Size uint32 - Config uint64 - Sample uint64 - Sample_type uint64 - Read_format uint64 - Bits uint64 - Wakeup uint32 - Bp_type uint32 - Ext1 uint64 - Ext2 uint64 - Branch_sample_type uint64 - Sample_regs_user uint64 - Sample_stack_user uint32 - Clockid int32 - Sample_regs_intr uint64 - Aux_watermark uint32 - Sample_max_stack uint16 -} - -// PerfEventOpen is a wrapper -func PerfEventOpen(attr *PerfEventAttr, pid int, cpu int, groupFd int, flags int) (fd int, err error) { - return 0, errNonLinux -} - -// Utsname is a wrapper -type Utsname struct { - Release [65]byte - Version [65]byte -} - -// Uname is a wrapper -func Uname(buf *Utsname) (err error) { - return errNonLinux -} - -// Getpid is a wrapper -func Getpid() int { - return -1 -} - -// Gettid is a wrapper -func Gettid() int { - return -1 -} - -// Tgkill is a wrapper -func Tgkill(tgid int, tid int, sig syscall.Signal) (err error) { - return errNonLinux -} - -// BytePtrFromString is a wrapper -func BytePtrFromString(s string) (*byte, error) { - return nil, errNonLinux -} - -// ByteSliceToString is a wrapper -func ByteSliceToString(s []byte) string { - return "" -} - -// Renameat2 is a wrapper -func Renameat2(olddirfd int, oldpath string, newdirfd int, newpath string, flags uint) error { - return errNonLinux -} - -func KernelRelease() (string, error) { - return "", errNonLinux -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/internal/version.go b/src/runtime/vendor/github.com/cilium/ebpf/internal/version.go deleted file mode 100644 index 1a678bfe65..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/internal/version.go +++ /dev/null @@ -1,163 +0,0 @@ -package internal - -import ( - "fmt" - "io/ioutil" - "regexp" - "sync" - - "github.com/cilium/ebpf/internal/unix" -) - -const ( - // Version constant used in ELF binaries indicating that the loader needs to - // substitute the eBPF program's version with the value of the kernel's - // 
KERNEL_VERSION compile-time macro. Used for compatibility with BCC, gobpf - // and RedSift. - MagicKernelVersion = 0xFFFFFFFE -) - -var ( - // Match between one and three decimals separated by dots, with the last - // segment (patch level) being optional on some kernels. - // The x.y.z string must appear at the start of a string or right after - // whitespace to prevent sequences like 'x.y.z-a.b.c' from matching 'a.b.c'. - rgxKernelVersion = regexp.MustCompile(`(?:\A|\s)\d{1,3}\.\d{1,3}(?:\.\d{1,3})?`) - - kernelVersion = struct { - once sync.Once - version Version - err error - }{} -) - -// A Version in the form Major.Minor.Patch. -type Version [3]uint16 - -// NewVersion creates a version from a string like "Major.Minor.Patch". -// -// Patch is optional. -func NewVersion(ver string) (Version, error) { - var major, minor, patch uint16 - n, _ := fmt.Sscanf(ver, "%d.%d.%d", &major, &minor, &patch) - if n < 2 { - return Version{}, fmt.Errorf("invalid version: %s", ver) - } - return Version{major, minor, patch}, nil -} - -func (v Version) String() string { - if v[2] == 0 { - return fmt.Sprintf("v%d.%d", v[0], v[1]) - } - return fmt.Sprintf("v%d.%d.%d", v[0], v[1], v[2]) -} - -// Less returns true if the version is less than another version. -func (v Version) Less(other Version) bool { - for i, a := range v { - if a == other[i] { - continue - } - return a < other[i] - } - return false -} - -// Unspecified returns true if the version is all zero. -func (v Version) Unspecified() bool { - return v[0] == 0 && v[1] == 0 && v[2] == 0 -} - -// Kernel implements the kernel's KERNEL_VERSION macro from linux/version.h. -// It represents the kernel version and patch level as a single value. -func (v Version) Kernel() uint32 { - - // Kernels 4.4 and 4.9 have their SUBLEVEL clamped to 255 to avoid - // overflowing into PATCHLEVEL. - // See kernel commit 9b82f13e7ef3 ("kbuild: clamp SUBLEVEL to 255"). - s := v[2] - if s > 255 { - s = 255 - } - - // Truncate members to uint8 to prevent them from spilling over into - // each other when overflowing 8 bits. - return uint32(uint8(v[0]))<<16 | uint32(uint8(v[1]))<<8 | uint32(uint8(s)) -} - -// KernelVersion returns the version of the currently running kernel. -func KernelVersion() (Version, error) { - kernelVersion.once.Do(func() { - kernelVersion.version, kernelVersion.err = detectKernelVersion() - }) - - if kernelVersion.err != nil { - return Version{}, kernelVersion.err - } - return kernelVersion.version, nil -} - -// detectKernelVersion returns the version of the running kernel. It scans the -// following sources in order: /proc/version_signature, uname -v, uname -r. -// In each of those locations, the last-appearing x.y(.z) value is selected -// for parsing. The first location that yields a usable version number is -// returned. -func detectKernelVersion() (Version, error) { - - // Try reading /proc/version_signature for Ubuntu compatibility. - // Example format: Ubuntu 4.15.0-91.92-generic 4.15.18 - // This method exists in the kernel itself, see d18acd15c - // ("perf tools: Fix kernel version error in ubuntu"). - if pvs, err := ioutil.ReadFile("/proc/version_signature"); err == nil { - // If /proc/version_signature exists, failing to parse it is an error. - // It only exists on Ubuntu, where the real patch level is not obtainable - // through any other method. 
- v, err := findKernelVersion(string(pvs)) - if err != nil { - return Version{}, err - } - return v, nil - } - - var uname unix.Utsname - if err := unix.Uname(&uname); err != nil { - return Version{}, fmt.Errorf("calling uname: %w", err) - } - - // Debian puts the version including the patch level in uname.Version. - // It is not an error if there's no version number in uname.Version, - // as most distributions don't use it. Parsing can continue on uname.Release. - // Example format: #1 SMP Debian 4.19.37-5+deb10u2 (2019-08-08) - if v, err := findKernelVersion(unix.ByteSliceToString(uname.Version[:])); err == nil { - return v, nil - } - - // Most other distributions have the full kernel version including patch - // level in uname.Release. - // Example format: 4.19.0-5-amd64, 5.5.10-arch1-1 - v, err := findKernelVersion(unix.ByteSliceToString(uname.Release[:])) - if err != nil { - return Version{}, err - } - - return v, nil -} - -// findKernelVersion matches s against rgxKernelVersion and parses the result -// into a Version. If s contains multiple matches, the last entry is selected. -func findKernelVersion(s string) (Version, error) { - m := rgxKernelVersion.FindAllString(s, -1) - if m == nil { - return Version{}, fmt.Errorf("no kernel version in string: %s", s) - } - // Pick the last match of the string in case there are multiple. - s = m[len(m)-1] - - v, err := NewVersion(s) - if err != nil { - return Version{}, fmt.Errorf("parsing version string %s: %w", s, err) - } - - return v, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/cgroup.go b/src/runtime/vendor/github.com/cilium/ebpf/link/cgroup.go deleted file mode 100644 index 5540bb068c..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/cgroup.go +++ /dev/null @@ -1,171 +0,0 @@ -package link - -import ( - "errors" - "fmt" - "os" - - "github.com/cilium/ebpf" -) - -type cgroupAttachFlags uint32 - -// cgroup attach flags -const ( - flagAllowOverride cgroupAttachFlags = 1 << iota - flagAllowMulti - flagReplace -) - -type CgroupOptions struct { - // Path to a cgroupv2 folder. - Path string - // One of the AttachCgroup* constants - Attach ebpf.AttachType - // Program must be of type CGroup*, and the attach type must match Attach. - Program *ebpf.Program -} - -// AttachCgroup links a BPF program to a cgroup. -func AttachCgroup(opts CgroupOptions) (Link, error) { - cgroup, err := os.Open(opts.Path) - if err != nil { - return nil, fmt.Errorf("can't open cgroup: %s", err) - } - - clone, err := opts.Program.Clone() - if err != nil { - cgroup.Close() - return nil, err - } - - var cg Link - cg, err = newLinkCgroup(cgroup, opts.Attach, clone) - if errors.Is(err, ErrNotSupported) { - cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowMulti) - } - if errors.Is(err, ErrNotSupported) { - cg, err = newProgAttachCgroup(cgroup, opts.Attach, clone, flagAllowOverride) - } - if err != nil { - cgroup.Close() - clone.Close() - return nil, err - } - - return cg, nil -} - -// LoadPinnedCgroup loads a pinned cgroup from a bpffs. 
-func LoadPinnedCgroup(fileName string, opts *ebpf.LoadPinOptions) (Link, error) { - link, err := LoadPinnedRawLink(fileName, CgroupType, opts) - if err != nil { - return nil, err - } - - return &linkCgroup{*link}, nil -} - -type progAttachCgroup struct { - cgroup *os.File - current *ebpf.Program - attachType ebpf.AttachType - flags cgroupAttachFlags -} - -var _ Link = (*progAttachCgroup)(nil) - -func (cg *progAttachCgroup) isLink() {} - -func newProgAttachCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program, flags cgroupAttachFlags) (*progAttachCgroup, error) { - if flags&flagAllowMulti > 0 { - if err := haveProgAttachReplace(); err != nil { - return nil, fmt.Errorf("can't support multiple programs: %w", err) - } - } - - err := RawAttachProgram(RawAttachProgramOptions{ - Target: int(cgroup.Fd()), - Program: prog, - Flags: uint32(flags), - Attach: attach, - }) - if err != nil { - return nil, fmt.Errorf("cgroup: %w", err) - } - - return &progAttachCgroup{cgroup, prog, attach, flags}, nil -} - -func (cg *progAttachCgroup) Close() error { - defer cg.cgroup.Close() - defer cg.current.Close() - - err := RawDetachProgram(RawDetachProgramOptions{ - Target: int(cg.cgroup.Fd()), - Program: cg.current, - Attach: cg.attachType, - }) - if err != nil { - return fmt.Errorf("close cgroup: %s", err) - } - return nil -} - -func (cg *progAttachCgroup) Update(prog *ebpf.Program) error { - new, err := prog.Clone() - if err != nil { - return err - } - - args := RawAttachProgramOptions{ - Target: int(cg.cgroup.Fd()), - Program: prog, - Attach: cg.attachType, - Flags: uint32(cg.flags), - } - - if cg.flags&flagAllowMulti > 0 { - // Atomically replacing multiple programs requires at least - // 5.5 (commit 7dd68b3279f17921 "bpf: Support replacing cgroup-bpf - // program in MULTI mode") - args.Flags |= uint32(flagReplace) - args.Replace = cg.current - } - - if err := RawAttachProgram(args); err != nil { - new.Close() - return fmt.Errorf("can't update cgroup: %s", err) - } - - cg.current.Close() - cg.current = new - return nil -} - -func (cg *progAttachCgroup) Pin(string) error { - return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported) -} - -func (cg *progAttachCgroup) Unpin() error { - return fmt.Errorf("can't pin cgroup: %w", ErrNotSupported) -} - -type linkCgroup struct { - RawLink -} - -var _ Link = (*linkCgroup)(nil) - -func newLinkCgroup(cgroup *os.File, attach ebpf.AttachType, prog *ebpf.Program) (*linkCgroup, error) { - link, err := AttachRawLink(RawLinkOptions{ - Target: int(cgroup.Fd()), - Program: prog, - Attach: attach, - }) - if err != nil { - return nil, err - } - - return &linkCgroup{*link}, err -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/doc.go b/src/runtime/vendor/github.com/cilium/ebpf/link/doc.go deleted file mode 100644 index 2bde35ed7a..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/doc.go +++ /dev/null @@ -1,2 +0,0 @@ -// Package link allows attaching eBPF programs to various kernel hooks. -package link diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/iter.go b/src/runtime/vendor/github.com/cilium/ebpf/link/iter.go deleted file mode 100644 index 654d34ef84..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/iter.go +++ /dev/null @@ -1,100 +0,0 @@ -package link - -import ( - "fmt" - "io" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal" -) - -type IterOptions struct { - // Program must be of type Tracing with attach type - // AttachTraceIter. 
The kind of iterator to attach to is - // determined at load time via the AttachTo field. - // - // AttachTo requires the kernel to include BTF of itself, - // and it to be compiled with a recent pahole (>= 1.16). - Program *ebpf.Program - - // Map specifies the target map for bpf_map_elem and sockmap iterators. - // It may be nil. - Map *ebpf.Map -} - -// AttachIter attaches a BPF seq_file iterator. -func AttachIter(opts IterOptions) (*Iter, error) { - if err := haveBPFLink(); err != nil { - return nil, err - } - - progFd := opts.Program.FD() - if progFd < 0 { - return nil, fmt.Errorf("invalid program: %s", internal.ErrClosedFd) - } - - var info bpfIterLinkInfoMap - if opts.Map != nil { - mapFd := opts.Map.FD() - if mapFd < 0 { - return nil, fmt.Errorf("invalid map: %w", internal.ErrClosedFd) - } - info.map_fd = uint32(mapFd) - } - - attr := bpfLinkCreateIterAttr{ - prog_fd: uint32(progFd), - attach_type: ebpf.AttachTraceIter, - iter_info: internal.NewPointer(unsafe.Pointer(&info)), - iter_info_len: uint32(unsafe.Sizeof(info)), - } - - fd, err := bpfLinkCreateIter(&attr) - if err != nil { - return nil, fmt.Errorf("can't link iterator: %w", err) - } - - return &Iter{RawLink{fd, ""}}, err -} - -// LoadPinnedIter loads a pinned iterator from a bpffs. -func LoadPinnedIter(fileName string, opts *ebpf.LoadPinOptions) (*Iter, error) { - link, err := LoadPinnedRawLink(fileName, IterType, opts) - if err != nil { - return nil, err - } - - return &Iter{*link}, err -} - -// Iter represents an attached bpf_iter. -type Iter struct { - RawLink -} - -// Open creates a new instance of the iterator. -// -// Reading from the returned reader triggers the BPF program. -func (it *Iter) Open() (io.ReadCloser, error) { - linkFd, err := it.fd.Value() - if err != nil { - return nil, err - } - - attr := &bpfIterCreateAttr{ - linkFd: linkFd, - } - - fd, err := bpfIterCreate(attr) - if err != nil { - return nil, fmt.Errorf("can't create iterator: %w", err) - } - - return fd.File("bpf_iter"), nil -} - -// union bpf_iter_link_info.map -type bpfIterLinkInfoMap struct { - map_fd uint32 -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/kprobe.go b/src/runtime/vendor/github.com/cilium/ebpf/link/kprobe.go deleted file mode 100644 index ea71d6d608..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/kprobe.go +++ /dev/null @@ -1,438 +0,0 @@ -package link - -import ( - "bytes" - "crypto/rand" - "errors" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "runtime" - "sync" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/unix" -) - -var ( - kprobeEventsPath = filepath.Join(tracefsPath, "kprobe_events") - - kprobeRetprobeBit = struct { - once sync.Once - value uint64 - err error - }{} -) - -type probeType uint8 - -const ( - kprobeType probeType = iota - uprobeType -) - -func (pt probeType) String() string { - if pt == kprobeType { - return "kprobe" - } - return "uprobe" -} - -func (pt probeType) EventsPath() string { - if pt == kprobeType { - return kprobeEventsPath - } - return uprobeEventsPath -} - -func (pt probeType) PerfEventType(ret bool) perfEventType { - if pt == kprobeType { - if ret { - return kretprobeEvent - } - return kprobeEvent - } - if ret { - return uretprobeEvent - } - return uprobeEvent -} - -func (pt probeType) RetprobeBit() (uint64, error) { - if pt == kprobeType { - return kretprobeBit() - } - return uretprobeBit() -} - -// Kprobe attaches the given eBPF program to a perf event that fires when the -// given kernel symbol 
starts executing. See /proc/kallsyms for available -// symbols. For example, printk(): -// -// Kprobe("printk", prog) -// -// The resulting Link must be Closed during program shutdown to avoid leaking -// system resources. -func Kprobe(symbol string, prog *ebpf.Program) (Link, error) { - k, err := kprobe(symbol, prog, false) - if err != nil { - return nil, err - } - - err = k.attach(prog) - if err != nil { - k.Close() - return nil, err - } - - return k, nil -} - -// Kretprobe attaches the given eBPF program to a perf event that fires right -// before the given kernel symbol exits, with the function stack left intact. -// See /proc/kallsyms for available symbols. For example, printk(): -// -// Kretprobe("printk", prog) -// -// The resulting Link must be Closed during program shutdown to avoid leaking -// system resources. -func Kretprobe(symbol string, prog *ebpf.Program) (Link, error) { - k, err := kprobe(symbol, prog, true) - if err != nil { - return nil, err - } - - err = k.attach(prog) - if err != nil { - k.Close() - return nil, err - } - - return k, nil -} - -// kprobe opens a perf event on the given symbol and attaches prog to it. -// If ret is true, create a kretprobe. -func kprobe(symbol string, prog *ebpf.Program, ret bool) (*perfEvent, error) { - if symbol == "" { - return nil, fmt.Errorf("symbol name cannot be empty: %w", errInvalidInput) - } - if prog == nil { - return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) - } - if !rgxTraceEvent.MatchString(symbol) { - return nil, fmt.Errorf("symbol '%s' must be alphanumeric or underscore: %w", symbol, errInvalidInput) - } - if prog.Type() != ebpf.Kprobe { - return nil, fmt.Errorf("eBPF program type %s is not a Kprobe: %w", prog.Type(), errInvalidInput) - } - - // Use kprobe PMU if the kernel has it available. - tp, err := pmuKprobe(platformPrefix(symbol), ret) - if errors.Is(err, os.ErrNotExist) { - tp, err = pmuKprobe(symbol, ret) - } - if err == nil { - return tp, nil - } - if err != nil && !errors.Is(err, ErrNotSupported) { - return nil, fmt.Errorf("creating perf_kprobe PMU: %w", err) - } - - // Use tracefs if kprobe PMU is missing. - tp, err = tracefsKprobe(platformPrefix(symbol), ret) - if errors.Is(err, os.ErrNotExist) { - tp, err = tracefsKprobe(symbol, ret) - } - if err != nil { - return nil, fmt.Errorf("creating trace event '%s' in tracefs: %w", symbol, err) - } - - return tp, nil -} - -// pmuKprobe opens a perf event based on the kprobe PMU. -// Returns os.ErrNotExist if the given symbol does not exist in the kernel. -func pmuKprobe(symbol string, ret bool) (*perfEvent, error) { - return pmuProbe(kprobeType, symbol, "", 0, ret) -} - -// pmuProbe opens a perf event based on a Performance Monitoring Unit. -// -// Requires at least a 4.17 kernel. -// e12f03d7031a "perf/core: Implement the 'perf_kprobe' PMU" -// 33ea4b24277b "perf/core: Implement the 'perf_uprobe' PMU" -// -// Returns ErrNotSupported if the kernel doesn't support perf_[k,u]probe PMU -func pmuProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) { - // Getting the PMU type will fail if the kernel doesn't support - // the perf_[k,u]probe PMU. - et, err := getPMUEventType(typ) - if err != nil { - return nil, err - } - - var config uint64 - if ret { - bit, err := typ.RetprobeBit() - if err != nil { - return nil, err - } - config |= 1 << bit - } - - var ( - attr unix.PerfEventAttr - sp unsafe.Pointer - ) - switch typ { - case kprobeType: - // Create a pointer to a NUL-terminated string for the kernel. 
- sp, err := unsafeStringPtr(symbol) - if err != nil { - return nil, err - } - - attr = unix.PerfEventAttr{ - Type: uint32(et), // PMU event type read from sysfs - Ext1: uint64(uintptr(sp)), // Kernel symbol to trace - Config: config, // Retprobe flag - } - case uprobeType: - sp, err := unsafeStringPtr(path) - if err != nil { - return nil, err - } - - attr = unix.PerfEventAttr{ - // The minimum size required for PMU uprobes is PERF_ATTR_SIZE_VER1, - // since it added the config2 (Ext2) field. The Size field controls the - // size of the internal buffer the kernel allocates for reading the - // perf_event_attr argument from userspace. - Size: unix.PERF_ATTR_SIZE_VER1, - Type: uint32(et), // PMU event type read from sysfs - Ext1: uint64(uintptr(sp)), // Uprobe path - Ext2: offset, // Uprobe offset - Config: config, // Retprobe flag - } - } - - fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) - - // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL - // when trying to create a kretprobe for a missing symbol. Make sure ENOENT - // is returned to the caller. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { - return nil, fmt.Errorf("symbol '%s' not found: %w", symbol, os.ErrNotExist) - } - if err != nil { - return nil, fmt.Errorf("opening perf event: %w", err) - } - - // Ensure the string pointer is not collected before PerfEventOpen returns. - runtime.KeepAlive(sp) - - // Kernel has perf_[k,u]probe PMU available, initialize perf event. - return &perfEvent{ - fd: internal.NewFD(uint32(fd)), - pmuID: et, - name: symbol, - typ: typ.PerfEventType(ret), - }, nil -} - -// tracefsKprobe creates a Kprobe tracefs entry. -func tracefsKprobe(symbol string, ret bool) (*perfEvent, error) { - return tracefsProbe(kprobeType, symbol, "", 0, ret) -} - -// tracefsProbe creates a trace event by writing an entry to /[k,u]probe_events. -// A new trace event group name is generated on every call to support creating -// multiple trace events for the same kernel or userspace symbol. -// Path and offset are only set in the case of uprobe(s) and are used to set -// the executable/library path on the filesystem and the offset where the probe is inserted. -// A perf event is then opened on the newly-created trace event and returned to the caller. -func tracefsProbe(typ probeType, symbol, path string, offset uint64, ret bool) (*perfEvent, error) { - // Generate a random string for each trace event we attempt to create. - // This value is used as the 'group' token in tracefs to allow creating - // multiple kprobe trace events with the same name. - group, err := randomGroup("ebpf") - if err != nil { - return nil, fmt.Errorf("randomizing group name: %w", err) - } - - // Before attempting to create a trace event through tracefs, - // check if an event with the same group and name already exists. - // Kernels 4.x and earlier don't return os.ErrExist on writing a duplicate - // entry, so we need to rely on reads for detecting uniqueness. - _, err = getTraceEventID(group, symbol) - if err == nil { - return nil, fmt.Errorf("trace event already exists: %s/%s", group, symbol) - } - if err != nil && !errors.Is(err, os.ErrNotExist) { - return nil, fmt.Errorf("checking trace event %s/%s: %w", group, symbol, err) - } - - // Create the [k,u]probe trace event using tracefs. 
- if err := createTraceFSProbeEvent(typ, group, symbol, path, offset, ret); err != nil { - return nil, fmt.Errorf("creating probe entry on tracefs: %w", err) - } - - // Get the newly-created trace event's id. - tid, err := getTraceEventID(group, symbol) - if err != nil { - return nil, fmt.Errorf("getting trace event id: %w", err) - } - - // Kprobes are ephemeral tracepoints and share the same perf event type. - fd, err := openTracepointPerfEvent(tid) - if err != nil { - return nil, err - } - - return &perfEvent{ - fd: fd, - group: group, - name: symbol, - tracefsID: tid, - typ: typ.PerfEventType(ret), - }, nil -} - -// createTraceFSProbeEvent creates a new ephemeral trace event by writing to -// /[k,u]probe_events. Returns os.ErrNotExist if symbol is not a valid -// kernel symbol, or if it is not traceable with kprobes. Returns os.ErrExist -// if a probe with the same group and symbol already exists. -func createTraceFSProbeEvent(typ probeType, group, symbol, path string, offset uint64, ret bool) error { - // Open the kprobe_events file in tracefs. - f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) - if err != nil { - return fmt.Errorf("error opening '%s': %w", typ.EventsPath(), err) - } - defer f.Close() - - var pe string - switch typ { - case kprobeType: - // The kprobe_events syntax is as follows (see Documentation/trace/kprobetrace.txt): - // p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe - // r[MAXACTIVE][:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe - // -:[GRP/]EVENT : Clear a probe - // - // Some examples: - // r:ebpf_1234/r_my_kretprobe nf_conntrack_destroy - // p:ebpf_5678/p_my_kprobe __x64_sys_execve - // - // Leaving the kretprobe's MAXACTIVE set to 0 (or absent) will make the - // kernel default to NR_CPUS. This is desired in most eBPF cases since - // subsampling or rate limiting logic can be more accurately implemented in - // the eBPF program itself. - // See Documentation/kprobes.txt for more details. - pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, symbol) - case uprobeType: - // The uprobe_events syntax is as follows: - // p[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a probe - // r[:[GRP/]EVENT] PATH:OFFSET [FETCHARGS] : Set a return probe - // -:[GRP/]EVENT : Clear a probe - // - // Some examples: - // r:ebpf_1234/readline /bin/bash:0x12345 - // p:ebpf_5678/main_mySymbol /bin/mybin:0x12345 - // - // See Documentation/trace/uprobetracer.txt for more details. - pathOffset := uprobePathOffset(path, offset) - pe = fmt.Sprintf("%s:%s/%s %s", probePrefix(ret), group, symbol, pathOffset) - } - _, err = f.WriteString(pe) - // Since commit 97c753e62e6c, ENOENT is correctly returned instead of EINVAL - // when trying to create a kretprobe for a missing symbol. Make sure ENOENT - // is returned to the caller. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.EINVAL) { - return fmt.Errorf("symbol %s not found: %w", symbol, os.ErrNotExist) - } - if err != nil { - return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) - } - - return nil -} - -// closeTraceFSProbeEvent removes the [k,u]probe with the given type, group and symbol -// from /[k,u]probe_events. -func closeTraceFSProbeEvent(typ probeType, group, symbol string) error { - f, err := os.OpenFile(typ.EventsPath(), os.O_APPEND|os.O_WRONLY, 0666) - if err != nil { - return fmt.Errorf("error opening %s: %w", typ.EventsPath(), err) - } - defer f.Close() - - // See [k,u]probe_events syntax above. 
The probe type does not need to be specified - // for removals. - pe := fmt.Sprintf("-:%s/%s", group, symbol) - if _, err = f.WriteString(pe); err != nil { - return fmt.Errorf("writing '%s' to '%s': %w", pe, typ.EventsPath(), err) - } - - return nil -} - -// randomGroup generates a pseudorandom string for use as a tracefs group name. -// Returns an error when the output string would exceed 63 characters (kernel -// limitation), when rand.Read() fails or when prefix contains characters not -// allowed by rgxTraceEvent. -func randomGroup(prefix string) (string, error) { - if !rgxTraceEvent.MatchString(prefix) { - return "", fmt.Errorf("prefix '%s' must be alphanumeric or underscore: %w", prefix, errInvalidInput) - } - - b := make([]byte, 8) - if _, err := rand.Read(b); err != nil { - return "", fmt.Errorf("reading random bytes: %w", err) - } - - group := fmt.Sprintf("%s_%x", prefix, b) - if len(group) > 63 { - return "", fmt.Errorf("group name '%s' cannot be longer than 63 characters: %w", group, errInvalidInput) - } - - return group, nil -} - -func probePrefix(ret bool) string { - if ret { - return "r" - } - return "p" -} - -// determineRetprobeBit reads a Performance Monitoring Unit's retprobe bit -// from /sys/bus/event_source/devices//format/retprobe. -func determineRetprobeBit(typ probeType) (uint64, error) { - p := filepath.Join("/sys/bus/event_source/devices/", typ.String(), "/format/retprobe") - - data, err := ioutil.ReadFile(p) - if err != nil { - return 0, err - } - - var rp uint64 - n, err := fmt.Sscanf(string(bytes.TrimSpace(data)), "config:%d", &rp) - if err != nil { - return 0, fmt.Errorf("parse retprobe bit: %w", err) - } - if n != 1 { - return 0, fmt.Errorf("parse retprobe bit: expected 1 item, got %d", n) - } - - return rp, nil -} - -func kretprobeBit() (uint64, error) { - kprobeRetprobeBit.once.Do(func() { - kprobeRetprobeBit.value, kprobeRetprobeBit.err = determineRetprobeBit(kprobeType) - }) - return kprobeRetprobeBit.value, kprobeRetprobeBit.err -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/link.go b/src/runtime/vendor/github.com/cilium/ebpf/link/link.go deleted file mode 100644 index 16cfff415d..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/link.go +++ /dev/null @@ -1,229 +0,0 @@ -package link - -import ( - "fmt" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal" -) - -var ErrNotSupported = internal.ErrNotSupported - -// Link represents a Program attached to a BPF hook. -type Link interface { - // Replace the current program with a new program. - // - // Passing a nil program is an error. May return an error wrapping ErrNotSupported. - Update(*ebpf.Program) error - - // Persist a link by pinning it into a bpffs. - // - // May return an error wrapping ErrNotSupported. - Pin(string) error - - // Undo a previous call to Pin. - // - // May return an error wrapping ErrNotSupported. - Unpin() error - - // Close frees resources. - // - // The link will be broken unless it has been pinned. A link - // may continue past the lifetime of the process if Close is - // not called. - Close() error - - // Prevent external users from implementing this interface. - isLink() -} - -// ID uniquely identifies a BPF link. -type ID uint32 - -// RawLinkOptions control the creation of a raw link. -type RawLinkOptions struct { - // File descriptor to attach to. This differs for each attach type. - Target int - // Program to attach. - Program *ebpf.Program - // Attach must match the attach type of Program. 
- Attach ebpf.AttachType -} - -// RawLinkInfo contains metadata on a link. -type RawLinkInfo struct { - Type Type - ID ID - Program ebpf.ProgramID -} - -// RawLink is the low-level API to bpf_link. -// -// You should consider using the higher level interfaces in this -// package instead. -type RawLink struct { - fd *internal.FD - pinnedPath string -} - -// AttachRawLink creates a raw link. -func AttachRawLink(opts RawLinkOptions) (*RawLink, error) { - if err := haveBPFLink(); err != nil { - return nil, err - } - - if opts.Target < 0 { - return nil, fmt.Errorf("invalid target: %s", internal.ErrClosedFd) - } - - progFd := opts.Program.FD() - if progFd < 0 { - return nil, fmt.Errorf("invalid program: %s", internal.ErrClosedFd) - } - - attr := bpfLinkCreateAttr{ - targetFd: uint32(opts.Target), - progFd: uint32(progFd), - attachType: opts.Attach, - } - fd, err := bpfLinkCreate(&attr) - if err != nil { - return nil, fmt.Errorf("can't create link: %s", err) - } - - return &RawLink{fd, ""}, nil -} - -// LoadPinnedRawLink loads a persisted link from a bpffs. -// -// Returns an error if the pinned link type doesn't match linkType. Pass -// UnspecifiedType to disable this behaviour. -func LoadPinnedRawLink(fileName string, linkType Type, opts *ebpf.LoadPinOptions) (*RawLink, error) { - fd, err := internal.BPFObjGet(fileName, opts.Marshal()) - if err != nil { - return nil, fmt.Errorf("load pinned link: %w", err) - } - - link := &RawLink{fd, fileName} - if linkType == UnspecifiedType { - return link, nil - } - - info, err := link.Info() - if err != nil { - link.Close() - return nil, fmt.Errorf("get pinned link info: %s", err) - } - - if info.Type != linkType { - link.Close() - return nil, fmt.Errorf("link type %v doesn't match %v", info.Type, linkType) - } - - return link, nil -} - -func (l *RawLink) isLink() {} - -// FD returns the raw file descriptor. -func (l *RawLink) FD() int { - fd, err := l.fd.Value() - if err != nil { - return -1 - } - return int(fd) -} - -// Close breaks the link. -// -// Use Pin if you want to make the link persistent. -func (l *RawLink) Close() error { - return l.fd.Close() -} - -// Pin persists a link past the lifetime of the process. -// -// Calling Close on a pinned Link will not break the link -// until the pin is removed. -func (l *RawLink) Pin(fileName string) error { - if err := internal.Pin(l.pinnedPath, fileName, l.fd); err != nil { - return err - } - l.pinnedPath = fileName - return nil -} - -// Unpin implements the Link interface. -func (l *RawLink) Unpin() error { - if err := internal.Unpin(l.pinnedPath); err != nil { - return err - } - l.pinnedPath = "" - return nil -} - -// Update implements the Link interface. -func (l *RawLink) Update(new *ebpf.Program) error { - return l.UpdateArgs(RawLinkUpdateOptions{ - New: new, - }) -} - -// RawLinkUpdateOptions control the behaviour of RawLink.UpdateArgs. -type RawLinkUpdateOptions struct { - New *ebpf.Program - Old *ebpf.Program - Flags uint32 -} - -// UpdateArgs updates a link based on args. 
-func (l *RawLink) UpdateArgs(opts RawLinkUpdateOptions) error { - newFd := opts.New.FD() - if newFd < 0 { - return fmt.Errorf("invalid program: %s", internal.ErrClosedFd) - } - - var oldFd int - if opts.Old != nil { - oldFd = opts.Old.FD() - if oldFd < 0 { - return fmt.Errorf("invalid replacement program: %s", internal.ErrClosedFd) - } - } - - linkFd, err := l.fd.Value() - if err != nil { - return fmt.Errorf("can't update link: %s", err) - } - - attr := bpfLinkUpdateAttr{ - linkFd: linkFd, - newProgFd: uint32(newFd), - oldProgFd: uint32(oldFd), - flags: opts.Flags, - } - return bpfLinkUpdate(&attr) -} - -// struct bpf_link_info -type bpfLinkInfo struct { - typ uint32 - id uint32 - prog_id uint32 -} - -// Info returns metadata about the link. -func (l *RawLink) Info() (*RawLinkInfo, error) { - var info bpfLinkInfo - err := internal.BPFObjGetInfoByFD(l.fd, unsafe.Pointer(&info), unsafe.Sizeof(info)) - if err != nil { - return nil, fmt.Errorf("link info: %s", err) - } - - return &RawLinkInfo{ - Type(info.typ), - ID(info.id), - ebpf.ProgramID(info.prog_id), - }, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/netns.go b/src/runtime/vendor/github.com/cilium/ebpf/link/netns.go deleted file mode 100644 index 37e5b84c4d..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/netns.go +++ /dev/null @@ -1,60 +0,0 @@ -package link - -import ( - "fmt" - - "github.com/cilium/ebpf" -) - -// NetNsInfo contains metadata about a network namespace link. -type NetNsInfo struct { - RawLinkInfo -} - -// NetNsLink is a program attached to a network namespace. -type NetNsLink struct { - *RawLink -} - -// AttachNetNs attaches a program to a network namespace. -func AttachNetNs(ns int, prog *ebpf.Program) (*NetNsLink, error) { - var attach ebpf.AttachType - switch t := prog.Type(); t { - case ebpf.FlowDissector: - attach = ebpf.AttachFlowDissector - case ebpf.SkLookup: - attach = ebpf.AttachSkLookup - default: - return nil, fmt.Errorf("can't attach %v to network namespace", t) - } - - link, err := AttachRawLink(RawLinkOptions{ - Target: ns, - Program: prog, - Attach: attach, - }) - if err != nil { - return nil, err - } - - return &NetNsLink{link}, nil -} - -// LoadPinnedNetNs loads a network namespace link from bpffs. -func LoadPinnedNetNs(fileName string, opts *ebpf.LoadPinOptions) (*NetNsLink, error) { - link, err := LoadPinnedRawLink(fileName, NetNsType, opts) - if err != nil { - return nil, err - } - - return &NetNsLink{link}, nil -} - -// Info returns information about the link. -func (nns *NetNsLink) Info() (*NetNsInfo, error) { - info, err := nns.RawLink.Info() - if err != nil { - return nil, err - } - return &NetNsInfo{*info}, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/perf_event.go b/src/runtime/vendor/github.com/cilium/ebpf/link/perf_event.go deleted file mode 100644 index 5267a47ec9..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/perf_event.go +++ /dev/null @@ -1,273 +0,0 @@ -package link - -import ( - "bytes" - "errors" - "fmt" - "io/ioutil" - "os" - "path/filepath" - "regexp" - "runtime" - "strconv" - "strings" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/unix" -) - -// Getting the terminology right is usually the hardest part. For posterity and -// for staying sane during implementation: -// -// - trace event: Representation of a kernel runtime hook. Filesystem entries -// under /events. Can be tracepoints (static), kprobes or uprobes. 
-// Can be instantiated into perf events (see below). -// - tracepoint: A predetermined hook point in the kernel. Exposed as trace -// events in (sub)directories under /events. Cannot be closed or -// removed, they are static. -// - k(ret)probe: Ephemeral trace events based on entry or exit points of -// exported kernel symbols. kprobe-based (tracefs) trace events can be -// created system-wide by writing to the /kprobe_events file, or -// they can be scoped to the current process by creating PMU perf events. -// - u(ret)probe: Ephemeral trace events based on user provides ELF binaries -// and offsets. uprobe-based (tracefs) trace events can be -// created system-wide by writing to the /uprobe_events file, or -// they can be scoped to the current process by creating PMU perf events. -// - perf event: An object instantiated based on an existing trace event or -// kernel symbol. Referred to by fd in userspace. -// Exactly one eBPF program can be attached to a perf event. Multiple perf -// events can be created from a single trace event. Closing a perf event -// stops any further invocations of the attached eBPF program. - -var ( - tracefsPath = "/sys/kernel/debug/tracing" - - // Trace event groups, names and kernel symbols must adhere to this set - // of characters. Non-empty, first character must not be a number, all - // characters must be alphanumeric or underscore. - rgxTraceEvent = regexp.MustCompile("^[a-zA-Z_][0-9a-zA-Z_]*$") - - errInvalidInput = errors.New("invalid input") -) - -const ( - perfAllThreads = -1 -) - -type perfEventType uint8 - -const ( - tracepointEvent perfEventType = iota - kprobeEvent - kretprobeEvent - uprobeEvent - uretprobeEvent -) - -// A perfEvent represents a perf event kernel object. Exactly one eBPF program -// can be attached to it. It is created based on a tracefs trace event or a -// Performance Monitoring Unit (PMU). -type perfEvent struct { - - // Group and name of the tracepoint/kprobe/uprobe. - group string - name string - - // PMU event ID read from sysfs. Valid IDs are non-zero. - pmuID uint64 - // ID of the trace event read from tracefs. Valid IDs are non-zero. - tracefsID uint64 - - // The event type determines the types of programs that can be attached. - typ perfEventType - - fd *internal.FD -} - -func (pe *perfEvent) isLink() {} - -func (pe *perfEvent) Pin(string) error { - return fmt.Errorf("pin perf event: %w", ErrNotSupported) -} - -func (pe *perfEvent) Unpin() error { - return fmt.Errorf("unpin perf event: %w", ErrNotSupported) -} - -// Since 4.15 (e87c6bc3852b "bpf: permit multiple bpf attachments for a single perf event"), -// calling PERF_EVENT_IOC_SET_BPF appends the given program to a prog_array -// owned by the perf event, which means multiple programs can be attached -// simultaneously. -// -// Before 4.15, calling PERF_EVENT_IOC_SET_BPF more than once on a perf event -// returns EEXIST. -// -// Detaching a program from a perf event is currently not possible, so a -// program replacement mechanism cannot be implemented for perf events. 
-func (pe *perfEvent) Update(prog *ebpf.Program) error { - return fmt.Errorf("can't replace eBPF program in perf event: %w", ErrNotSupported) -} - -func (pe *perfEvent) Close() error { - if pe.fd == nil { - return nil - } - - pfd, err := pe.fd.Value() - if err != nil { - return fmt.Errorf("getting perf event fd: %w", err) - } - - err = unix.IoctlSetInt(int(pfd), unix.PERF_EVENT_IOC_DISABLE, 0) - if err != nil { - return fmt.Errorf("disabling perf event: %w", err) - } - - err = pe.fd.Close() - if err != nil { - return fmt.Errorf("closing perf event fd: %w", err) - } - - switch pe.typ { - case kprobeEvent, kretprobeEvent: - // Clean up kprobe tracefs entry. - if pe.tracefsID != 0 { - return closeTraceFSProbeEvent(kprobeType, pe.group, pe.name) - } - case uprobeEvent, uretprobeEvent: - // Clean up uprobe tracefs entry. - if pe.tracefsID != 0 { - return closeTraceFSProbeEvent(uprobeType, pe.group, pe.name) - } - case tracepointEvent: - // Tracepoint trace events don't hold any extra resources. - return nil - } - - return nil -} - -// attach the given eBPF prog to the perf event stored in pe. -// pe must contain a valid perf event fd. -// prog's type must match the program type stored in pe. -func (pe *perfEvent) attach(prog *ebpf.Program) error { - if prog == nil { - return errors.New("cannot attach a nil program") - } - if pe.fd == nil { - return errors.New("cannot attach to nil perf event") - } - if prog.FD() < 0 { - return fmt.Errorf("invalid program: %w", internal.ErrClosedFd) - } - switch pe.typ { - case kprobeEvent, kretprobeEvent, uprobeEvent, uretprobeEvent: - if t := prog.Type(); t != ebpf.Kprobe { - return fmt.Errorf("invalid program type (expected %s): %s", ebpf.Kprobe, t) - } - case tracepointEvent: - if t := prog.Type(); t != ebpf.TracePoint { - return fmt.Errorf("invalid program type (expected %s): %s", ebpf.TracePoint, t) - } - default: - return fmt.Errorf("unknown perf event type: %d", pe.typ) - } - - // The ioctl below will fail when the fd is invalid. - kfd, _ := pe.fd.Value() - - // Assign the eBPF program to the perf event. - err := unix.IoctlSetInt(int(kfd), unix.PERF_EVENT_IOC_SET_BPF, prog.FD()) - if err != nil { - return fmt.Errorf("setting perf event bpf program: %w", err) - } - - // PERF_EVENT_IOC_ENABLE and _DISABLE ignore their given values. - if err := unix.IoctlSetInt(int(kfd), unix.PERF_EVENT_IOC_ENABLE, 0); err != nil { - return fmt.Errorf("enable perf event: %s", err) - } - - // Close the perf event when its reference is lost to avoid leaking system resources. - runtime.SetFinalizer(pe, (*perfEvent).Close) - return nil -} - -// unsafeStringPtr returns an unsafe.Pointer to a NUL-terminated copy of str. -func unsafeStringPtr(str string) (unsafe.Pointer, error) { - p, err := unix.BytePtrFromString(str) - if err != nil { - return nil, err - } - return unsafe.Pointer(p), nil -} - -// getTraceEventID reads a trace event's ID from tracefs given its group and name. -// group and name must be alphanumeric or underscore, as required by the kernel. -func getTraceEventID(group, name string) (uint64, error) { - tid, err := uint64FromFile(tracefsPath, "events", group, name, "id") - if errors.Is(err, os.ErrNotExist) { - return 0, fmt.Errorf("trace event %s/%s: %w", group, name, os.ErrNotExist) - } - if err != nil { - return 0, fmt.Errorf("reading trace event ID of %s/%s: %w", group, name, err) - } - - return tid, nil -} - -// getPMUEventType reads a Performance Monitoring Unit's type (numeric identifier) -// from /sys/bus/event_source/devices//type. 
-// -// Returns ErrNotSupported if the pmu type is not supported. -func getPMUEventType(typ probeType) (uint64, error) { - et, err := uint64FromFile("/sys/bus/event_source/devices", typ.String(), "type") - if errors.Is(err, os.ErrNotExist) { - return 0, fmt.Errorf("pmu type %s: %w", typ, ErrNotSupported) - } - if err != nil { - return 0, fmt.Errorf("reading pmu type %s: %w", typ, err) - } - - return et, nil -} - -// openTracepointPerfEvent opens a tracepoint-type perf event. System-wide -// [k,u]probes created by writing to /[k,u]probe_events are tracepoints -// behind the scenes, and can be attached to using these perf events. -func openTracepointPerfEvent(tid uint64) (*internal.FD, error) { - attr := unix.PerfEventAttr{ - Type: unix.PERF_TYPE_TRACEPOINT, - Config: tid, - Sample_type: unix.PERF_SAMPLE_RAW, - Sample: 1, - Wakeup: 1, - } - - fd, err := unix.PerfEventOpen(&attr, perfAllThreads, 0, -1, unix.PERF_FLAG_FD_CLOEXEC) - if err != nil { - return nil, fmt.Errorf("opening tracepoint perf event: %w", err) - } - - return internal.NewFD(uint32(fd)), nil -} - -// uint64FromFile reads a uint64 from a file. All elements of path are sanitized -// and joined onto base. Returns error if base no longer prefixes the path after -// joining all components. -func uint64FromFile(base string, path ...string) (uint64, error) { - l := filepath.Join(path...) - p := filepath.Join(base, l) - if !strings.HasPrefix(p, base) { - return 0, fmt.Errorf("path '%s' attempts to escape base path '%s': %w", l, base, errInvalidInput) - } - - data, err := ioutil.ReadFile(p) - if err != nil { - return 0, fmt.Errorf("reading file %s: %w", p, err) - } - - et := bytes.TrimSpace(data) - return strconv.ParseUint(string(et), 10, 64) -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/platform.go b/src/runtime/vendor/github.com/cilium/ebpf/link/platform.go deleted file mode 100644 index eb6f7b7a37..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/platform.go +++ /dev/null @@ -1,25 +0,0 @@ -package link - -import ( - "fmt" - "runtime" -) - -func platformPrefix(symbol string) string { - - prefix := runtime.GOARCH - - // per https://github.com/golang/go/blob/master/src/go/build/syslist.go - switch prefix { - case "386": - prefix = "ia32" - case "amd64", "amd64p32": - prefix = "x64" - case "arm64", "arm64be": - prefix = "arm64" - default: - return symbol - } - - return fmt.Sprintf("__%s_%s", prefix, symbol) -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/program.go b/src/runtime/vendor/github.com/cilium/ebpf/link/program.go deleted file mode 100644 index b90c457467..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/program.go +++ /dev/null @@ -1,76 +0,0 @@ -package link - -import ( - "fmt" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal" -) - -type RawAttachProgramOptions struct { - // File descriptor to attach to. This differs for each attach type. - Target int - // Program to attach. - Program *ebpf.Program - // Program to replace (cgroups). - Replace *ebpf.Program - // Attach must match the attach type of Program (and Replace). - Attach ebpf.AttachType - // Flags control the attach behaviour. This differs for each attach type. - Flags uint32 -} - -// RawAttachProgram is a low level wrapper around BPF_PROG_ATTACH. -// -// You should use one of the higher level abstractions available in this -// package if possible. 
-func RawAttachProgram(opts RawAttachProgramOptions) error { - if err := haveProgAttach(); err != nil { - return err - } - - var replaceFd uint32 - if opts.Replace != nil { - replaceFd = uint32(opts.Replace.FD()) - } - - attr := internal.BPFProgAttachAttr{ - TargetFd: uint32(opts.Target), - AttachBpfFd: uint32(opts.Program.FD()), - ReplaceBpfFd: replaceFd, - AttachType: uint32(opts.Attach), - AttachFlags: uint32(opts.Flags), - } - - if err := internal.BPFProgAttach(&attr); err != nil { - return fmt.Errorf("can't attach program: %w", err) - } - return nil -} - -type RawDetachProgramOptions struct { - Target int - Program *ebpf.Program - Attach ebpf.AttachType -} - -// RawDetachProgram is a low level wrapper around BPF_PROG_DETACH. -// -// You should use one of the higher level abstractions available in this -// package if possible. -func RawDetachProgram(opts RawDetachProgramOptions) error { - if err := haveProgAttach(); err != nil { - return err - } - - attr := internal.BPFProgDetachAttr{ - TargetFd: uint32(opts.Target), - AttachBpfFd: uint32(opts.Program.FD()), - AttachType: uint32(opts.Attach), - } - if err := internal.BPFProgDetach(&attr); err != nil { - return fmt.Errorf("can't detach program: %w", err) - } - - return nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/raw_tracepoint.go b/src/runtime/vendor/github.com/cilium/ebpf/link/raw_tracepoint.go deleted file mode 100644 index f4beb1e078..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/raw_tracepoint.go +++ /dev/null @@ -1,61 +0,0 @@ -package link - -import ( - "fmt" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal" -) - -type RawTracepointOptions struct { - // Tracepoint name. - Name string - // Program must be of type RawTracepoint* - Program *ebpf.Program -} - -// AttachRawTracepoint links a BPF program to a raw_tracepoint. -// -// Requires at least Linux 4.17. 
-func AttachRawTracepoint(opts RawTracepointOptions) (Link, error) { - if t := opts.Program.Type(); t != ebpf.RawTracepoint && t != ebpf.RawTracepointWritable { - return nil, fmt.Errorf("invalid program type %s, expected RawTracepoint(Writable)", t) - } - if opts.Program.FD() < 0 { - return nil, fmt.Errorf("invalid program: %w", internal.ErrClosedFd) - } - - fd, err := bpfRawTracepointOpen(&bpfRawTracepointOpenAttr{ - name: internal.NewStringPointer(opts.Name), - fd: uint32(opts.Program.FD()), - }) - if err != nil { - return nil, err - } - - return &progAttachRawTracepoint{fd: fd}, nil -} - -type progAttachRawTracepoint struct { - fd *internal.FD -} - -var _ Link = (*progAttachRawTracepoint)(nil) - -func (rt *progAttachRawTracepoint) isLink() {} - -func (rt *progAttachRawTracepoint) Close() error { - return rt.fd.Close() -} - -func (rt *progAttachRawTracepoint) Update(_ *ebpf.Program) error { - return fmt.Errorf("can't update raw_tracepoint: %w", ErrNotSupported) -} - -func (rt *progAttachRawTracepoint) Pin(_ string) error { - return fmt.Errorf("can't pin raw_tracepoint: %w", ErrNotSupported) -} - -func (rt *progAttachRawTracepoint) Unpin() error { - return fmt.Errorf("unpin raw_tracepoint: %w", ErrNotSupported) -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/syscalls.go b/src/runtime/vendor/github.com/cilium/ebpf/link/syscalls.go deleted file mode 100644 index 30e8a88050..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/syscalls.go +++ /dev/null @@ -1,190 +0,0 @@ -package link - -import ( - "errors" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/unix" -) - -// Type is the kind of link. -type Type uint32 - -// Valid link types. -// -// Equivalent to enum bpf_link_type. -const ( - UnspecifiedType Type = iota - RawTracepointType - TracingType - CgroupType - IterType - NetNsType - XDPType -) - -var haveProgAttach = internal.FeatureTest("BPF_PROG_ATTACH", "4.10", func() error { - prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ - Type: ebpf.CGroupSKB, - AttachType: ebpf.AttachCGroupInetIngress, - License: "MIT", - Instructions: asm.Instructions{ - asm.Mov.Imm(asm.R0, 0), - asm.Return(), - }, - }) - if err != nil { - return internal.ErrNotSupported - } - - // BPF_PROG_ATTACH was introduced at the same time as CGgroupSKB, - // so being able to load the program is enough to infer that we - // have the syscall. - prog.Close() - return nil -}) - -var haveProgAttachReplace = internal.FeatureTest("BPF_PROG_ATTACH atomic replacement", "5.5", func() error { - if err := haveProgAttach(); err != nil { - return err - } - - prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ - Type: ebpf.CGroupSKB, - AttachType: ebpf.AttachCGroupInetIngress, - License: "MIT", - Instructions: asm.Instructions{ - asm.Mov.Imm(asm.R0, 0), - asm.Return(), - }, - }) - if err != nil { - return internal.ErrNotSupported - } - defer prog.Close() - - // We know that we have BPF_PROG_ATTACH since we can load CGroupSKB programs. - // If passing BPF_F_REPLACE gives us EINVAL we know that the feature isn't - // present. - attr := internal.BPFProgAttachAttr{ - // We rely on this being checked after attachFlags. 
- TargetFd: ^uint32(0), - AttachBpfFd: uint32(prog.FD()), - AttachType: uint32(ebpf.AttachCGroupInetIngress), - AttachFlags: uint32(flagReplace), - } - - err = internal.BPFProgAttach(&attr) - if errors.Is(err, unix.EINVAL) { - return internal.ErrNotSupported - } - if errors.Is(err, unix.EBADF) { - return nil - } - return err -}) - -type bpfLinkCreateAttr struct { - progFd uint32 - targetFd uint32 - attachType ebpf.AttachType - flags uint32 -} - -func bpfLinkCreate(attr *bpfLinkCreateAttr) (*internal.FD, error) { - ptr, err := internal.BPF(internal.BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - if err != nil { - return nil, err - } - return internal.NewFD(uint32(ptr)), nil -} - -type bpfLinkCreateIterAttr struct { - prog_fd uint32 - target_fd uint32 - attach_type ebpf.AttachType - flags uint32 - iter_info internal.Pointer - iter_info_len uint32 -} - -func bpfLinkCreateIter(attr *bpfLinkCreateIterAttr) (*internal.FD, error) { - ptr, err := internal.BPF(internal.BPF_LINK_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - if err != nil { - return nil, err - } - return internal.NewFD(uint32(ptr)), nil -} - -type bpfLinkUpdateAttr struct { - linkFd uint32 - newProgFd uint32 - flags uint32 - oldProgFd uint32 -} - -func bpfLinkUpdate(attr *bpfLinkUpdateAttr) error { - _, err := internal.BPF(internal.BPF_LINK_UPDATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - return err -} - -var haveBPFLink = internal.FeatureTest("bpf_link", "5.7", func() error { - prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ - Type: ebpf.CGroupSKB, - AttachType: ebpf.AttachCGroupInetIngress, - License: "MIT", - Instructions: asm.Instructions{ - asm.Mov.Imm(asm.R0, 0), - asm.Return(), - }, - }) - if err != nil { - return internal.ErrNotSupported - } - defer prog.Close() - - attr := bpfLinkCreateAttr{ - // This is a hopefully invalid file descriptor, which triggers EBADF. - targetFd: ^uint32(0), - progFd: uint32(prog.FD()), - attachType: ebpf.AttachCGroupInetIngress, - } - _, err = bpfLinkCreate(&attr) - if errors.Is(err, unix.EINVAL) { - return internal.ErrNotSupported - } - if errors.Is(err, unix.EBADF) { - return nil - } - return err -}) - -type bpfIterCreateAttr struct { - linkFd uint32 - flags uint32 -} - -func bpfIterCreate(attr *bpfIterCreateAttr) (*internal.FD, error) { - ptr, err := internal.BPF(internal.BPF_ITER_CREATE, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - if err == nil { - return internal.NewFD(uint32(ptr)), nil - } - return nil, err -} - -type bpfRawTracepointOpenAttr struct { - name internal.Pointer - fd uint32 - _ uint32 -} - -func bpfRawTracepointOpen(attr *bpfRawTracepointOpenAttr) (*internal.FD, error) { - ptr, err := internal.BPF(internal.BPF_RAW_TRACEPOINT_OPEN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - if err == nil { - return internal.NewFD(uint32(ptr)), nil - } - return nil, err -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/tracepoint.go b/src/runtime/vendor/github.com/cilium/ebpf/link/tracepoint.go deleted file mode 100644 index b8ae04bf0a..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/tracepoint.go +++ /dev/null @@ -1,56 +0,0 @@ -package link - -import ( - "fmt" - - "github.com/cilium/ebpf" -) - -// Tracepoint attaches the given eBPF program to the tracepoint with the given -// group and name. See /sys/kernel/debug/tracing/events to find available -// tracepoints. The top-level directory is the group, the event's subdirectory -// is the name. 
Example: -// -// Tracepoint("syscalls", "sys_enter_fork", prog) -// -// Note that attaching eBPF programs to syscalls (sys_enter_*/sys_exit_*) is -// only possible as of kernel 4.14 (commit cf5f5ce). -func Tracepoint(group, name string, prog *ebpf.Program) (Link, error) { - if group == "" || name == "" { - return nil, fmt.Errorf("group and name cannot be empty: %w", errInvalidInput) - } - if prog == nil { - return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) - } - if !rgxTraceEvent.MatchString(group) || !rgxTraceEvent.MatchString(name) { - return nil, fmt.Errorf("group and name '%s/%s' must be alphanumeric or underscore: %w", group, name, errInvalidInput) - } - if prog.Type() != ebpf.TracePoint { - return nil, fmt.Errorf("eBPF program type %s is not a Tracepoint: %w", prog.Type(), errInvalidInput) - } - - tid, err := getTraceEventID(group, name) - if err != nil { - return nil, err - } - - fd, err := openTracepointPerfEvent(tid) - if err != nil { - return nil, err - } - - pe := &perfEvent{ - fd: fd, - tracefsID: tid, - group: group, - name: name, - typ: tracepointEvent, - } - - if err := pe.attach(prog); err != nil { - pe.Close() - return nil, err - } - - return pe, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/link/uprobe.go b/src/runtime/vendor/github.com/cilium/ebpf/link/uprobe.go deleted file mode 100644 index 2bc395ee3c..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/link/uprobe.go +++ /dev/null @@ -1,237 +0,0 @@ -package link - -import ( - "debug/elf" - "errors" - "fmt" - "os" - "path/filepath" - "regexp" - "sync" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/internal" -) - -var ( - uprobeEventsPath = filepath.Join(tracefsPath, "uprobe_events") - - // rgxUprobeSymbol is used to strip invalid characters from the uprobe symbol - // as they are not allowed to be used as the EVENT token in tracefs. - rgxUprobeSymbol = regexp.MustCompile("[^a-zA-Z0-9]+") - - uprobeRetprobeBit = struct { - once sync.Once - value uint64 - err error - }{} -) - -// Executable defines an executable program on the filesystem. -type Executable struct { - // Path of the executable on the filesystem. - path string - // Parsed ELF symbols and dynamic symbols. - symbols map[string]elf.Symbol -} - -// UprobeOptions defines additional parameters that will be used -// when loading Uprobes. -type UprobeOptions struct { - // Symbol offset. Must be provided in case of external symbols (shared libs). - // If set, overrides the offset eventually parsed from the executable. - Offset uint64 -} - -// To open a new Executable, use: -// -// OpenExecutable("/bin/bash") -// -// The returned value can then be used to open Uprobe(s). -func OpenExecutable(path string) (*Executable, error) { - if path == "" { - return nil, fmt.Errorf("path cannot be empty") - } - - f, err := os.Open(path) - if err != nil { - return nil, fmt.Errorf("open file '%s': %w", path, err) - } - defer f.Close() - - se, err := internal.NewSafeELFFile(f) - if err != nil { - return nil, fmt.Errorf("parse ELF file: %w", err) - } - - var ex = Executable{ - path: path, - symbols: make(map[string]elf.Symbol), - } - if err := ex.addSymbols(se.Symbols); err != nil { - return nil, err - } - - if err := ex.addSymbols(se.DynamicSymbols); err != nil { - return nil, err - } - - return &ex, nil -} - -func (ex *Executable) addSymbols(f func() ([]elf.Symbol, error)) error { - // elf.Symbols and elf.DynamicSymbols return ErrNoSymbols if the section is not found. 
- syms, err := f() - if err != nil && !errors.Is(err, elf.ErrNoSymbols) { - return err - } - for _, s := range syms { - if elf.ST_TYPE(s.Info) != elf.STT_FUNC { - // Symbol not associated with a function or other executable code. - continue - } - ex.symbols[s.Name] = s - } - return nil -} - -func (ex *Executable) symbol(symbol string) (*elf.Symbol, error) { - if s, ok := ex.symbols[symbol]; ok { - return &s, nil - } - return nil, fmt.Errorf("symbol %s not found", symbol) -} - -// Uprobe attaches the given eBPF program to a perf event that fires when the -// given symbol starts executing in the given Executable. -// For example, /bin/bash::main(): -// -// ex, _ = OpenExecutable("/bin/bash") -// ex.Uprobe("main", prog, nil) -// -// When using symbols which belongs to shared libraries, -// an offset must be provided via options: -// -// ex.Uprobe("main", prog, &UprobeOptions{Offset: 0x123}) -// -// The resulting Link must be Closed during program shutdown to avoid leaking -// system resources. Functions provided by shared libraries can currently not -// be traced and will result in an ErrNotSupported. -func (ex *Executable) Uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) { - u, err := ex.uprobe(symbol, prog, opts, false) - if err != nil { - return nil, err - } - - err = u.attach(prog) - if err != nil { - u.Close() - return nil, err - } - - return u, nil -} - -// Uretprobe attaches the given eBPF program to a perf event that fires right -// before the given symbol exits. For example, /bin/bash::main(): -// -// ex, _ = OpenExecutable("/bin/bash") -// ex.Uretprobe("main", prog, nil) -// -// When using symbols which belongs to shared libraries, -// an offset must be provided via options: -// -// ex.Uretprobe("main", prog, &UprobeOptions{Offset: 0x123}) -// -// The resulting Link must be Closed during program shutdown to avoid leaking -// system resources. Functions provided by shared libraries can currently not -// be traced and will result in an ErrNotSupported. -func (ex *Executable) Uretprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions) (Link, error) { - u, err := ex.uprobe(symbol, prog, opts, true) - if err != nil { - return nil, err - } - - err = u.attach(prog) - if err != nil { - u.Close() - return nil, err - } - - return u, nil -} - -// uprobe opens a perf event for the given binary/symbol and attaches prog to it. -// If ret is true, create a uretprobe. -func (ex *Executable) uprobe(symbol string, prog *ebpf.Program, opts *UprobeOptions, ret bool) (*perfEvent, error) { - if prog == nil { - return nil, fmt.Errorf("prog cannot be nil: %w", errInvalidInput) - } - if prog.Type() != ebpf.Kprobe { - return nil, fmt.Errorf("eBPF program type %s is not Kprobe: %w", prog.Type(), errInvalidInput) - } - - var offset uint64 - if opts != nil && opts.Offset != 0 { - offset = opts.Offset - } else { - sym, err := ex.symbol(symbol) - if err != nil { - return nil, fmt.Errorf("symbol '%s' not found: %w", symbol, err) - } - - // Symbols with location 0 from section undef are shared library calls and - // are relocated before the binary is executed. Dynamic linking is not - // implemented by the library, so mark this as unsupported for now. - if sym.Section == elf.SHN_UNDEF && sym.Value == 0 { - return nil, fmt.Errorf("cannot resolve %s library call '%s', "+ - "consider providing the offset via options: %w", ex.path, symbol, ErrNotSupported) - } - - offset = sym.Value - } - - // Use uprobe PMU if the kernel has it available. 
- tp, err := pmuUprobe(symbol, ex.path, offset, ret) - if err == nil { - return tp, nil - } - if err != nil && !errors.Is(err, ErrNotSupported) { - return nil, fmt.Errorf("creating perf_uprobe PMU: %w", err) - } - - // Use tracefs if uprobe PMU is missing. - tp, err = tracefsUprobe(uprobeSanitizedSymbol(symbol), ex.path, offset, ret) - if err != nil { - return nil, fmt.Errorf("creating trace event '%s:%s' in tracefs: %w", ex.path, symbol, err) - } - - return tp, nil -} - -// pmuUprobe opens a perf event based on the uprobe PMU. -func pmuUprobe(symbol, path string, offset uint64, ret bool) (*perfEvent, error) { - return pmuProbe(uprobeType, symbol, path, offset, ret) -} - -// tracefsUprobe creates a Uprobe tracefs entry. -func tracefsUprobe(symbol, path string, offset uint64, ret bool) (*perfEvent, error) { - return tracefsProbe(uprobeType, symbol, path, offset, ret) -} - -// uprobeSanitizedSymbol replaces every invalid characted for the tracefs api with an underscore. -func uprobeSanitizedSymbol(symbol string) string { - return rgxUprobeSymbol.ReplaceAllString(symbol, "_") -} - -// uprobePathOffset creates the PATH:OFFSET token for the tracefs api. -func uprobePathOffset(path string, offset uint64) string { - return fmt.Sprintf("%s:%#x", path, offset) -} - -func uretprobeBit() (uint64, error) { - uprobeRetprobeBit.once.Do(func() { - uprobeRetprobeBit.value, uprobeRetprobeBit.err = determineRetprobeBit(uprobeType) - }) - return uprobeRetprobeBit.value, uprobeRetprobeBit.err -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/linker.go b/src/runtime/vendor/github.com/cilium/ebpf/linker.go deleted file mode 100644 index 6c2efef9e4..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/linker.go +++ /dev/null @@ -1,140 +0,0 @@ -package ebpf - -import ( - "fmt" - - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/internal/btf" -) - -// link resolves bpf-to-bpf calls. -// -// Each library may contain multiple functions / labels, and is only linked -// if prog references one of these functions. -// -// Libraries also linked. -func link(prog *ProgramSpec, libs []*ProgramSpec) error { - var ( - linked = make(map[*ProgramSpec]bool) - pending = []asm.Instructions{prog.Instructions} - insns asm.Instructions - ) - for len(pending) > 0 { - insns, pending = pending[0], pending[1:] - for _, lib := range libs { - if linked[lib] { - continue - } - - needed, err := needSection(insns, lib.Instructions) - if err != nil { - return fmt.Errorf("linking %s: %w", lib.Name, err) - } - - if !needed { - continue - } - - linked[lib] = true - prog.Instructions = append(prog.Instructions, lib.Instructions...) - pending = append(pending, lib.Instructions) - - if prog.BTF != nil && lib.BTF != nil { - if err := btf.ProgramAppend(prog.BTF, lib.BTF); err != nil { - return fmt.Errorf("linking BTF of %s: %w", lib.Name, err) - } - } - } - } - - return nil -} - -func needSection(insns, section asm.Instructions) (bool, error) { - // A map of symbols to the libraries which contain them. - symbols, err := section.SymbolOffsets() - if err != nil { - return false, err - } - - for _, ins := range insns { - if ins.Reference == "" { - continue - } - - if ins.OpCode.JumpOp() != asm.Call || ins.Src != asm.PseudoCall { - continue - } - - if ins.Constant != -1 { - // This is already a valid call, no need to link again. 
- continue - } - - if _, ok := symbols[ins.Reference]; !ok { - // Symbol isn't available in this section - continue - } - - // At this point we know that at least one function in the - // library is called from insns, so we have to link it. - return true, nil - } - - // None of the functions in the section are called. - return false, nil -} - -func fixupJumpsAndCalls(insns asm.Instructions) error { - symbolOffsets := make(map[string]asm.RawInstructionOffset) - iter := insns.Iterate() - for iter.Next() { - ins := iter.Ins - - if ins.Symbol == "" { - continue - } - - if _, ok := symbolOffsets[ins.Symbol]; ok { - return fmt.Errorf("duplicate symbol %s", ins.Symbol) - } - - symbolOffsets[ins.Symbol] = iter.Offset - } - - iter = insns.Iterate() - for iter.Next() { - i := iter.Index - offset := iter.Offset - ins := iter.Ins - - if ins.Reference == "" { - continue - } - - switch { - case ins.IsFunctionCall() && ins.Constant == -1: - // Rewrite bpf to bpf call - callOffset, ok := symbolOffsets[ins.Reference] - if !ok { - return fmt.Errorf("call at %d: reference to missing symbol %q", i, ins.Reference) - } - - ins.Constant = int64(callOffset - offset - 1) - - case ins.OpCode.Class() == asm.JumpClass && ins.Offset == -1: - // Rewrite jump to label - jumpOffset, ok := symbolOffsets[ins.Reference] - if !ok { - return fmt.Errorf("jump at %d: reference to missing symbol %q", i, ins.Reference) - } - - ins.Offset = int16(jumpOffset - offset - 1) - - case ins.IsLoadFromMap() && ins.MapPtr() == -1: - return fmt.Errorf("map %s: %w", ins.Reference, errUnsatisfiedReference) - } - } - - return nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/map.go b/src/runtime/vendor/github.com/cilium/ebpf/map.go deleted file mode 100644 index f257d88c03..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/map.go +++ /dev/null @@ -1,1232 +0,0 @@ -package ebpf - -import ( - "errors" - "fmt" - "io" - "path/filepath" - "reflect" - "strings" - - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/btf" - "github.com/cilium/ebpf/internal/unix" -) - -// Errors returned by Map and MapIterator methods. -var ( - ErrKeyNotExist = errors.New("key does not exist") - ErrKeyExist = errors.New("key already exists") - ErrIterationAborted = errors.New("iteration aborted") - ErrMapIncompatible = errors.New("map's spec is incompatible with pinned map") -) - -// MapOptions control loading a map into the kernel. -type MapOptions struct { - // The base path to pin maps in if requested via PinByName. - // Existing maps will be re-used if they are compatible, otherwise an - // error is returned. - PinPath string - LoadPinOptions LoadPinOptions -} - -// MapID represents the unique ID of an eBPF map -type MapID uint32 - -// MapSpec defines a Map. -type MapSpec struct { - // Name is passed to the kernel as a debug aid. Must only contain - // alpha numeric and '_' characters. - Name string - Type MapType - KeySize uint32 - ValueSize uint32 - MaxEntries uint32 - - // Flags is passed to the kernel and specifies additional map - // creation attributes. - Flags uint32 - - // Automatically pin and load a map from MapOptions.PinPath. - // Generates an error if an existing pinned map is incompatible with the MapSpec. - Pinning PinType - - // Specify numa node during map creation - // (effective only if unix.BPF_F_NUMA_NODE flag is set, - // which can be imported from golang.org/x/sys/unix) - NumaNode uint32 - - // The initial contents of the map. May be nil. 
- Contents []MapKV - - // Whether to freeze a map after setting its initial contents. - Freeze bool - - // InnerMap is used as a template for ArrayOfMaps and HashOfMaps - InnerMap *MapSpec - - // The BTF associated with this map. - BTF *btf.Map -} - -func (ms *MapSpec) String() string { - return fmt.Sprintf("%s(keySize=%d, valueSize=%d, maxEntries=%d, flags=%d)", ms.Type, ms.KeySize, ms.ValueSize, ms.MaxEntries, ms.Flags) -} - -// Copy returns a copy of the spec. -// -// MapSpec.Contents is a shallow copy. -func (ms *MapSpec) Copy() *MapSpec { - if ms == nil { - return nil - } - - cpy := *ms - cpy.Contents = make([]MapKV, len(ms.Contents)) - copy(cpy.Contents, ms.Contents) - cpy.InnerMap = ms.InnerMap.Copy() - return &cpy -} - -func (ms *MapSpec) clampPerfEventArraySize() error { - if ms.Type != PerfEventArray { - return nil - } - - n, err := internal.PossibleCPUs() - if err != nil { - return fmt.Errorf("perf event array: %w", err) - } - - if n := uint32(n); ms.MaxEntries > n { - ms.MaxEntries = n - } - - return nil -} - -// MapKV is used to initialize the contents of a Map. -type MapKV struct { - Key interface{} - Value interface{} -} - -func (ms *MapSpec) checkCompatibility(m *Map) error { - switch { - case m.typ != ms.Type: - return fmt.Errorf("expected type %v, got %v: %w", ms.Type, m.typ, ErrMapIncompatible) - - case m.keySize != ms.KeySize: - return fmt.Errorf("expected key size %v, got %v: %w", ms.KeySize, m.keySize, ErrMapIncompatible) - - case m.valueSize != ms.ValueSize: - return fmt.Errorf("expected value size %v, got %v: %w", ms.ValueSize, m.valueSize, ErrMapIncompatible) - - case m.maxEntries != ms.MaxEntries: - return fmt.Errorf("expected max entries %v, got %v: %w", ms.MaxEntries, m.maxEntries, ErrMapIncompatible) - - case m.flags != ms.Flags: - return fmt.Errorf("expected flags %v, got %v: %w", ms.Flags, m.flags, ErrMapIncompatible) - } - return nil -} - -// Map represents a Map file descriptor. -// -// It is not safe to close a map which is used by other goroutines. -// -// Methods which take interface{} arguments by default encode -// them using binary.Read/Write in the machine's native endianness. -// -// Implement encoding.BinaryMarshaler or encoding.BinaryUnmarshaler -// if you require custom encoding. -type Map struct { - name string - fd *internal.FD - typ MapType - keySize uint32 - valueSize uint32 - maxEntries uint32 - flags uint32 - pinnedPath string - // Per CPU maps return values larger than the size in the spec - fullValueSize int -} - -// NewMapFromFD creates a map from a raw fd. -// -// You should not use fd after calling this function. -func NewMapFromFD(fd int) (*Map, error) { - if fd < 0 { - return nil, errors.New("invalid fd") - } - - return newMapFromFD(internal.NewFD(uint32(fd))) -} - -func newMapFromFD(fd *internal.FD) (*Map, error) { - info, err := newMapInfoFromFd(fd) - if err != nil { - fd.Close() - return nil, fmt.Errorf("get map info: %s", err) - } - - return newMap(fd, info.Name, info.Type, info.KeySize, info.ValueSize, info.MaxEntries, info.Flags) -} - -// NewMap creates a new Map. -// -// It's equivalent to calling NewMapWithOptions with default options. -func NewMap(spec *MapSpec) (*Map, error) { - return NewMapWithOptions(spec, MapOptions{}) -} - -// NewMapWithOptions creates a new Map. -// -// Creating a map for the first time will perform feature detection -// by creating small, temporary maps. -// -// The caller is responsible for ensuring the process' rlimit is set -// sufficiently high for locking memory during map creation. 
This can be done -// by calling unix.Setrlimit with unix.RLIMIT_MEMLOCK prior to calling NewMapWithOptions. -// -// May return an error wrapping ErrMapIncompatible. -func NewMapWithOptions(spec *MapSpec, opts MapOptions) (*Map, error) { - handles := newHandleCache() - defer handles.close() - - return newMapWithOptions(spec, opts, handles) -} - -func newMapWithOptions(spec *MapSpec, opts MapOptions, handles *handleCache) (_ *Map, err error) { - closeOnError := func(c io.Closer) { - if err != nil { - c.Close() - } - } - - switch spec.Pinning { - case PinByName: - if spec.Name == "" || opts.PinPath == "" { - return nil, fmt.Errorf("pin by name: missing Name or PinPath") - } - - path := filepath.Join(opts.PinPath, spec.Name) - m, err := LoadPinnedMap(path, &opts.LoadPinOptions) - if errors.Is(err, unix.ENOENT) { - break - } - if err != nil { - return nil, fmt.Errorf("load pinned map: %w", err) - } - defer closeOnError(m) - - if err := spec.checkCompatibility(m); err != nil { - return nil, fmt.Errorf("use pinned map %s: %w", spec.Name, err) - } - - return m, nil - - case PinNone: - // Nothing to do here - - default: - return nil, fmt.Errorf("pin type %d: %w", int(spec.Pinning), ErrNotSupported) - } - - var innerFd *internal.FD - if spec.Type == ArrayOfMaps || spec.Type == HashOfMaps { - if spec.InnerMap == nil { - return nil, fmt.Errorf("%s requires InnerMap", spec.Type) - } - - if spec.InnerMap.Pinning != PinNone { - return nil, errors.New("inner maps cannot be pinned") - } - - template, err := createMap(spec.InnerMap, nil, opts, handles) - if err != nil { - return nil, err - } - defer template.Close() - - innerFd = template.fd - } - - m, err := createMap(spec, innerFd, opts, handles) - if err != nil { - return nil, err - } - defer closeOnError(m) - - if spec.Pinning == PinByName { - path := filepath.Join(opts.PinPath, spec.Name) - if err := m.Pin(path); err != nil { - return nil, fmt.Errorf("pin map: %s", err) - } - } - - return m, nil -} - -func createMap(spec *MapSpec, inner *internal.FD, opts MapOptions, handles *handleCache) (_ *Map, err error) { - closeOnError := func(closer io.Closer) { - if err != nil { - closer.Close() - } - } - - spec = spec.Copy() - - switch spec.Type { - case ArrayOfMaps: - fallthrough - case HashOfMaps: - if err := haveNestedMaps(); err != nil { - return nil, err - } - - if spec.ValueSize != 0 && spec.ValueSize != 4 { - return nil, errors.New("ValueSize must be zero or four for map of map") - } - spec.ValueSize = 4 - - case PerfEventArray: - if spec.KeySize != 0 && spec.KeySize != 4 { - return nil, errors.New("KeySize must be zero or four for perf event array") - } - spec.KeySize = 4 - - if spec.ValueSize != 0 && spec.ValueSize != 4 { - return nil, errors.New("ValueSize must be zero or four for perf event array") - } - spec.ValueSize = 4 - - if spec.MaxEntries == 0 { - n, err := internal.PossibleCPUs() - if err != nil { - return nil, fmt.Errorf("perf event array: %w", err) - } - spec.MaxEntries = uint32(n) - } - } - - if spec.Flags&(unix.BPF_F_RDONLY_PROG|unix.BPF_F_WRONLY_PROG) > 0 || spec.Freeze { - if err := haveMapMutabilityModifiers(); err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - } - if spec.Flags&unix.BPF_F_MMAPABLE > 0 { - if err := haveMmapableMaps(); err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - } - if spec.Flags&unix.BPF_F_INNER_MAP > 0 { - if err := haveInnerMaps(); err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - } - - attr := internal.BPFMapCreateAttr{ - MapType: uint32(spec.Type), - 
KeySize: spec.KeySize, - ValueSize: spec.ValueSize, - MaxEntries: spec.MaxEntries, - Flags: spec.Flags, - NumaNode: spec.NumaNode, - } - - if inner != nil { - var err error - attr.InnerMapFd, err = inner.Value() - if err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - } - - if haveObjName() == nil { - attr.MapName = internal.NewBPFObjName(spec.Name) - } - - var btfDisabled bool - if spec.BTF != nil { - handle, err := handles.btfHandle(btf.MapSpec(spec.BTF)) - btfDisabled = errors.Is(err, btf.ErrNotSupported) - if err != nil && !btfDisabled { - return nil, fmt.Errorf("load BTF: %w", err) - } - - if handle != nil { - attr.BTFFd = uint32(handle.FD()) - attr.BTFKeyTypeID = uint32(btf.MapKey(spec.BTF).ID()) - attr.BTFValueTypeID = uint32(btf.MapValue(spec.BTF).ID()) - } - } - - fd, err := internal.BPFMapCreate(&attr) - if err != nil { - if errors.Is(err, unix.EPERM) { - return nil, fmt.Errorf("map create: RLIMIT_MEMLOCK may be too low: %w", err) - } - if btfDisabled { - return nil, fmt.Errorf("map create without BTF: %w", err) - } - return nil, fmt.Errorf("map create: %w", err) - } - defer closeOnError(fd) - - m, err := newMap(fd, spec.Name, spec.Type, spec.KeySize, spec.ValueSize, spec.MaxEntries, spec.Flags) - if err != nil { - return nil, fmt.Errorf("map create: %w", err) - } - - if err := m.populate(spec.Contents); err != nil { - return nil, fmt.Errorf("map create: can't set initial contents: %w", err) - } - - if spec.Freeze { - if err := m.Freeze(); err != nil { - return nil, fmt.Errorf("can't freeze map: %w", err) - } - } - - return m, nil -} - -func newMap(fd *internal.FD, name string, typ MapType, keySize, valueSize, maxEntries, flags uint32) (*Map, error) { - m := &Map{ - name, - fd, - typ, - keySize, - valueSize, - maxEntries, - flags, - "", - int(valueSize), - } - - if !typ.hasPerCPUValue() { - return m, nil - } - - possibleCPUs, err := internal.PossibleCPUs() - if err != nil { - return nil, err - } - - m.fullValueSize = align(int(valueSize), 8) * possibleCPUs - return m, nil -} - -func (m *Map) String() string { - if m.name != "" { - return fmt.Sprintf("%s(%s)#%v", m.typ, m.name, m.fd) - } - return fmt.Sprintf("%s#%v", m.typ, m.fd) -} - -// Type returns the underlying type of the map. -func (m *Map) Type() MapType { - return m.typ -} - -// KeySize returns the size of the map key in bytes. -func (m *Map) KeySize() uint32 { - return m.keySize -} - -// ValueSize returns the size of the map value in bytes. -func (m *Map) ValueSize() uint32 { - return m.valueSize -} - -// MaxEntries returns the maximum number of elements the map can hold. -func (m *Map) MaxEntries() uint32 { - return m.maxEntries -} - -// Flags returns the flags of the map. -func (m *Map) Flags() uint32 { - return m.flags -} - -// Info returns metadata about the map. -func (m *Map) Info() (*MapInfo, error) { - return newMapInfoFromFd(m.fd) -} - -// Lookup retrieves a value from a Map. -// -// Calls Close() on valueOut if it is of type **Map or **Program, -// and *valueOut is not nil. -// -// Returns an error if the key doesn't exist, see ErrKeyNotExist. -func (m *Map) Lookup(key, valueOut interface{}) error { - valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) - if err := m.lookup(key, valuePtr); err != nil { - return err - } - - return m.unmarshalValue(valueOut, valueBytes) -} - -// LookupAndDelete retrieves and deletes a value from a Map. -// -// Returns ErrKeyNotExist if the key doesn't exist. 
-func (m *Map) LookupAndDelete(key, valueOut interface{}) error { - valuePtr, valueBytes := makeBuffer(valueOut, m.fullValueSize) - - keyPtr, err := m.marshalKey(key) - if err != nil { - return fmt.Errorf("can't marshal key: %w", err) - } - - if err := bpfMapLookupAndDelete(m.fd, keyPtr, valuePtr); err != nil { - return fmt.Errorf("lookup and delete failed: %w", err) - } - - return m.unmarshalValue(valueOut, valueBytes) -} - -// LookupBytes gets a value from Map. -// -// Returns a nil value if a key doesn't exist. -func (m *Map) LookupBytes(key interface{}) ([]byte, error) { - valueBytes := make([]byte, m.fullValueSize) - valuePtr := internal.NewSlicePointer(valueBytes) - - err := m.lookup(key, valuePtr) - if errors.Is(err, ErrKeyNotExist) { - return nil, nil - } - - return valueBytes, err -} - -func (m *Map) lookup(key interface{}, valueOut internal.Pointer) error { - keyPtr, err := m.marshalKey(key) - if err != nil { - return fmt.Errorf("can't marshal key: %w", err) - } - - if err = bpfMapLookupElem(m.fd, keyPtr, valueOut); err != nil { - return fmt.Errorf("lookup failed: %w", err) - } - return nil -} - -// MapUpdateFlags controls the behaviour of the Map.Update call. -// -// The exact semantics depend on the specific MapType. -type MapUpdateFlags uint64 - -const ( - // UpdateAny creates a new element or update an existing one. - UpdateAny MapUpdateFlags = iota - // UpdateNoExist creates a new element. - UpdateNoExist MapUpdateFlags = 1 << (iota - 1) - // UpdateExist updates an existing element. - UpdateExist -) - -// Put replaces or creates a value in map. -// -// It is equivalent to calling Update with UpdateAny. -func (m *Map) Put(key, value interface{}) error { - return m.Update(key, value, UpdateAny) -} - -// Update changes the value of a key. -func (m *Map) Update(key, value interface{}, flags MapUpdateFlags) error { - keyPtr, err := m.marshalKey(key) - if err != nil { - return fmt.Errorf("can't marshal key: %w", err) - } - - valuePtr, err := m.marshalValue(value) - if err != nil { - return fmt.Errorf("can't marshal value: %w", err) - } - - if err = bpfMapUpdateElem(m.fd, keyPtr, valuePtr, uint64(flags)); err != nil { - return fmt.Errorf("update failed: %w", err) - } - - return nil -} - -// Delete removes a value. -// -// Returns ErrKeyNotExist if the key does not exist. -func (m *Map) Delete(key interface{}) error { - keyPtr, err := m.marshalKey(key) - if err != nil { - return fmt.Errorf("can't marshal key: %w", err) - } - - if err = bpfMapDeleteElem(m.fd, keyPtr); err != nil { - return fmt.Errorf("delete failed: %w", err) - } - return nil -} - -// NextKey finds the key following an initial key. -// -// See NextKeyBytes for details. -// -// Returns ErrKeyNotExist if there is no next key. -func (m *Map) NextKey(key, nextKeyOut interface{}) error { - nextKeyPtr, nextKeyBytes := makeBuffer(nextKeyOut, int(m.keySize)) - - if err := m.nextKey(key, nextKeyPtr); err != nil { - return err - } - - if err := m.unmarshalKey(nextKeyOut, nextKeyBytes); err != nil { - return fmt.Errorf("can't unmarshal next key: %w", err) - } - return nil -} - -// NextKeyBytes returns the key following an initial key as a byte slice. -// -// Passing nil will return the first key. -// -// Use Iterate if you want to traverse all entries in the map. -// -// Returns nil if there are no more keys. 
-func (m *Map) NextKeyBytes(key interface{}) ([]byte, error) { - nextKey := make([]byte, m.keySize) - nextKeyPtr := internal.NewSlicePointer(nextKey) - - err := m.nextKey(key, nextKeyPtr) - if errors.Is(err, ErrKeyNotExist) { - return nil, nil - } - - return nextKey, err -} - -func (m *Map) nextKey(key interface{}, nextKeyOut internal.Pointer) error { - var ( - keyPtr internal.Pointer - err error - ) - - if key != nil { - keyPtr, err = m.marshalKey(key) - if err != nil { - return fmt.Errorf("can't marshal key: %w", err) - } - } - - if err = bpfMapGetNextKey(m.fd, keyPtr, nextKeyOut); err != nil { - return fmt.Errorf("next key failed: %w", err) - } - return nil -} - -// BatchLookup looks up many elements in a map at once. -// -// "keysOut" and "valuesOut" must be of type slice, a pointer -// to a slice or buffer will not work. -// "prevKey" is the key to start the batch lookup from, it will -// *not* be included in the results. Use nil to start at the first key. -// -// ErrKeyNotExist is returned when the batch lookup has reached -// the end of all possible results, even when partial results -// are returned. It should be used to evaluate when lookup is "done". -func (m *Map) BatchLookup(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { - return m.batchLookup(internal.BPF_MAP_LOOKUP_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) -} - -// BatchLookupAndDelete looks up many elements in a map at once, -// -// It then deletes all those elements. -// "keysOut" and "valuesOut" must be of type slice, a pointer -// to a slice or buffer will not work. -// "prevKey" is the key to start the batch lookup from, it will -// *not* be included in the results. Use nil to start at the first key. -// -// ErrKeyNotExist is returned when the batch lookup has reached -// the end of all possible results, even when partial results -// are returned. It should be used to evaluate when lookup is "done". 
-func (m *Map) BatchLookupAndDelete(prevKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { - return m.batchLookup(internal.BPF_MAP_LOOKUP_AND_DELETE_BATCH, prevKey, nextKeyOut, keysOut, valuesOut, opts) -} - -func (m *Map) batchLookup(cmd internal.BPFCmd, startKey, nextKeyOut, keysOut, valuesOut interface{}, opts *BatchOptions) (int, error) { - if err := haveBatchAPI(); err != nil { - return 0, err - } - if m.typ.hasPerCPUValue() { - return 0, ErrNotSupported - } - keysValue := reflect.ValueOf(keysOut) - if keysValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("keys must be a slice") - } - valuesValue := reflect.ValueOf(valuesOut) - if valuesValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("valuesOut must be a slice") - } - count := keysValue.Len() - if count != valuesValue.Len() { - return 0, fmt.Errorf("keysOut and valuesOut must be the same length") - } - keyBuf := make([]byte, count*int(m.keySize)) - keyPtr := internal.NewSlicePointer(keyBuf) - valueBuf := make([]byte, count*int(m.fullValueSize)) - valuePtr := internal.NewSlicePointer(valueBuf) - - var ( - startPtr internal.Pointer - err error - retErr error - ) - if startKey != nil { - startPtr, err = marshalPtr(startKey, int(m.keySize)) - if err != nil { - return 0, err - } - } - nextPtr, nextBuf := makeBuffer(nextKeyOut, int(m.keySize)) - - ct, err := bpfMapBatch(cmd, m.fd, startPtr, nextPtr, keyPtr, valuePtr, uint32(count), opts) - if err != nil { - if !errors.Is(err, ErrKeyNotExist) { - return 0, err - } - retErr = ErrKeyNotExist - } - - err = m.unmarshalKey(nextKeyOut, nextBuf) - if err != nil { - return 0, err - } - err = unmarshalBytes(keysOut, keyBuf) - if err != nil { - return 0, err - } - err = unmarshalBytes(valuesOut, valueBuf) - if err != nil { - retErr = err - } - return int(ct), retErr -} - -// BatchUpdate updates the map with multiple keys and values -// simultaneously. -// "keys" and "values" must be of type slice, a pointer -// to a slice or buffer will not work. -func (m *Map) BatchUpdate(keys, values interface{}, opts *BatchOptions) (int, error) { - if err := haveBatchAPI(); err != nil { - return 0, err - } - if m.typ.hasPerCPUValue() { - return 0, ErrNotSupported - } - keysValue := reflect.ValueOf(keys) - if keysValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("keys must be a slice") - } - valuesValue := reflect.ValueOf(values) - if valuesValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("values must be a slice") - } - var ( - count = keysValue.Len() - valuePtr internal.Pointer - err error - ) - if count != valuesValue.Len() { - return 0, fmt.Errorf("keys and values must be the same length") - } - keyPtr, err := marshalPtr(keys, count*int(m.keySize)) - if err != nil { - return 0, err - } - valuePtr, err = marshalPtr(values, count*int(m.valueSize)) - if err != nil { - return 0, err - } - var nilPtr internal.Pointer - ct, err := bpfMapBatch(internal.BPF_MAP_UPDATE_BATCH, m.fd, nilPtr, nilPtr, keyPtr, valuePtr, uint32(count), opts) - return int(ct), err -} - -// BatchDelete batch deletes entries in the map by keys. -// "keys" must be of type slice, a pointer to a slice or buffer will not work. 
-func (m *Map) BatchDelete(keys interface{}, opts *BatchOptions) (int, error) { - if err := haveBatchAPI(); err != nil { - return 0, err - } - if m.typ.hasPerCPUValue() { - return 0, ErrNotSupported - } - keysValue := reflect.ValueOf(keys) - if keysValue.Kind() != reflect.Slice { - return 0, fmt.Errorf("keys must be a slice") - } - count := keysValue.Len() - keyPtr, err := marshalPtr(keys, count*int(m.keySize)) - if err != nil { - return 0, fmt.Errorf("cannot marshal keys: %v", err) - } - var nilPtr internal.Pointer - ct, err := bpfMapBatch(internal.BPF_MAP_DELETE_BATCH, m.fd, nilPtr, nilPtr, keyPtr, nilPtr, uint32(count), opts) - return int(ct), err -} - -// Iterate traverses a map. -// -// It's safe to create multiple iterators at the same time. -// -// It's not possible to guarantee that all keys in a map will be -// returned if there are concurrent modifications to the map. -func (m *Map) Iterate() *MapIterator { - return newMapIterator(m) -} - -// Close removes a Map -func (m *Map) Close() error { - if m == nil { - // This makes it easier to clean up when iterating maps - // of maps / programs. - return nil - } - - return m.fd.Close() -} - -// FD gets the file descriptor of the Map. -// -// Calling this function is invalid after Close has been called. -func (m *Map) FD() int { - fd, err := m.fd.Value() - if err != nil { - // Best effort: -1 is the number most likely to be an - // invalid file descriptor. - return -1 - } - - return int(fd) -} - -// Clone creates a duplicate of the Map. -// -// Closing the duplicate does not affect the original, and vice versa. -// Changes made to the map are reflected by both instances however. -// If the original map was pinned, the cloned map will not be pinned by default. -// -// Cloning a nil Map returns nil. -func (m *Map) Clone() (*Map, error) { - if m == nil { - return nil, nil - } - - dup, err := m.fd.Dup() - if err != nil { - return nil, fmt.Errorf("can't clone map: %w", err) - } - - return &Map{ - m.name, - dup, - m.typ, - m.keySize, - m.valueSize, - m.maxEntries, - m.flags, - "", - m.fullValueSize, - }, nil -} - -// Pin persists the map on the BPF virtual file system past the lifetime of -// the process that created it . -// -// Calling Pin on a previously pinned map will overwrite the path, except when -// the new path already exists. Re-pinning across filesystems is not supported. -// You can Clone a map to pin it to a different path. -// -// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs -func (m *Map) Pin(fileName string) error { - if err := internal.Pin(m.pinnedPath, fileName, m.fd); err != nil { - return err - } - m.pinnedPath = fileName - return nil -} - -// Unpin removes the persisted state for the map from the BPF virtual filesystem. -// -// Failed calls to Unpin will not alter the state returned by IsPinned. -// -// Unpinning an unpinned Map returns nil. -func (m *Map) Unpin() error { - if err := internal.Unpin(m.pinnedPath); err != nil { - return err - } - m.pinnedPath = "" - return nil -} - -// IsPinned returns true if the map has a non-empty pinned path. -func (m *Map) IsPinned() bool { - return m.pinnedPath != "" -} - -// Freeze prevents a map to be modified from user space. -// -// It makes no changes to kernel-side restrictions. 
-func (m *Map) Freeze() error { - if err := haveMapMutabilityModifiers(); err != nil { - return fmt.Errorf("can't freeze map: %w", err) - } - - if err := bpfMapFreeze(m.fd); err != nil { - return fmt.Errorf("can't freeze map: %w", err) - } - return nil -} - -func (m *Map) populate(contents []MapKV) error { - for _, kv := range contents { - if err := m.Put(kv.Key, kv.Value); err != nil { - return fmt.Errorf("key %v: %w", kv.Key, err) - } - } - return nil -} - -func (m *Map) marshalKey(data interface{}) (internal.Pointer, error) { - if data == nil { - if m.keySize == 0 { - // Queues have a key length of zero, so passing nil here is valid. - return internal.NewPointer(nil), nil - } - return internal.Pointer{}, errors.New("can't use nil as key of map") - } - - return marshalPtr(data, int(m.keySize)) -} - -func (m *Map) unmarshalKey(data interface{}, buf []byte) error { - if buf == nil { - // This is from a makeBuffer call, nothing do do here. - return nil - } - - return unmarshalBytes(data, buf) -} - -func (m *Map) marshalValue(data interface{}) (internal.Pointer, error) { - if m.typ.hasPerCPUValue() { - return marshalPerCPUValue(data, int(m.valueSize)) - } - - var ( - buf []byte - err error - ) - - switch value := data.(type) { - case *Map: - if !m.typ.canStoreMap() { - return internal.Pointer{}, fmt.Errorf("can't store map in %s", m.typ) - } - buf, err = marshalMap(value, int(m.valueSize)) - - case *Program: - if !m.typ.canStoreProgram() { - return internal.Pointer{}, fmt.Errorf("can't store program in %s", m.typ) - } - buf, err = marshalProgram(value, int(m.valueSize)) - - default: - return marshalPtr(data, int(m.valueSize)) - } - - if err != nil { - return internal.Pointer{}, err - } - - return internal.NewSlicePointer(buf), nil -} - -func (m *Map) unmarshalValue(value interface{}, buf []byte) error { - if buf == nil { - // This is from a makeBuffer call, nothing do do here. - return nil - } - - if m.typ.hasPerCPUValue() { - return unmarshalPerCPUValue(value, int(m.valueSize), buf) - } - - switch value := value.(type) { - case **Map: - if !m.typ.canStoreMap() { - return fmt.Errorf("can't read a map from %s", m.typ) - } - - other, err := unmarshalMap(buf) - if err != nil { - return err - } - - // The caller might close the map externally, so ignore errors. - _ = (*value).Close() - - *value = other - return nil - - case *Map: - if !m.typ.canStoreMap() { - return fmt.Errorf("can't read a map from %s", m.typ) - } - return errors.New("require pointer to *Map") - - case **Program: - if !m.typ.canStoreProgram() { - return fmt.Errorf("can't read a program from %s", m.typ) - } - - other, err := unmarshalProgram(buf) - if err != nil { - return err - } - - // The caller might close the program externally, so ignore errors. - _ = (*value).Close() - - *value = other - return nil - - case *Program: - if !m.typ.canStoreProgram() { - return fmt.Errorf("can't read a program from %s", m.typ) - } - return errors.New("require pointer to *Program") - } - - return unmarshalBytes(value, buf) -} - -// LoadPinnedMap loads a Map from a BPF file. -func LoadPinnedMap(fileName string, opts *LoadPinOptions) (*Map, error) { - fd, err := internal.BPFObjGet(fileName, opts.Marshal()) - if err != nil { - return nil, err - } - - m, err := newMapFromFD(fd) - if err == nil { - m.pinnedPath = fileName - } - - return m, err -} - -// unmarshalMap creates a map from a map ID encoded in host endianness. 
-func unmarshalMap(buf []byte) (*Map, error) { - if len(buf) != 4 { - return nil, errors.New("map id requires 4 byte value") - } - - id := internal.NativeEndian.Uint32(buf) - return NewMapFromID(MapID(id)) -} - -// marshalMap marshals the fd of a map into a buffer in host endianness. -func marshalMap(m *Map, length int) ([]byte, error) { - if length != 4 { - return nil, fmt.Errorf("can't marshal map to %d bytes", length) - } - - fd, err := m.fd.Value() - if err != nil { - return nil, err - } - - buf := make([]byte, 4) - internal.NativeEndian.PutUint32(buf, fd) - return buf, nil -} - -func patchValue(value []byte, typ btf.Type, replacements map[string]interface{}) error { - replaced := make(map[string]bool) - replace := func(name string, offset, size int, replacement interface{}) error { - if offset+size > len(value) { - return fmt.Errorf("%s: offset %d(+%d) is out of bounds", name, offset, size) - } - - buf, err := marshalBytes(replacement, size) - if err != nil { - return fmt.Errorf("marshal %s: %w", name, err) - } - - copy(value[offset:offset+size], buf) - replaced[name] = true - return nil - } - - switch parent := typ.(type) { - case *btf.Datasec: - for _, secinfo := range parent.Vars { - name := string(secinfo.Type.(*btf.Var).Name) - replacement, ok := replacements[name] - if !ok { - continue - } - - err := replace(name, int(secinfo.Offset), int(secinfo.Size), replacement) - if err != nil { - return err - } - } - - default: - return fmt.Errorf("patching %T is not supported", typ) - } - - if len(replaced) == len(replacements) { - return nil - } - - var missing []string - for name := range replacements { - if !replaced[name] { - missing = append(missing, name) - } - } - - if len(missing) == 1 { - return fmt.Errorf("unknown field: %s", missing[0]) - } - - return fmt.Errorf("unknown fields: %s", strings.Join(missing, ",")) -} - -// MapIterator iterates a Map. -// -// See Map.Iterate. -type MapIterator struct { - target *Map - prevKey interface{} - prevBytes []byte - count, maxEntries uint32 - done bool - err error -} - -func newMapIterator(target *Map) *MapIterator { - return &MapIterator{ - target: target, - maxEntries: target.maxEntries, - prevBytes: make([]byte, target.keySize), - } -} - -// Next decodes the next key and value. -// -// Iterating a hash map from which keys are being deleted is not -// safe. You may see the same key multiple times. Iteration may -// also abort with an error, see IsIterationAborted. -// -// Returns false if there are no more entries. You must check -// the result of Err afterwards. -// -// See Map.Get for further caveats around valueOut. -func (mi *MapIterator) Next(keyOut, valueOut interface{}) bool { - if mi.err != nil || mi.done { - return false - } - - // For array-like maps NextKeyBytes returns nil only on after maxEntries - // iterations. - for mi.count <= mi.maxEntries { - var nextBytes []byte - nextBytes, mi.err = mi.target.NextKeyBytes(mi.prevKey) - if mi.err != nil { - return false - } - - if nextBytes == nil { - mi.done = true - return false - } - - // The user can get access to nextBytes since unmarshalBytes - // does not copy when unmarshaling into a []byte. - // Make a copy to prevent accidental corruption of - // iterator state. - copy(mi.prevBytes, nextBytes) - mi.prevKey = mi.prevBytes - - mi.count++ - mi.err = mi.target.Lookup(nextBytes, valueOut) - if errors.Is(mi.err, ErrKeyNotExist) { - // Even though the key should be valid, we couldn't look up - // its value. 
If we're iterating a hash map this is probably - // because a concurrent delete removed the value before we - // could get it. This means that the next call to NextKeyBytes - // is very likely to restart iteration. - // If we're iterating one of the fd maps like - // ProgramArray it means that a given slot doesn't have - // a valid fd associated. It's OK to continue to the next slot. - continue - } - if mi.err != nil { - return false - } - - mi.err = mi.target.unmarshalKey(keyOut, nextBytes) - return mi.err == nil - } - - mi.err = fmt.Errorf("%w", ErrIterationAborted) - return false -} - -// Err returns any encountered error. -// -// The method must be called after Next returns nil. -// -// Returns ErrIterationAborted if it wasn't possible to do a full iteration. -func (mi *MapIterator) Err() error { - return mi.err -} - -// MapGetNextID returns the ID of the next eBPF map. -// -// Returns ErrNotExist, if there is no next eBPF map. -func MapGetNextID(startID MapID) (MapID, error) { - id, err := objGetNextID(internal.BPF_MAP_GET_NEXT_ID, uint32(startID)) - return MapID(id), err -} - -// NewMapFromID returns the map for a given id. -// -// Returns ErrNotExist, if there is no eBPF map with the given id. -func NewMapFromID(id MapID) (*Map, error) { - fd, err := bpfObjGetFDByID(internal.BPF_MAP_GET_FD_BY_ID, uint32(id)) - if err != nil { - return nil, err - } - - return newMapFromFD(fd) -} - -// ID returns the systemwide unique ID of the map. -// -// Deprecated: use MapInfo.ID() instead. -func (m *Map) ID() (MapID, error) { - info, err := bpfGetMapInfoByFD(m.fd) - if err != nil { - return MapID(0), err - } - return MapID(info.id), nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/marshalers.go b/src/runtime/vendor/github.com/cilium/ebpf/marshalers.go deleted file mode 100644 index f2610eff9c..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/marshalers.go +++ /dev/null @@ -1,218 +0,0 @@ -package ebpf - -import ( - "bytes" - "encoding" - "encoding/binary" - "errors" - "fmt" - "reflect" - "runtime" - "unsafe" - - "github.com/cilium/ebpf/internal" -) - -// marshalPtr converts an arbitrary value into a pointer suitable -// to be passed to the kernel. -// -// As an optimization, it returns the original value if it is an -// unsafe.Pointer. -func marshalPtr(data interface{}, length int) (internal.Pointer, error) { - if ptr, ok := data.(unsafe.Pointer); ok { - return internal.NewPointer(ptr), nil - } - - buf, err := marshalBytes(data, length) - if err != nil { - return internal.Pointer{}, err - } - - return internal.NewSlicePointer(buf), nil -} - -// marshalBytes converts an arbitrary value into a byte buffer. -// -// Prefer using Map.marshalKey and Map.marshalValue if possible, since -// those have special cases that allow more types to be encoded. -// -// Returns an error if the given value isn't representable in exactly -// length bytes. 
-func marshalBytes(data interface{}, length int) (buf []byte, err error) { - switch value := data.(type) { - case encoding.BinaryMarshaler: - buf, err = value.MarshalBinary() - case string: - buf = []byte(value) - case []byte: - buf = value - case unsafe.Pointer: - err = errors.New("can't marshal from unsafe.Pointer") - case Map, *Map, Program, *Program: - err = fmt.Errorf("can't marshal %T", value) - default: - var wr bytes.Buffer - err = binary.Write(&wr, internal.NativeEndian, value) - if err != nil { - err = fmt.Errorf("encoding %T: %v", value, err) - } - buf = wr.Bytes() - } - if err != nil { - return nil, err - } - - if len(buf) != length { - return nil, fmt.Errorf("%T doesn't marshal to %d bytes", data, length) - } - return buf, nil -} - -func makeBuffer(dst interface{}, length int) (internal.Pointer, []byte) { - if ptr, ok := dst.(unsafe.Pointer); ok { - return internal.NewPointer(ptr), nil - } - - buf := make([]byte, length) - return internal.NewSlicePointer(buf), buf -} - -// unmarshalBytes converts a byte buffer into an arbitrary value. -// -// Prefer using Map.unmarshalKey and Map.unmarshalValue if possible, since -// those have special cases that allow more types to be encoded. -func unmarshalBytes(data interface{}, buf []byte) error { - switch value := data.(type) { - case unsafe.Pointer: - // This could be solved in Go 1.17 by unsafe.Slice instead. (https://github.com/golang/go/issues/19367) - // We could opt for removing unsafe.Pointer support in the lib as well. - sh := &reflect.SliceHeader{ //nolint:govet - Data: uintptr(value), - Len: len(buf), - Cap: len(buf), - } - - dst := *(*[]byte)(unsafe.Pointer(sh)) - copy(dst, buf) - runtime.KeepAlive(value) - return nil - case Map, *Map, Program, *Program: - return fmt.Errorf("can't unmarshal into %T", value) - case encoding.BinaryUnmarshaler: - return value.UnmarshalBinary(buf) - case *string: - *value = string(buf) - return nil - case *[]byte: - *value = buf - return nil - case string: - return errors.New("require pointer to string") - case []byte: - return errors.New("require pointer to []byte") - default: - rd := bytes.NewReader(buf) - if err := binary.Read(rd, internal.NativeEndian, value); err != nil { - return fmt.Errorf("decoding %T: %v", value, err) - } - return nil - } -} - -// marshalPerCPUValue encodes a slice containing one value per -// possible CPU into a buffer of bytes. -// -// Values are initialized to zero if the slice has less elements than CPUs. -// -// slice must have a type like []elementType. 
-func marshalPerCPUValue(slice interface{}, elemLength int) (internal.Pointer, error) { - sliceType := reflect.TypeOf(slice) - if sliceType.Kind() != reflect.Slice { - return internal.Pointer{}, errors.New("per-CPU value requires slice") - } - - possibleCPUs, err := internal.PossibleCPUs() - if err != nil { - return internal.Pointer{}, err - } - - sliceValue := reflect.ValueOf(slice) - sliceLen := sliceValue.Len() - if sliceLen > possibleCPUs { - return internal.Pointer{}, fmt.Errorf("per-CPU value exceeds number of CPUs") - } - - alignedElemLength := align(elemLength, 8) - buf := make([]byte, alignedElemLength*possibleCPUs) - - for i := 0; i < sliceLen; i++ { - elem := sliceValue.Index(i).Interface() - elemBytes, err := marshalBytes(elem, elemLength) - if err != nil { - return internal.Pointer{}, err - } - - offset := i * alignedElemLength - copy(buf[offset:offset+elemLength], elemBytes) - } - - return internal.NewSlicePointer(buf), nil -} - -// unmarshalPerCPUValue decodes a buffer into a slice containing one value per -// possible CPU. -// -// valueOut must have a type like *[]elementType -func unmarshalPerCPUValue(slicePtr interface{}, elemLength int, buf []byte) error { - slicePtrType := reflect.TypeOf(slicePtr) - if slicePtrType.Kind() != reflect.Ptr || slicePtrType.Elem().Kind() != reflect.Slice { - return fmt.Errorf("per-cpu value requires pointer to slice") - } - - possibleCPUs, err := internal.PossibleCPUs() - if err != nil { - return err - } - - sliceType := slicePtrType.Elem() - slice := reflect.MakeSlice(sliceType, possibleCPUs, possibleCPUs) - - sliceElemType := sliceType.Elem() - sliceElemIsPointer := sliceElemType.Kind() == reflect.Ptr - if sliceElemIsPointer { - sliceElemType = sliceElemType.Elem() - } - - step := len(buf) / possibleCPUs - if step < elemLength { - return fmt.Errorf("per-cpu element length is larger than available data") - } - for i := 0; i < possibleCPUs; i++ { - var elem interface{} - if sliceElemIsPointer { - newElem := reflect.New(sliceElemType) - slice.Index(i).Set(newElem) - elem = newElem.Interface() - } else { - elem = slice.Index(i).Addr().Interface() - } - - // Make a copy, since unmarshal can hold on to itemBytes - elemBytes := make([]byte, elemLength) - copy(elemBytes, buf[:elemLength]) - - err := unmarshalBytes(elem, elemBytes) - if err != nil { - return fmt.Errorf("cpu %d: %w", i, err) - } - - buf = buf[step:] - } - - reflect.ValueOf(slicePtr).Elem().Set(slice) - return nil -} - -func align(n, alignment int) int { - return (int(n) + alignment - 1) / alignment * alignment -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/prog.go b/src/runtime/vendor/github.com/cilium/ebpf/prog.go deleted file mode 100644 index 13bdb6ddad..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/prog.go +++ /dev/null @@ -1,728 +0,0 @@ -package ebpf - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "io" - "math" - "path/filepath" - "strings" - "time" - - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/btf" - "github.com/cilium/ebpf/internal/unix" -) - -// ErrNotSupported is returned whenever the kernel doesn't support a feature. -var ErrNotSupported = internal.ErrNotSupported - -var errUnsatisfiedReference = errors.New("unsatisfied reference") - -// ProgramID represents the unique ID of an eBPF program. -type ProgramID uint32 - -const ( - // Number of bytes to pad the output buffer for BPF_PROG_TEST_RUN. 
- // This is currently the maximum of spare space allocated for SKB - // and XDP programs, and equal to XDP_PACKET_HEADROOM + NET_IP_ALIGN. - outputPad = 256 + 2 -) - -// DefaultVerifierLogSize is the default number of bytes allocated for the -// verifier log. -const DefaultVerifierLogSize = 64 * 1024 - -// ProgramOptions control loading a program into the kernel. -type ProgramOptions struct { - // Controls the detail emitted by the kernel verifier. Set to non-zero - // to enable logging. - LogLevel uint32 - // Controls the output buffer size for the verifier. Defaults to - // DefaultVerifierLogSize. - LogSize int - // An ELF containing the target BTF for this program. It is used both to - // find the correct function to trace and to apply CO-RE relocations. - // This is useful in environments where the kernel BTF is not available - // (containers) or where it is in a non-standard location. Defaults to - // use the kernel BTF from a well-known location. - TargetBTF io.ReaderAt -} - -// ProgramSpec defines a Program. -type ProgramSpec struct { - // Name is passed to the kernel as a debug aid. Must only contain - // alpha numeric and '_' characters. - Name string - // Type determines at which hook in the kernel a program will run. - Type ProgramType - AttachType AttachType - // Name of a kernel data structure to attach to. It's interpretation - // depends on Type and AttachType. - AttachTo string - Instructions asm.Instructions - // Flags is passed to the kernel and specifies additional program - // load attributes. - Flags uint32 - // License of the program. Some helpers are only available if - // the license is deemed compatible with the GPL. - // - // See https://www.kernel.org/doc/html/latest/process/license-rules.html#id1 - License string - - // Version used by Kprobe programs. - // - // Deprecated on kernels 5.0 and later. Leave empty to let the library - // detect this value automatically. - KernelVersion uint32 - - // The BTF associated with this program. Changing Instructions - // will most likely invalidate the contained data, and may - // result in errors when attempting to load it into the kernel. - BTF *btf.Program - - // The byte order this program was compiled for, may be nil. - ByteOrder binary.ByteOrder -} - -// Copy returns a copy of the spec. -func (ps *ProgramSpec) Copy() *ProgramSpec { - if ps == nil { - return nil - } - - cpy := *ps - cpy.Instructions = make(asm.Instructions, len(ps.Instructions)) - copy(cpy.Instructions, ps.Instructions) - return &cpy -} - -// Tag calculates the kernel tag for a series of instructions. -// -// Use asm.Instructions.Tag if you need to calculate for non-native endianness. -func (ps *ProgramSpec) Tag() (string, error) { - return ps.Instructions.Tag(internal.NativeEndian) -} - -// Program represents BPF program loaded into the kernel. -// -// It is not safe to close a Program which is used by other goroutines. -type Program struct { - // Contains the output of the kernel verifier if enabled, - // otherwise it is empty. - VerifierLog string - - fd *internal.FD - name string - pinnedPath string - typ ProgramType -} - -// NewProgram creates a new Program. -// -// Loading a program for the first time will perform -// feature detection by loading small, temporary programs. -func NewProgram(spec *ProgramSpec) (*Program, error) { - return NewProgramWithOptions(spec, ProgramOptions{}) -} - -// NewProgramWithOptions creates a new Program. 
-// -// Loading a program for the first time will perform -// feature detection by loading small, temporary programs. -func NewProgramWithOptions(spec *ProgramSpec, opts ProgramOptions) (*Program, error) { - handles := newHandleCache() - defer handles.close() - - prog, err := newProgramWithOptions(spec, opts, handles) - if errors.Is(err, errUnsatisfiedReference) { - return nil, fmt.Errorf("cannot load program without loading its whole collection: %w", err) - } - return prog, err -} - -func newProgramWithOptions(spec *ProgramSpec, opts ProgramOptions, handles *handleCache) (*Program, error) { - if len(spec.Instructions) == 0 { - return nil, errors.New("Instructions cannot be empty") - } - - if spec.ByteOrder != nil && spec.ByteOrder != internal.NativeEndian { - return nil, fmt.Errorf("can't load %s program on %s", spec.ByteOrder, internal.NativeEndian) - } - - // Kernels before 5.0 (6c4fc209fcf9 "bpf: remove useless version check for prog load") - // require the version field to be set to the value of the KERNEL_VERSION - // macro for kprobe-type programs. - // Overwrite Kprobe program version if set to zero or the magic version constant. - kv := spec.KernelVersion - if spec.Type == Kprobe && (kv == 0 || kv == internal.MagicKernelVersion) { - v, err := internal.KernelVersion() - if err != nil { - return nil, fmt.Errorf("detecting kernel version: %w", err) - } - kv = v.Kernel() - } - - attr := &bpfProgLoadAttr{ - progType: spec.Type, - progFlags: spec.Flags, - expectedAttachType: spec.AttachType, - license: internal.NewStringPointer(spec.License), - kernelVersion: kv, - } - - if haveObjName() == nil { - attr.progName = internal.NewBPFObjName(spec.Name) - } - - var err error - var targetBTF *btf.Spec - if opts.TargetBTF != nil { - targetBTF, err = handles.btfSpec(opts.TargetBTF) - if err != nil { - return nil, fmt.Errorf("load target BTF: %w", err) - } - } - - var btfDisabled bool - var core btf.COREFixups - if spec.BTF != nil { - core, err = btf.ProgramFixups(spec.BTF, targetBTF) - if err != nil { - return nil, fmt.Errorf("CO-RE relocations: %w", err) - } - - handle, err := handles.btfHandle(btf.ProgramSpec(spec.BTF)) - btfDisabled = errors.Is(err, btf.ErrNotSupported) - if err != nil && !btfDisabled { - return nil, fmt.Errorf("load BTF: %w", err) - } - - if handle != nil { - attr.progBTFFd = uint32(handle.FD()) - - recSize, bytes, err := btf.ProgramLineInfos(spec.BTF) - if err != nil { - return nil, fmt.Errorf("get BTF line infos: %w", err) - } - attr.lineInfoRecSize = recSize - attr.lineInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize)) - attr.lineInfo = internal.NewSlicePointer(bytes) - - recSize, bytes, err = btf.ProgramFuncInfos(spec.BTF) - if err != nil { - return nil, fmt.Errorf("get BTF function infos: %w", err) - } - attr.funcInfoRecSize = recSize - attr.funcInfoCnt = uint32(uint64(len(bytes)) / uint64(recSize)) - attr.funcInfo = internal.NewSlicePointer(bytes) - } - } - - insns, err := core.Apply(spec.Instructions) - if err != nil { - return nil, fmt.Errorf("CO-RE fixup: %w", err) - } - - if err := fixupJumpsAndCalls(insns); err != nil { - return nil, err - } - - buf := bytes.NewBuffer(make([]byte, 0, len(spec.Instructions)*asm.InstructionSize)) - err = insns.Marshal(buf, internal.NativeEndian) - if err != nil { - return nil, err - } - - bytecode := buf.Bytes() - attr.instructions = internal.NewSlicePointer(bytecode) - attr.insCount = uint32(len(bytecode) / asm.InstructionSize) - - if spec.AttachTo != "" { - target, err := resolveBTFType(targetBTF, spec.AttachTo, spec.Type, 
spec.AttachType) - if err != nil { - return nil, err - } - if target != nil { - attr.attachBTFID = target.ID() - } - } - - logSize := DefaultVerifierLogSize - if opts.LogSize > 0 { - logSize = opts.LogSize - } - - var logBuf []byte - if opts.LogLevel > 0 { - logBuf = make([]byte, logSize) - attr.logLevel = opts.LogLevel - attr.logSize = uint32(len(logBuf)) - attr.logBuf = internal.NewSlicePointer(logBuf) - } - - fd, err := bpfProgLoad(attr) - if err == nil { - return &Program{internal.CString(logBuf), fd, spec.Name, "", spec.Type}, nil - } - - logErr := err - if opts.LogLevel == 0 && opts.LogSize >= 0 { - // Re-run with the verifier enabled to get better error messages. - logBuf = make([]byte, logSize) - attr.logLevel = 1 - attr.logSize = uint32(len(logBuf)) - attr.logBuf = internal.NewSlicePointer(logBuf) - - _, logErr = bpfProgLoad(attr) - } - - if errors.Is(logErr, unix.EPERM) && logBuf[0] == 0 { - // EPERM due to RLIMIT_MEMLOCK happens before the verifier, so we can - // check that the log is empty to reduce false positives. - return nil, fmt.Errorf("load program: RLIMIT_MEMLOCK may be too low: %w", logErr) - } - - err = internal.ErrorWithLog(err, logBuf, logErr) - if btfDisabled { - return nil, fmt.Errorf("load program without BTF: %w", err) - } - return nil, fmt.Errorf("load program: %w", err) -} - -// NewProgramFromFD creates a program from a raw fd. -// -// You should not use fd after calling this function. -// -// Requires at least Linux 4.10. -func NewProgramFromFD(fd int) (*Program, error) { - if fd < 0 { - return nil, errors.New("invalid fd") - } - - return newProgramFromFD(internal.NewFD(uint32(fd))) -} - -// NewProgramFromID returns the program for a given id. -// -// Returns ErrNotExist, if there is no eBPF program with the given id. -func NewProgramFromID(id ProgramID) (*Program, error) { - fd, err := bpfObjGetFDByID(internal.BPF_PROG_GET_FD_BY_ID, uint32(id)) - if err != nil { - return nil, fmt.Errorf("get program by id: %w", err) - } - - return newProgramFromFD(fd) -} - -func newProgramFromFD(fd *internal.FD) (*Program, error) { - info, err := newProgramInfoFromFd(fd) - if err != nil { - fd.Close() - return nil, fmt.Errorf("discover program type: %w", err) - } - - return &Program{"", fd, "", "", info.Type}, nil -} - -func (p *Program) String() string { - if p.name != "" { - return fmt.Sprintf("%s(%s)#%v", p.typ, p.name, p.fd) - } - return fmt.Sprintf("%s(%v)", p.typ, p.fd) -} - -// Type returns the underlying type of the program. -func (p *Program) Type() ProgramType { - return p.typ -} - -// Info returns metadata about the program. -// -// Requires at least 4.10. -func (p *Program) Info() (*ProgramInfo, error) { - return newProgramInfoFromFd(p.fd) -} - -// FD gets the file descriptor of the Program. -// -// It is invalid to call this function after Close has been called. -func (p *Program) FD() int { - fd, err := p.fd.Value() - if err != nil { - // Best effort: -1 is the number most likely to be an - // invalid file descriptor. - return -1 - } - - return int(fd) -} - -// Clone creates a duplicate of the Program. -// -// Closing the duplicate does not affect the original, and vice versa. -// -// Cloning a nil Program returns nil. 
-func (p *Program) Clone() (*Program, error) { - if p == nil { - return nil, nil - } - - dup, err := p.fd.Dup() - if err != nil { - return nil, fmt.Errorf("can't clone program: %w", err) - } - - return &Program{p.VerifierLog, dup, p.name, "", p.typ}, nil -} - -// Pin persists the Program on the BPF virtual file system past the lifetime of -// the process that created it -// -// Calling Pin on a previously pinned program will overwrite the path, except when -// the new path already exists. Re-pinning across filesystems is not supported. -// -// This requires bpffs to be mounted above fileName. See https://docs.cilium.io/en/k8s-doc/admin/#admin-mount-bpffs -func (p *Program) Pin(fileName string) error { - if err := internal.Pin(p.pinnedPath, fileName, p.fd); err != nil { - return err - } - p.pinnedPath = fileName - return nil -} - -// Unpin removes the persisted state for the Program from the BPF virtual filesystem. -// -// Failed calls to Unpin will not alter the state returned by IsPinned. -// -// Unpinning an unpinned Program returns nil. -func (p *Program) Unpin() error { - if err := internal.Unpin(p.pinnedPath); err != nil { - return err - } - p.pinnedPath = "" - return nil -} - -// IsPinned returns true if the Program has a non-empty pinned path. -func (p *Program) IsPinned() bool { - return p.pinnedPath != "" -} - -// Close unloads the program from the kernel. -func (p *Program) Close() error { - if p == nil { - return nil - } - - return p.fd.Close() -} - -// Test runs the Program in the kernel with the given input and returns the -// value returned by the eBPF program. outLen may be zero. -// -// Note: the kernel expects at least 14 bytes input for an ethernet header for -// XDP and SKB programs. -// -// This function requires at least Linux 4.12. -func (p *Program) Test(in []byte) (uint32, []byte, error) { - ret, out, _, err := p.testRun(in, 1, nil) - if err != nil { - return ret, nil, fmt.Errorf("can't test program: %w", err) - } - return ret, out, nil -} - -// Benchmark runs the Program with the given input for a number of times -// and returns the time taken per iteration. -// -// Returns the result of the last execution of the program and the time per -// run or an error. reset is called whenever the benchmark syscall is -// interrupted, and should be set to testing.B.ResetTimer or similar. -// -// Note: profiling a call to this function will skew it's results, see -// https://github.com/cilium/ebpf/issues/24 -// -// This function requires at least Linux 4.12. -func (p *Program) Benchmark(in []byte, repeat int, reset func()) (uint32, time.Duration, error) { - ret, _, total, err := p.testRun(in, repeat, reset) - if err != nil { - return ret, total, fmt.Errorf("can't benchmark program: %w", err) - } - return ret, total, nil -} - -var haveProgTestRun = internal.FeatureTest("BPF_PROG_TEST_RUN", "4.12", func() error { - prog, err := NewProgram(&ProgramSpec{ - Type: SocketFilter, - Instructions: asm.Instructions{ - asm.LoadImm(asm.R0, 0, asm.DWord), - asm.Return(), - }, - License: "MIT", - }) - if err != nil { - // This may be because we lack sufficient permissions, etc. 
- return err - } - defer prog.Close() - - // Programs require at least 14 bytes input - in := make([]byte, 14) - attr := bpfProgTestRunAttr{ - fd: uint32(prog.FD()), - dataSizeIn: uint32(len(in)), - dataIn: internal.NewSlicePointer(in), - } - - err = bpfProgTestRun(&attr) - if errors.Is(err, unix.EINVAL) { - // Check for EINVAL specifically, rather than err != nil since we - // otherwise misdetect due to insufficient permissions. - return internal.ErrNotSupported - } - if errors.Is(err, unix.EINTR) { - // We know that PROG_TEST_RUN is supported if we get EINTR. - return nil - } - return err -}) - -func (p *Program) testRun(in []byte, repeat int, reset func()) (uint32, []byte, time.Duration, error) { - if uint(repeat) > math.MaxUint32 { - return 0, nil, 0, fmt.Errorf("repeat is too high") - } - - if len(in) == 0 { - return 0, nil, 0, fmt.Errorf("missing input") - } - - if uint(len(in)) > math.MaxUint32 { - return 0, nil, 0, fmt.Errorf("input is too long") - } - - if err := haveProgTestRun(); err != nil { - return 0, nil, 0, err - } - - // Older kernels ignore the dataSizeOut argument when copying to user space. - // Combined with things like bpf_xdp_adjust_head() we don't really know what the final - // size will be. Hence we allocate an output buffer which we hope will always be large - // enough, and panic if the kernel wrote past the end of the allocation. - // See https://patchwork.ozlabs.org/cover/1006822/ - out := make([]byte, len(in)+outputPad) - - fd, err := p.fd.Value() - if err != nil { - return 0, nil, 0, err - } - - attr := bpfProgTestRunAttr{ - fd: fd, - dataSizeIn: uint32(len(in)), - dataSizeOut: uint32(len(out)), - dataIn: internal.NewSlicePointer(in), - dataOut: internal.NewSlicePointer(out), - repeat: uint32(repeat), - } - - for { - err = bpfProgTestRun(&attr) - if err == nil { - break - } - - if errors.Is(err, unix.EINTR) { - if reset != nil { - reset() - } - continue - } - - return 0, nil, 0, fmt.Errorf("can't run test: %w", err) - } - - if int(attr.dataSizeOut) > cap(out) { - // Houston, we have a problem. The program created more data than we allocated, - // and the kernel wrote past the end of our buffer. - panic("kernel wrote past end of output buffer") - } - out = out[:int(attr.dataSizeOut)] - - total := time.Duration(attr.duration) * time.Nanosecond - return attr.retval, out, total, nil -} - -func unmarshalProgram(buf []byte) (*Program, error) { - if len(buf) != 4 { - return nil, errors.New("program id requires 4 byte value") - } - - // Looking up an entry in a nested map or prog array returns an id, - // not an fd. - id := internal.NativeEndian.Uint32(buf) - return NewProgramFromID(ProgramID(id)) -} - -func marshalProgram(p *Program, length int) ([]byte, error) { - if length != 4 { - return nil, fmt.Errorf("can't marshal program to %d bytes", length) - } - - value, err := p.fd.Value() - if err != nil { - return nil, err - } - - buf := make([]byte, 4) - internal.NativeEndian.PutUint32(buf, value) - return buf, nil -} - -// Attach a Program. -// -// Deprecated: use link.RawAttachProgram instead. -func (p *Program) Attach(fd int, typ AttachType, flags AttachFlags) error { - if fd < 0 { - return errors.New("invalid fd") - } - - pfd, err := p.fd.Value() - if err != nil { - return err - } - - attr := internal.BPFProgAttachAttr{ - TargetFd: uint32(fd), - AttachBpfFd: pfd, - AttachType: uint32(typ), - AttachFlags: uint32(flags), - } - - return internal.BPFProgAttach(&attr) -} - -// Detach a Program. -// -// Deprecated: use link.RawDetachProgram instead. 
-func (p *Program) Detach(fd int, typ AttachType, flags AttachFlags) error { - if fd < 0 { - return errors.New("invalid fd") - } - - if flags != 0 { - return errors.New("flags must be zero") - } - - pfd, err := p.fd.Value() - if err != nil { - return err - } - - attr := internal.BPFProgDetachAttr{ - TargetFd: uint32(fd), - AttachBpfFd: pfd, - AttachType: uint32(typ), - } - - return internal.BPFProgDetach(&attr) -} - -// LoadPinnedProgram loads a Program from a BPF file. -// -// Requires at least Linux 4.11. -func LoadPinnedProgram(fileName string, opts *LoadPinOptions) (*Program, error) { - fd, err := internal.BPFObjGet(fileName, opts.Marshal()) - if err != nil { - return nil, err - } - - info, err := newProgramInfoFromFd(fd) - if err != nil { - _ = fd.Close() - return nil, fmt.Errorf("info for %s: %w", fileName, err) - } - - return &Program{"", fd, filepath.Base(fileName), fileName, info.Type}, nil -} - -// SanitizeName replaces all invalid characters in name with replacement. -// Passing a negative value for replacement will delete characters instead -// of replacing them. Use this to automatically generate valid names for maps -// and programs at runtime. -// -// The set of allowed characters depends on the running kernel version. -// Dots are only allowed as of kernel 5.2. -func SanitizeName(name string, replacement rune) string { - return strings.Map(func(char rune) rune { - if invalidBPFObjNameChar(char) { - return replacement - } - return char - }, name) -} - -// ProgramGetNextID returns the ID of the next eBPF program. -// -// Returns ErrNotExist, if there is no next eBPF program. -func ProgramGetNextID(startID ProgramID) (ProgramID, error) { - id, err := objGetNextID(internal.BPF_PROG_GET_NEXT_ID, uint32(startID)) - return ProgramID(id), err -} - -// ID returns the systemwide unique ID of the program. -// -// Deprecated: use ProgramInfo.ID() instead. -func (p *Program) ID() (ProgramID, error) { - info, err := bpfGetProgInfoByFD(p.fd) - if err != nil { - return ProgramID(0), err - } - return ProgramID(info.id), nil -} - -func resolveBTFType(kernel *btf.Spec, name string, progType ProgramType, attachType AttachType) (btf.Type, error) { - type match struct { - p ProgramType - a AttachType - } - - var target btf.Type - var typeName, featureName string - switch (match{progType, attachType}) { - case match{LSM, AttachLSMMac}: - target = new(btf.Func) - typeName = "bpf_lsm_" + name - featureName = name + " LSM hook" - - case match{Tracing, AttachTraceIter}: - target = new(btf.Func) - typeName = "bpf_iter_" + name - featureName = name + " iterator" - - default: - return nil, nil - } - - if kernel == nil { - var err error - kernel, err = btf.LoadKernelSpec() - if err != nil { - return nil, fmt.Errorf("load kernel spec: %w", err) - } - } - - err := kernel.FindType(typeName, target) - if errors.Is(err, btf.ErrNotFound) { - return nil, &internal.UnsupportedFeatureError{ - Name: featureName, - } - } - if err != nil { - return nil, fmt.Errorf("resolve BTF for %s: %w", featureName, err) - } - return target, nil -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/run-tests.sh b/src/runtime/vendor/github.com/cilium/ebpf/run-tests.sh deleted file mode 100644 index e2437beed2..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/run-tests.sh +++ /dev/null @@ -1,123 +0,0 @@ -#!/bin/bash -# Test the current package under a different kernel. -# Requires virtme and qemu to be installed. 
-# Examples: -# Run all tests on a 5.4 kernel -# $ ./run-tests.sh 5.4 -# Run a subset of tests: -# $ ./run-tests.sh 5.4 go test ./link - -set -euo pipefail - -script="$(realpath "$0")" -readonly script - -# This script is a bit like a Matryoshka doll since it keeps re-executing itself -# in various different contexts: -# -# 1. invoked by the user like run-tests.sh 5.4 -# 2. invoked by go test like run-tests.sh --exec-vm -# 3. invoked by init in the vm like run-tests.sh --exec-test -# -# This allows us to use all available CPU on the host machine to compile our -# code, and then only use the VM to execute the test. This is because the VM -# is usually slower at compiling than the host. -if [[ "${1:-}" = "--exec-vm" ]]; then - shift - - input="$1" - shift - - # Use sudo if /dev/kvm isn't accessible by the current user. - sudo="" - if [[ ! -r /dev/kvm || ! -w /dev/kvm ]]; then - sudo="sudo" - fi - readonly sudo - - testdir="$(dirname "$1")" - output="$(mktemp -d)" - printf -v cmd "%q " "$@" - - if [[ "$(stat -c '%t:%T' -L /proc/$$/fd/0)" == "1:3" ]]; then - # stdin is /dev/null, which doesn't play well with qemu. Use a fifo as a - # blocking substitute. - mkfifo "${output}/fake-stdin" - # Open for reading and writing to avoid blocking. - exec 0<> "${output}/fake-stdin" - rm "${output}/fake-stdin" - fi - - $sudo virtme-run --kimg "${input}/bzImage" --memory 768M --pwd \ - --rwdir="${testdir}=${testdir}" \ - --rodir=/run/input="${input}" \ - --rwdir=/run/output="${output}" \ - --script-sh "PATH=\"$PATH\" \"$script\" --exec-test $cmd" \ - --qemu-opts -smp 2 # need at least two CPUs for some tests - - if [[ ! -e "${output}/success" ]]; then - exit 1 - fi - - $sudo rm -r "$output" - exit 0 -elif [[ "${1:-}" = "--exec-test" ]]; then - shift - - mount -t bpf bpf /sys/fs/bpf - mount -t tracefs tracefs /sys/kernel/debug/tracing - - if [[ -d "/run/input/bpf" ]]; then - export KERNEL_SELFTESTS="/run/input/bpf" - fi - - dmesg -C - if ! "$@"; then - dmesg - exit 1 - fi - touch "/run/output/success" - exit 0 -fi - -readonly kernel_version="${1:-}" -if [[ -z "${kernel_version}" ]]; then - echo "Expecting kernel version as first argument" - exit 1 -fi -shift - -readonly kernel="linux-${kernel_version}.bz" -readonly selftests="linux-${kernel_version}-selftests-bpf.bz" -readonly input="$(mktemp -d)" -readonly tmp_dir="${TMPDIR:-/tmp}" -readonly branch="${BRANCH:-master}" - -fetch() { - echo Fetching "${1}" - wget -nv -N -P "${tmp_dir}" "https://github.com/cilium/ci-kernels/raw/${branch}/${1}" -} - -fetch "${kernel}" -cp "${tmp_dir}/${kernel}" "${input}/bzImage" - -if fetch "${selftests}"; then - mkdir "${input}/bpf" - tar --strip-components=4 -xjf "${tmp_dir}/${selftests}" -C "${input}/bpf" -else - echo "No selftests found, disabling" -fi - -args=(-v -short -coverpkg=./... -coverprofile=coverage.out -count 1 ./...) 
-if (( $# > 0 )); then - args=("$@") -fi - -export GOFLAGS=-mod=readonly -export CGO_ENABLED=0 - -echo Testing on "${kernel_version}" -go test -exec "$script --exec-vm $input" "${args[@]}" -echo "Test successful on ${kernel_version}" - -rm -r "${input}" diff --git a/src/runtime/vendor/github.com/cilium/ebpf/syscalls.go b/src/runtime/vendor/github.com/cilium/ebpf/syscalls.go deleted file mode 100644 index f5a38549bb..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/syscalls.go +++ /dev/null @@ -1,480 +0,0 @@ -package ebpf - -import ( - "errors" - "fmt" - "os" - "unsafe" - - "github.com/cilium/ebpf/internal" - "github.com/cilium/ebpf/internal/btf" - "github.com/cilium/ebpf/internal/unix" -) - -// ErrNotExist is returned when loading a non-existing map or program. -// -// Deprecated: use os.ErrNotExist instead. -var ErrNotExist = os.ErrNotExist - -// invalidBPFObjNameChar returns true if char may not appear in -// a BPF object name. -func invalidBPFObjNameChar(char rune) bool { - dotAllowed := objNameAllowsDot() == nil - - switch { - case char >= 'A' && char <= 'Z': - return false - case char >= 'a' && char <= 'z': - return false - case char >= '0' && char <= '9': - return false - case dotAllowed && char == '.': - return false - case char == '_': - return false - default: - return true - } -} - -type bpfMapOpAttr struct { - mapFd uint32 - padding uint32 - key internal.Pointer - value internal.Pointer - flags uint64 -} - -type bpfBatchMapOpAttr struct { - inBatch internal.Pointer - outBatch internal.Pointer - keys internal.Pointer - values internal.Pointer - count uint32 - mapFd uint32 - elemFlags uint64 - flags uint64 -} - -type bpfMapInfo struct { - map_type uint32 // since 4.12 1e2709769086 - id uint32 - key_size uint32 - value_size uint32 - max_entries uint32 - map_flags uint32 - name internal.BPFObjName // since 4.15 ad5b177bd73f - ifindex uint32 // since 4.16 52775b33bb50 - btf_vmlinux_value_type_id uint32 // since 5.6 85d33df357b6 - netns_dev uint64 // since 4.16 52775b33bb50 - netns_ino uint64 - btf_id uint32 // since 4.18 78958fca7ead - btf_key_type_id uint32 // since 4.18 9b2cf328b2ec - btf_value_type_id uint32 -} - -type bpfProgLoadAttr struct { - progType ProgramType - insCount uint32 - instructions internal.Pointer - license internal.Pointer - logLevel uint32 - logSize uint32 - logBuf internal.Pointer - kernelVersion uint32 // since 4.1 2541517c32be - progFlags uint32 // since 4.11 e07b98d9bffe - progName internal.BPFObjName // since 4.15 067cae47771c - progIfIndex uint32 // since 4.15 1f6f4cb7ba21 - expectedAttachType AttachType // since 4.17 5e43f899b03a - progBTFFd uint32 - funcInfoRecSize uint32 - funcInfo internal.Pointer - funcInfoCnt uint32 - lineInfoRecSize uint32 - lineInfo internal.Pointer - lineInfoCnt uint32 - attachBTFID btf.TypeID - attachProgFd uint32 -} - -type bpfProgInfo struct { - prog_type uint32 - id uint32 - tag [unix.BPF_TAG_SIZE]byte - jited_prog_len uint32 - xlated_prog_len uint32 - jited_prog_insns internal.Pointer - xlated_prog_insns internal.Pointer - load_time uint64 // since 4.15 cb4d2b3f03d8 - created_by_uid uint32 - nr_map_ids uint32 - map_ids internal.Pointer - name internal.BPFObjName // since 4.15 067cae47771c - ifindex uint32 - gpl_compatible uint32 - netns_dev uint64 - netns_ino uint64 - nr_jited_ksyms uint32 - nr_jited_func_lens uint32 - jited_ksyms internal.Pointer - jited_func_lens internal.Pointer - btf_id uint32 - func_info_rec_size uint32 - func_info internal.Pointer - nr_func_info uint32 - nr_line_info uint32 - line_info 
internal.Pointer - jited_line_info internal.Pointer - nr_jited_line_info uint32 - line_info_rec_size uint32 - jited_line_info_rec_size uint32 - nr_prog_tags uint32 - prog_tags internal.Pointer - run_time_ns uint64 - run_cnt uint64 -} - -type bpfProgTestRunAttr struct { - fd uint32 - retval uint32 - dataSizeIn uint32 - dataSizeOut uint32 - dataIn internal.Pointer - dataOut internal.Pointer - repeat uint32 - duration uint32 -} - -type bpfGetFDByIDAttr struct { - id uint32 - next uint32 -} - -type bpfMapFreezeAttr struct { - mapFd uint32 -} - -type bpfObjGetNextIDAttr struct { - startID uint32 - nextID uint32 - openFlags uint32 -} - -func bpfProgLoad(attr *bpfProgLoadAttr) (*internal.FD, error) { - for { - fd, err := internal.BPF(internal.BPF_PROG_LOAD, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - // As of ~4.20 the verifier can be interrupted by a signal, - // and returns EAGAIN in that case. - if errors.Is(err, unix.EAGAIN) { - continue - } - - if err != nil { - return nil, err - } - - return internal.NewFD(uint32(fd)), nil - } -} - -func bpfProgTestRun(attr *bpfProgTestRunAttr) error { - _, err := internal.BPF(internal.BPF_PROG_TEST_RUN, unsafe.Pointer(attr), unsafe.Sizeof(*attr)) - return err -} - -var haveNestedMaps = internal.FeatureTest("nested maps", "4.12", func() error { - _, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{ - MapType: uint32(ArrayOfMaps), - KeySize: 4, - ValueSize: 4, - MaxEntries: 1, - // Invalid file descriptor. - InnerMapFd: ^uint32(0), - }) - if errors.Is(err, unix.EINVAL) { - return internal.ErrNotSupported - } - if errors.Is(err, unix.EBADF) { - return nil - } - return err -}) - -var haveMapMutabilityModifiers = internal.FeatureTest("read- and write-only maps", "5.2", func() error { - // This checks BPF_F_RDONLY_PROG and BPF_F_WRONLY_PROG. Since - // BPF_MAP_FREEZE appeared in 5.2 as well we don't do a separate check. - m, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{ - MapType: uint32(Array), - KeySize: 4, - ValueSize: 4, - MaxEntries: 1, - Flags: unix.BPF_F_RDONLY_PROG, - }) - if err != nil { - return internal.ErrNotSupported - } - _ = m.Close() - return nil -}) - -var haveMmapableMaps = internal.FeatureTest("mmapable maps", "5.5", func() error { - // This checks BPF_F_MMAPABLE, which appeared in 5.5 for array maps. - m, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{ - MapType: uint32(Array), - KeySize: 4, - ValueSize: 4, - MaxEntries: 1, - Flags: unix.BPF_F_MMAPABLE, - }) - if err != nil { - return internal.ErrNotSupported - } - _ = m.Close() - return nil -}) - -var haveInnerMaps = internal.FeatureTest("inner maps", "5.10", func() error { - // This checks BPF_F_INNER_MAP, which appeared in 5.10. 
- m, err := internal.BPFMapCreate(&internal.BPFMapCreateAttr{ - MapType: uint32(Array), - KeySize: 4, - ValueSize: 4, - MaxEntries: 1, - Flags: unix.BPF_F_INNER_MAP, - }) - if err != nil { - return internal.ErrNotSupported - } - _ = m.Close() - return nil -}) - -func bpfMapLookupElem(m *internal.FD, key, valueOut internal.Pointer) error { - fd, err := m.Value() - if err != nil { - return err - } - - attr := bpfMapOpAttr{ - mapFd: fd, - key: key, - value: valueOut, - } - _, err = internal.BPF(internal.BPF_MAP_LOOKUP_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return wrapMapError(err) -} - -func bpfMapLookupAndDelete(m *internal.FD, key, valueOut internal.Pointer) error { - fd, err := m.Value() - if err != nil { - return err - } - - attr := bpfMapOpAttr{ - mapFd: fd, - key: key, - value: valueOut, - } - _, err = internal.BPF(internal.BPF_MAP_LOOKUP_AND_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return wrapMapError(err) -} - -func bpfMapUpdateElem(m *internal.FD, key, valueOut internal.Pointer, flags uint64) error { - fd, err := m.Value() - if err != nil { - return err - } - - attr := bpfMapOpAttr{ - mapFd: fd, - key: key, - value: valueOut, - flags: flags, - } - _, err = internal.BPF(internal.BPF_MAP_UPDATE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return wrapMapError(err) -} - -func bpfMapDeleteElem(m *internal.FD, key internal.Pointer) error { - fd, err := m.Value() - if err != nil { - return err - } - - attr := bpfMapOpAttr{ - mapFd: fd, - key: key, - } - _, err = internal.BPF(internal.BPF_MAP_DELETE_ELEM, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return wrapMapError(err) -} - -func bpfMapGetNextKey(m *internal.FD, key, nextKeyOut internal.Pointer) error { - fd, err := m.Value() - if err != nil { - return err - } - - attr := bpfMapOpAttr{ - mapFd: fd, - key: key, - value: nextKeyOut, - } - _, err = internal.BPF(internal.BPF_MAP_GET_NEXT_KEY, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return wrapMapError(err) -} - -func objGetNextID(cmd internal.BPFCmd, start uint32) (uint32, error) { - attr := bpfObjGetNextIDAttr{ - startID: start, - } - _, err := internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return attr.nextID, err -} - -func bpfMapBatch(cmd internal.BPFCmd, m *internal.FD, inBatch, outBatch, keys, values internal.Pointer, count uint32, opts *BatchOptions) (uint32, error) { - fd, err := m.Value() - if err != nil { - return 0, err - } - - attr := bpfBatchMapOpAttr{ - inBatch: inBatch, - outBatch: outBatch, - keys: keys, - values: values, - count: count, - mapFd: fd, - } - if opts != nil { - attr.elemFlags = opts.ElemFlags - attr.flags = opts.Flags - } - _, err = internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - // always return count even on an error, as things like update might partially be fulfilled. 
- return attr.count, wrapMapError(err) -} - -func wrapMapError(err error) error { - if err == nil { - return nil - } - - if errors.Is(err, unix.ENOENT) { - return internal.SyscallError(ErrKeyNotExist, unix.ENOENT) - } - - if errors.Is(err, unix.EEXIST) { - return internal.SyscallError(ErrKeyExist, unix.EEXIST) - } - - if errors.Is(err, unix.ENOTSUPP) { - return internal.SyscallError(ErrNotSupported, unix.ENOTSUPP) - } - - return err -} - -func bpfMapFreeze(m *internal.FD) error { - fd, err := m.Value() - if err != nil { - return err - } - - attr := bpfMapFreezeAttr{ - mapFd: fd, - } - _, err = internal.BPF(internal.BPF_MAP_FREEZE, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return err -} - -func bpfGetProgInfoByFD(fd *internal.FD) (*bpfProgInfo, error) { - var info bpfProgInfo - if err := internal.BPFObjGetInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)); err != nil { - return nil, fmt.Errorf("can't get program info: %w", err) - } - return &info, nil -} - -func bpfGetMapInfoByFD(fd *internal.FD) (*bpfMapInfo, error) { - var info bpfMapInfo - err := internal.BPFObjGetInfoByFD(fd, unsafe.Pointer(&info), unsafe.Sizeof(info)) - if err != nil { - return nil, fmt.Errorf("can't get map info: %w", err) - } - return &info, nil -} - -var haveObjName = internal.FeatureTest("object names", "4.15", func() error { - attr := internal.BPFMapCreateAttr{ - MapType: uint32(Array), - KeySize: 4, - ValueSize: 4, - MaxEntries: 1, - MapName: internal.NewBPFObjName("feature_test"), - } - - fd, err := internal.BPFMapCreate(&attr) - if err != nil { - return internal.ErrNotSupported - } - - _ = fd.Close() - return nil -}) - -var objNameAllowsDot = internal.FeatureTest("dot in object names", "5.2", func() error { - if err := haveObjName(); err != nil { - return err - } - - attr := internal.BPFMapCreateAttr{ - MapType: uint32(Array), - KeySize: 4, - ValueSize: 4, - MaxEntries: 1, - MapName: internal.NewBPFObjName(".test"), - } - - fd, err := internal.BPFMapCreate(&attr) - if err != nil { - return internal.ErrNotSupported - } - - _ = fd.Close() - return nil -}) - -var haveBatchAPI = internal.FeatureTest("map batch api", "5.6", func() error { - var maxEntries uint32 = 2 - attr := internal.BPFMapCreateAttr{ - MapType: uint32(Hash), - KeySize: 4, - ValueSize: 4, - MaxEntries: maxEntries, - } - - fd, err := internal.BPFMapCreate(&attr) - if err != nil { - return internal.ErrNotSupported - } - defer fd.Close() - keys := []uint32{1, 2} - values := []uint32{3, 4} - kp, _ := marshalPtr(keys, 8) - vp, _ := marshalPtr(values, 8) - nilPtr := internal.NewPointer(nil) - _, err = bpfMapBatch(internal.BPF_MAP_UPDATE_BATCH, fd, nilPtr, nilPtr, kp, vp, maxEntries, nil) - if err != nil { - return internal.ErrNotSupported - } - return nil -}) - -func bpfObjGetFDByID(cmd internal.BPFCmd, id uint32) (*internal.FD, error) { - attr := bpfGetFDByIDAttr{ - id: id, - } - ptr, err := internal.BPF(cmd, unsafe.Pointer(&attr), unsafe.Sizeof(attr)) - return internal.NewFD(uint32(ptr)), err -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/types.go b/src/runtime/vendor/github.com/cilium/ebpf/types.go deleted file mode 100644 index 441a82fe4c..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/types.go +++ /dev/null @@ -1,248 +0,0 @@ -package ebpf - -import ( - "github.com/cilium/ebpf/internal/unix" -) - -//go:generate stringer -output types_string.go -type=MapType,ProgramType,AttachType,PinType - -// MapType indicates the type map structure -// that will be initialized in the kernel. 
-type MapType uint32 - -// All the various map types that can be created -const ( - UnspecifiedMap MapType = iota - // Hash is a hash map - Hash - // Array is an array map - Array - // ProgramArray - A program array map is a special kind of array map whose map - // values contain only file descriptors referring to other eBPF - // programs. Thus, both the key_size and value_size must be - // exactly four bytes. This map is used in conjunction with the - // TailCall helper. - ProgramArray - // PerfEventArray - A perf event array is used in conjunction with PerfEventRead - // and PerfEventOutput calls, to read the raw bpf_perf_data from the registers. - PerfEventArray - // PerCPUHash - This data structure is useful for people who have high performance - // network needs and can reconcile adds at the end of some cycle, so that - // hashes can be lock free without the use of XAdd, which can be costly. - PerCPUHash - // PerCPUArray - This data structure is useful for people who have high performance - // network needs and can reconcile adds at the end of some cycle, so that - // hashes can be lock free without the use of XAdd, which can be costly. - // Each CPU gets a copy of this hash, the contents of all of which can be reconciled - // later. - PerCPUArray - // StackTrace - This holds whole user and kernel stack traces, it can be retrieved with - // GetStackID - StackTrace - // CGroupArray - This is a very niche structure used to help SKBInCGroup determine - // if an skb is from a socket belonging to a specific cgroup - CGroupArray - // LRUHash - This allows you to create a small hash structure that will purge the - // least recently used items rather than thow an error when you run out of memory - LRUHash - // LRUCPUHash - This is NOT like PerCPUHash, this structure is shared among the CPUs, - // it has more to do with including the CPU id with the LRU calculation so that if a - // particular CPU is using a value over-and-over again, then it will be saved, but if - // a value is being retrieved a lot but sparsely across CPUs it is not as important, basically - // giving weight to CPU locality over overall usage. - LRUCPUHash - // LPMTrie - This is an implementation of Longest-Prefix-Match Trie structure. It is useful, - // for storing things like IP addresses which can be bit masked allowing for keys of differing - // values to refer to the same reference based on their masks. See wikipedia for more details. - LPMTrie - // ArrayOfMaps - Each item in the array is another map. The inner map mustn't be a map of maps - // itself. - ArrayOfMaps - // HashOfMaps - Each item in the hash map is another map. The inner map mustn't be a map of maps - // itself. - HashOfMaps - // DevMap - Specialized map to store references to network devices. - DevMap - // SockMap - Specialized map to store references to sockets. - SockMap - // CPUMap - Specialized map to store references to CPUs. - CPUMap - // XSKMap - Specialized map for XDP programs to store references to open sockets. - XSKMap - // SockHash - Specialized hash to store references to sockets. - SockHash - // CGroupStorage - Special map for CGroups. - CGroupStorage - // ReusePortSockArray - Specialized map to store references to sockets that can be reused. - ReusePortSockArray - // PerCPUCGroupStorage - Special per CPU map for CGroups. - PerCPUCGroupStorage - // Queue - FIFO storage for BPF programs. - Queue - // Stack - LIFO storage for BPF programs. - Stack - // SkStorage - Specialized map for local storage at SK for BPF programs. 
- SkStorage - // DevMapHash - Hash-based indexing scheme for references to network devices. - DevMapHash - StructOpts - RingBuf - InodeStorage - TaskStorage -) - -// hasPerCPUValue returns true if the Map stores a value per CPU. -func (mt MapType) hasPerCPUValue() bool { - return mt == PerCPUHash || mt == PerCPUArray || mt == LRUCPUHash -} - -// canStoreMap returns true if the map type accepts a map fd -// for update and returns a map id for lookup. -func (mt MapType) canStoreMap() bool { - return mt == ArrayOfMaps || mt == HashOfMaps -} - -// canStoreProgram returns true if the map type accepts a program fd -// for update and returns a program id for lookup. -func (mt MapType) canStoreProgram() bool { - return mt == ProgramArray -} - -// ProgramType of the eBPF program -type ProgramType uint32 - -// eBPF program types -const ( - UnspecifiedProgram ProgramType = iota - SocketFilter - Kprobe - SchedCLS - SchedACT - TracePoint - XDP - PerfEvent - CGroupSKB - CGroupSock - LWTIn - LWTOut - LWTXmit - SockOps - SkSKB - CGroupDevice - SkMsg - RawTracepoint - CGroupSockAddr - LWTSeg6Local - LircMode2 - SkReuseport - FlowDissector - CGroupSysctl - RawTracepointWritable - CGroupSockopt - Tracing - StructOps - Extension - LSM - SkLookup -) - -// AttachType of the eBPF program, needed to differentiate allowed context accesses in -// some newer program types like CGroupSockAddr. Should be set to AttachNone if not required. -// Will cause invalid argument (EINVAL) at program load time if set incorrectly. -type AttachType uint32 - -// AttachNone is an alias for AttachCGroupInetIngress for readability reasons. -const AttachNone AttachType = 0 - -const ( - AttachCGroupInetIngress AttachType = iota - AttachCGroupInetEgress - AttachCGroupInetSockCreate - AttachCGroupSockOps - AttachSkSKBStreamParser - AttachSkSKBStreamVerdict - AttachCGroupDevice - AttachSkMsgVerdict - AttachCGroupInet4Bind - AttachCGroupInet6Bind - AttachCGroupInet4Connect - AttachCGroupInet6Connect - AttachCGroupInet4PostBind - AttachCGroupInet6PostBind - AttachCGroupUDP4Sendmsg - AttachCGroupUDP6Sendmsg - AttachLircMode2 - AttachFlowDissector - AttachCGroupSysctl - AttachCGroupUDP4Recvmsg - AttachCGroupUDP6Recvmsg - AttachCGroupGetsockopt - AttachCGroupSetsockopt - AttachTraceRawTp - AttachTraceFEntry - AttachTraceFExit - AttachModifyReturn - AttachLSMMac - AttachTraceIter - AttachCgroupInet4GetPeername - AttachCgroupInet6GetPeername - AttachCgroupInet4GetSockname - AttachCgroupInet6GetSockname - AttachXDPDevMap - AttachCgroupInetSockRelease - AttachXDPCPUMap - AttachSkLookup - AttachXDP -) - -// AttachFlags of the eBPF program used in BPF_PROG_ATTACH command -type AttachFlags uint32 - -// PinType determines whether a map is pinned into a BPFFS. -type PinType int - -// Valid pin types. -// -// Mirrors enum libbpf_pin_type. -const ( - PinNone PinType = iota - // Pin an object by using its name as the filename. - PinByName -) - -// LoadPinOptions control how a pinned object is loaded. -type LoadPinOptions struct { - // Request a read-only or write-only object. The default is a read-write - // object. Only one of the flags may be set. - ReadOnly bool - WriteOnly bool - - // Raw flags for the syscall. Other fields of this struct take precedence. - Flags uint32 -} - -// Marshal returns a value suitable for BPF_OBJ_GET syscall file_flags parameter. 
-func (lpo *LoadPinOptions) Marshal() uint32 { - if lpo == nil { - return 0 - } - - flags := lpo.Flags - if lpo.ReadOnly { - flags |= unix.BPF_F_RDONLY - } - if lpo.WriteOnly { - flags |= unix.BPF_F_WRONLY - } - return flags -} - -// BatchOptions batch map operations options -// -// Mirrors libbpf struct bpf_map_batch_opts -// Currently BPF_F_FLAG is the only supported -// flag (for ElemFlags). -type BatchOptions struct { - ElemFlags uint64 - Flags uint64 -} diff --git a/src/runtime/vendor/github.com/cilium/ebpf/types_string.go b/src/runtime/vendor/github.com/cilium/ebpf/types_string.go deleted file mode 100644 index c25f765647..0000000000 --- a/src/runtime/vendor/github.com/cilium/ebpf/types_string.go +++ /dev/null @@ -1,172 +0,0 @@ -// Code generated by "stringer -output types_string.go -type=MapType,ProgramType,AttachType,PinType"; DO NOT EDIT. - -package ebpf - -import "strconv" - -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[UnspecifiedMap-0] - _ = x[Hash-1] - _ = x[Array-2] - _ = x[ProgramArray-3] - _ = x[PerfEventArray-4] - _ = x[PerCPUHash-5] - _ = x[PerCPUArray-6] - _ = x[StackTrace-7] - _ = x[CGroupArray-8] - _ = x[LRUHash-9] - _ = x[LRUCPUHash-10] - _ = x[LPMTrie-11] - _ = x[ArrayOfMaps-12] - _ = x[HashOfMaps-13] - _ = x[DevMap-14] - _ = x[SockMap-15] - _ = x[CPUMap-16] - _ = x[XSKMap-17] - _ = x[SockHash-18] - _ = x[CGroupStorage-19] - _ = x[ReusePortSockArray-20] - _ = x[PerCPUCGroupStorage-21] - _ = x[Queue-22] - _ = x[Stack-23] - _ = x[SkStorage-24] - _ = x[DevMapHash-25] - _ = x[StructOpts-26] - _ = x[RingBuf-27] - _ = x[InodeStorage-28] - _ = x[TaskStorage-29] -} - -const _MapType_name = "UnspecifiedMapHashArrayProgramArrayPerfEventArrayPerCPUHashPerCPUArrayStackTraceCGroupArrayLRUHashLRUCPUHashLPMTrieArrayOfMapsHashOfMapsDevMapSockMapCPUMapXSKMapSockHashCGroupStorageReusePortSockArrayPerCPUCGroupStorageQueueStackSkStorageDevMapHashStructOptsRingBufInodeStorageTaskStorage" - -var _MapType_index = [...]uint16{0, 14, 18, 23, 35, 49, 59, 70, 80, 91, 98, 108, 115, 126, 136, 142, 149, 155, 161, 169, 182, 200, 219, 224, 229, 238, 248, 258, 265, 277, 288} - -func (i MapType) String() string { - if i >= MapType(len(_MapType_index)-1) { - return "MapType(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _MapType_name[_MapType_index[i]:_MapType_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. 
- var x [1]struct{} - _ = x[UnspecifiedProgram-0] - _ = x[SocketFilter-1] - _ = x[Kprobe-2] - _ = x[SchedCLS-3] - _ = x[SchedACT-4] - _ = x[TracePoint-5] - _ = x[XDP-6] - _ = x[PerfEvent-7] - _ = x[CGroupSKB-8] - _ = x[CGroupSock-9] - _ = x[LWTIn-10] - _ = x[LWTOut-11] - _ = x[LWTXmit-12] - _ = x[SockOps-13] - _ = x[SkSKB-14] - _ = x[CGroupDevice-15] - _ = x[SkMsg-16] - _ = x[RawTracepoint-17] - _ = x[CGroupSockAddr-18] - _ = x[LWTSeg6Local-19] - _ = x[LircMode2-20] - _ = x[SkReuseport-21] - _ = x[FlowDissector-22] - _ = x[CGroupSysctl-23] - _ = x[RawTracepointWritable-24] - _ = x[CGroupSockopt-25] - _ = x[Tracing-26] - _ = x[StructOps-27] - _ = x[Extension-28] - _ = x[LSM-29] - _ = x[SkLookup-30] -} - -const _ProgramType_name = "UnspecifiedProgramSocketFilterKprobeSchedCLSSchedACTTracePointXDPPerfEventCGroupSKBCGroupSockLWTInLWTOutLWTXmitSockOpsSkSKBCGroupDeviceSkMsgRawTracepointCGroupSockAddrLWTSeg6LocalLircMode2SkReuseportFlowDissectorCGroupSysctlRawTracepointWritableCGroupSockoptTracingStructOpsExtensionLSMSkLookup" - -var _ProgramType_index = [...]uint16{0, 18, 30, 36, 44, 52, 62, 65, 74, 83, 93, 98, 104, 111, 118, 123, 135, 140, 153, 167, 179, 188, 199, 212, 224, 245, 258, 265, 274, 283, 286, 294} - -func (i ProgramType) String() string { - if i >= ProgramType(len(_ProgramType_index)-1) { - return "ProgramType(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _ProgramType_name[_ProgramType_index[i]:_ProgramType_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[AttachNone-0] - _ = x[AttachCGroupInetIngress-0] - _ = x[AttachCGroupInetEgress-1] - _ = x[AttachCGroupInetSockCreate-2] - _ = x[AttachCGroupSockOps-3] - _ = x[AttachSkSKBStreamParser-4] - _ = x[AttachSkSKBStreamVerdict-5] - _ = x[AttachCGroupDevice-6] - _ = x[AttachSkMsgVerdict-7] - _ = x[AttachCGroupInet4Bind-8] - _ = x[AttachCGroupInet6Bind-9] - _ = x[AttachCGroupInet4Connect-10] - _ = x[AttachCGroupInet6Connect-11] - _ = x[AttachCGroupInet4PostBind-12] - _ = x[AttachCGroupInet6PostBind-13] - _ = x[AttachCGroupUDP4Sendmsg-14] - _ = x[AttachCGroupUDP6Sendmsg-15] - _ = x[AttachLircMode2-16] - _ = x[AttachFlowDissector-17] - _ = x[AttachCGroupSysctl-18] - _ = x[AttachCGroupUDP4Recvmsg-19] - _ = x[AttachCGroupUDP6Recvmsg-20] - _ = x[AttachCGroupGetsockopt-21] - _ = x[AttachCGroupSetsockopt-22] - _ = x[AttachTraceRawTp-23] - _ = x[AttachTraceFEntry-24] - _ = x[AttachTraceFExit-25] - _ = x[AttachModifyReturn-26] - _ = x[AttachLSMMac-27] - _ = x[AttachTraceIter-28] - _ = x[AttachCgroupInet4GetPeername-29] - _ = x[AttachCgroupInet6GetPeername-30] - _ = x[AttachCgroupInet4GetSockname-31] - _ = x[AttachCgroupInet6GetSockname-32] - _ = x[AttachXDPDevMap-33] - _ = x[AttachCgroupInetSockRelease-34] - _ = x[AttachXDPCPUMap-35] - _ = x[AttachSkLookup-36] - _ = x[AttachXDP-37] -} - -const _AttachType_name = 
"AttachNoneAttachCGroupInetEgressAttachCGroupInetSockCreateAttachCGroupSockOpsAttachSkSKBStreamParserAttachSkSKBStreamVerdictAttachCGroupDeviceAttachSkMsgVerdictAttachCGroupInet4BindAttachCGroupInet6BindAttachCGroupInet4ConnectAttachCGroupInet6ConnectAttachCGroupInet4PostBindAttachCGroupInet6PostBindAttachCGroupUDP4SendmsgAttachCGroupUDP6SendmsgAttachLircMode2AttachFlowDissectorAttachCGroupSysctlAttachCGroupUDP4RecvmsgAttachCGroupUDP6RecvmsgAttachCGroupGetsockoptAttachCGroupSetsockoptAttachTraceRawTpAttachTraceFEntryAttachTraceFExitAttachModifyReturnAttachLSMMacAttachTraceIterAttachCgroupInet4GetPeernameAttachCgroupInet6GetPeernameAttachCgroupInet4GetSocknameAttachCgroupInet6GetSocknameAttachXDPDevMapAttachCgroupInetSockReleaseAttachXDPCPUMapAttachSkLookupAttachXDP" - -var _AttachType_index = [...]uint16{0, 10, 32, 58, 77, 100, 124, 142, 160, 181, 202, 226, 250, 275, 300, 323, 346, 361, 380, 398, 421, 444, 466, 488, 504, 521, 537, 555, 567, 582, 610, 638, 666, 694, 709, 736, 751, 765, 774} - -func (i AttachType) String() string { - if i >= AttachType(len(_AttachType_index)-1) { - return "AttachType(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _AttachType_name[_AttachType_index[i]:_AttachType_index[i+1]] -} -func _() { - // An "invalid array index" compiler error signifies that the constant values have changed. - // Re-run the stringer command to generate them again. - var x [1]struct{} - _ = x[PinNone-0] - _ = x[PinByName-1] -} - -const _PinType_name = "PinNonePinByName" - -var _PinType_index = [...]uint8{0, 7, 16} - -func (i PinType) String() string { - if i < 0 || i >= PinType(len(_PinType_index)-1) { - return "PinType(" + strconv.FormatInt(int64(i), 10) + ")" - } - return _PinType_name[_PinType_index[i]:_PinType_index[i+1]] -} diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml deleted file mode 100644 index 3938f38349..0000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/.travis.yml +++ /dev/null @@ -1,19 +0,0 @@ -# Copyright (C) 2017 SUSE LLC. All rights reserved. -# Use of this source code is governed by a BSD-style -# license that can be found in the LICENSE file. - -language: go -go: - - 1.7.x - - 1.8.x - - tip - -os: - - linux - - osx - -script: - - go test -cover -v ./... - -notifications: - email: false diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/LICENSE b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/LICENSE deleted file mode 100644 index bec842f294..0000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/LICENSE +++ /dev/null @@ -1,28 +0,0 @@ -Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. -Copyright (C) 2017 SUSE LLC. All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above -copyright notice, this list of conditions and the following disclaimer -in the documentation and/or other materials provided with the -distribution. - * Neither the name of Google Inc. nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. 
- -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md deleted file mode 100644 index 49b2baa9f3..0000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/README.md +++ /dev/null @@ -1,65 +0,0 @@ -## `filepath-securejoin` ## - -[![Build Status](https://travis-ci.org/cyphar/filepath-securejoin.svg?branch=master)](https://travis-ci.org/cyphar/filepath-securejoin) - -An implementation of `SecureJoin`, a [candidate for inclusion in the Go -standard library][go#20126]. The purpose of this function is to be a "secure" -alternative to `filepath.Join`, and in particular it provides certain -guarantees that are not provided by `filepath.Join`. - -This is the function prototype: - -```go -func SecureJoin(root, unsafePath string) (string, error) -``` - -This library **guarantees** the following: - -* If no error is set, the resulting string **must** be a child path of - `SecureJoin` and will not contain any symlink path components (they will all - be expanded). - -* When expanding symlinks, all symlink path components **must** be resolved - relative to the provided root. In particular, this can be considered a - userspace implementation of how `chroot(2)` operates on file paths. Note that - these symlinks will **not** be expanded lexically (`filepath.Clean` is not - called on the input before processing). - -* Non-existant path components are unaffected by `SecureJoin` (similar to - `filepath.EvalSymlinks`'s semantics). - -* The returned path will always be `filepath.Clean`ed and thus not contain any - `..` components. - -A (trivial) implementation of this function on GNU/Linux systems could be done -with the following (note that this requires root privileges and is far more -opaque than the implementation in this library, and also requires that -`readlink` is inside the `root` path): - -```go -package securejoin - -import ( - "os/exec" - "path/filepath" -) - -func SecureJoin(root, unsafePath string) (string, error) { - unsafePath = string(filepath.Separator) + unsafePath - cmd := exec.Command("chroot", root, - "readlink", "--canonicalize-missing", "--no-newline", unsafePath) - output, err := cmd.CombinedOutput() - if err != nil { - return "", err - } - expanded := string(output) - return filepath.Join(root, expanded), nil -} -``` - -[go#20126]: https://github.com/golang/go/issues/20126 - -### License ### - -The license of this project is the same as Go, which is a BSD 3-clause license -available in the `LICENSE` file. 
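The README removed above documents the `SecureJoin(root, unsafePath string) (string, error)` prototype and its scoping guarantees. A minimal usage sketch of that documented call, assuming only the public API shown in the README (the root and input paths below are hypothetical):

```go
package main

import (
	"fmt"

	securejoin "github.com/cyphar/filepath-securejoin"
)

func main() {
	// Hypothetical root; the input path deliberately tries to escape it.
	root := "/var/lib/example-root"
	unsafePath := "../../etc/passwd"

	// SecureJoin evaluates symlinks relative to root and guarantees the
	// result stays scoped under root, per the README guarantees above.
	joined, err := securejoin.SecureJoin(root, unsafePath)
	if err != nil {
		fmt.Println("secure join failed:", err)
		return
	}

	// The returned path is filepath.Clean'ed and contains no ".." components,
	// so the escape attempt is clamped at root.
	fmt.Println(joined) // e.g. /var/lib/example-root/etc/passwd
}
```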
diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION deleted file mode 100644 index ee1372d33a..0000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.2.2 diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go deleted file mode 100644 index c4ca3d7130..0000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/join.go +++ /dev/null @@ -1,134 +0,0 @@ -// Copyright (C) 2014-2015 Docker Inc & Go Authors. All rights reserved. -// Copyright (C) 2017 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package securejoin is an implementation of the hopefully-soon-to-be-included -// SecureJoin helper that is meant to be part of the "path/filepath" package. -// The purpose of this project is to provide a PoC implementation to make the -// SecureJoin proposal (https://github.com/golang/go/issues/20126) more -// tangible. -package securejoin - -import ( - "bytes" - "os" - "path/filepath" - "strings" - "syscall" - - "github.com/pkg/errors" -) - -// ErrSymlinkLoop is returned by SecureJoinVFS when too many symlinks have been -// evaluated in attempting to securely join the two given paths. -var ErrSymlinkLoop = errors.Wrap(syscall.ELOOP, "secure join") - -// IsNotExist tells you if err is an error that implies that either the path -// accessed does not exist (or path components don't exist). This is -// effectively a more broad version of os.IsNotExist. -func IsNotExist(err error) bool { - // If it's a bone-fide ENOENT just bail. - if os.IsNotExist(errors.Cause(err)) { - return true - } - - // Check that it's not actually an ENOTDIR, which in some cases is a more - // convoluted case of ENOENT (usually involving weird paths). - var errno error - switch err := errors.Cause(err).(type) { - case *os.PathError: - errno = err.Err - case *os.LinkError: - errno = err.Err - case *os.SyscallError: - errno = err.Err - } - return errno == syscall.ENOTDIR || errno == syscall.ENOENT -} - -// SecureJoinVFS joins the two given path components (similar to Join) except -// that the returned path is guaranteed to be scoped inside the provided root -// path (when evaluated). Any symbolic links in the path are evaluated with the -// given root treated as the root of the filesystem, similar to a chroot. The -// filesystem state is evaluated through the given VFS interface (if nil, the -// standard os.* family of functions are used). -// -// Note that the guarantees provided by this function only apply if the path -// components in the returned string are not modified (in other words are not -// replaced with symlinks on the filesystem) after this function has returned. -// Such a symlink race is necessarily out-of-scope of SecureJoin. -func SecureJoinVFS(root, unsafePath string, vfs VFS) (string, error) { - // Use the os.* VFS implementation if none was specified. - if vfs == nil { - vfs = osVFS{} - } - - var path bytes.Buffer - n := 0 - for unsafePath != "" { - if n > 255 { - return "", ErrSymlinkLoop - } - - // Next path component, p. 
- i := strings.IndexRune(unsafePath, filepath.Separator) - var p string - if i == -1 { - p, unsafePath = unsafePath, "" - } else { - p, unsafePath = unsafePath[:i], unsafePath[i+1:] - } - - // Create a cleaned path, using the lexical semantics of /../a, to - // create a "scoped" path component which can safely be joined to fullP - // for evaluation. At this point, path.String() doesn't contain any - // symlink components. - cleanP := filepath.Clean(string(filepath.Separator) + path.String() + p) - if cleanP == string(filepath.Separator) { - path.Reset() - continue - } - fullP := filepath.Clean(root + cleanP) - - // Figure out whether the path is a symlink. - fi, err := vfs.Lstat(fullP) - if err != nil && !IsNotExist(err) { - return "", err - } - // Treat non-existent path components the same as non-symlinks (we - // can't do any better here). - if IsNotExist(err) || fi.Mode()&os.ModeSymlink == 0 { - path.WriteString(p) - path.WriteRune(filepath.Separator) - continue - } - - // Only increment when we actually dereference a link. - n++ - - // It's a symlink, expand it by prepending it to the yet-unparsed path. - dest, err := vfs.Readlink(fullP) - if err != nil { - return "", err - } - // Absolute symlinks reset any work we've already done. - if filepath.IsAbs(dest) { - path.Reset() - } - unsafePath = dest + string(filepath.Separator) + unsafePath - } - - // We have to clean path.String() here because it may contain '..' - // components that are entirely lexical, but would be misleading otherwise. - // And finally do a final clean to ensure that root is also lexically - // clean. - fullP := filepath.Clean(string(filepath.Separator) + path.String()) - return filepath.Clean(root + fullP), nil -} - -// SecureJoin is a wrapper around SecureJoinVFS that just uses the os.* library -// of functions as the VFS. If in doubt, use this function over SecureJoinVFS. -func SecureJoin(root, unsafePath string) (string, error) { - return SecureJoinVFS(root, unsafePath, nil) -} diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vendor.conf b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vendor.conf deleted file mode 100644 index 66bb574b95..0000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vendor.conf +++ /dev/null @@ -1 +0,0 @@ -github.com/pkg/errors v0.8.0 diff --git a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vfs.go b/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vfs.go deleted file mode 100644 index a82a5eae11..0000000000 --- a/src/runtime/vendor/github.com/cyphar/filepath-securejoin/vfs.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (C) 2017 SUSE LLC. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package securejoin - -import "os" - -// In future this should be moved into a separate package, because now there -// are several projects (umoci and go-mtree) that are using this sort of -// interface. - -// VFS is the minimal interface necessary to use SecureJoinVFS. A nil VFS is -// equivalent to using the standard os.* family of functions. This is mainly -// used for the purposes of mock testing, but also can be used to otherwise use -// SecureJoin with VFS-like system. -type VFS interface { - // Lstat returns a FileInfo describing the named file. If the file is a - // symbolic link, the returned FileInfo describes the symbolic link. Lstat - // makes no attempt to follow the link. These semantics are identical to - // os.Lstat. 
- Lstat(name string) (os.FileInfo, error) - - // Readlink returns the destination of the named symbolic link. These - // semantics are identical to os.Readlink. - Readlink(name string) (string, error) -} - -// osVFS is the "nil" VFS, in that it just passes everything through to the os -// module. -type osVFS struct{} - -// Lstat returns a FileInfo describing the named file. If the file is a -// symbolic link, the returned FileInfo describes the symbolic link. Lstat -// makes no attempt to follow the link. These semantics are identical to -// os.Lstat. -func (o osVFS) Lstat(name string) (os.FileInfo, error) { return os.Lstat(name) } - -// Readlink returns the destination of the named symbolic link. These -// semantics are identical to os.Readlink. -func (o osVFS) Readlink(name string) (string, error) { return os.Readlink(name) } diff --git a/src/runtime/vendor/github.com/mrunalp/fileutils/.gitignore b/src/runtime/vendor/github.com/mrunalp/fileutils/.gitignore deleted file mode 100644 index aac977bcae..0000000000 --- a/src/runtime/vendor/github.com/mrunalp/fileutils/.gitignore +++ /dev/null @@ -1 +0,0 @@ -/gocp diff --git a/src/runtime/vendor/github.com/mrunalp/fileutils/LICENSE b/src/runtime/vendor/github.com/mrunalp/fileutils/LICENSE deleted file mode 100644 index 27448585ad..0000000000 --- a/src/runtime/vendor/github.com/mrunalp/fileutils/LICENSE +++ /dev/null @@ -1,191 +0,0 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. 
- - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. 
You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. 
In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - Copyright 2014 Docker, Inc. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. diff --git a/src/runtime/vendor/github.com/mrunalp/fileutils/MAINTAINERS b/src/runtime/vendor/github.com/mrunalp/fileutils/MAINTAINERS deleted file mode 100644 index 4a2cafa5c4..0000000000 --- a/src/runtime/vendor/github.com/mrunalp/fileutils/MAINTAINERS +++ /dev/null @@ -1 +0,0 @@ -Mrunal Patel (@mrunalp) diff --git a/src/runtime/vendor/github.com/mrunalp/fileutils/README.md b/src/runtime/vendor/github.com/mrunalp/fileutils/README.md deleted file mode 100644 index 6cb4140eae..0000000000 --- a/src/runtime/vendor/github.com/mrunalp/fileutils/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# fileutils - -Collection of utilities for file manipulation in golang - -The library is based on docker pkg/archive pkg/idtools but does copies instead of handling archive formats. 
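For context on what is being dropped from the vendor tree here: the fileutils package removed above is a small set of copy/ownership helpers. Below is a minimal, hypothetical usage sketch; only the function signatures (CopyFile, CopyDirectory, MkdirAllNewAs) come from the deleted sources, while the paths and uid/gid values are made up for illustration and are not part of this patch.

```go
// Minimal sketch of how the removed github.com/mrunalp/fileutils helpers are
// typically called. Paths and ownership values below are hypothetical.
package main

import (
	"log"

	"github.com/mrunalp/fileutils"
)

func main() {
	// Create the destination tree, chowning only the directories that are
	// newly created (existing directories keep their ownership).
	if err := fileutils.MkdirAllNewAs("/tmp/rootfs-copy/etc", 0755, 1000, 1000); err != nil {
		log.Fatal(err)
	}

	// Copy a single file, preserving its mode and ownership on the copy.
	if err := fileutils.CopyFile("/etc/hostname", "/tmp/rootfs-copy/etc/hostname"); err != nil {
		log.Fatal(err)
	}

	// Recursively copy a directory tree; plain copies, no archive formats.
	if err := fileutils.CopyDirectory("/etc/default", "/tmp/rootfs-copy/etc/default"); err != nil {
		log.Fatal(err)
	}
}
```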
diff --git a/src/runtime/vendor/github.com/mrunalp/fileutils/fileutils.go b/src/runtime/vendor/github.com/mrunalp/fileutils/fileutils.go deleted file mode 100644 index 7421e6207f..0000000000 --- a/src/runtime/vendor/github.com/mrunalp/fileutils/fileutils.go +++ /dev/null @@ -1,168 +0,0 @@ -package fileutils - -import ( - "fmt" - "io" - "os" - "path/filepath" - "syscall" -) - -// CopyFile copies the file at source to dest -func CopyFile(source string, dest string) error { - si, err := os.Lstat(source) - if err != nil { - return err - } - - st, ok := si.Sys().(*syscall.Stat_t) - if !ok { - return fmt.Errorf("could not convert to syscall.Stat_t") - } - - uid := int(st.Uid) - gid := int(st.Gid) - modeType := si.Mode() & os.ModeType - - // Handle symlinks - if modeType == os.ModeSymlink { - target, err := os.Readlink(source) - if err != nil { - return err - } - if err := os.Symlink(target, dest); err != nil { - return err - } - } - - // Handle device files - if modeType == os.ModeDevice { - devMajor := int64(major(uint64(st.Rdev))) - devMinor := int64(minor(uint64(st.Rdev))) - mode := uint32(si.Mode() & os.ModePerm) - if si.Mode()&os.ModeCharDevice != 0 { - mode |= syscall.S_IFCHR - } else { - mode |= syscall.S_IFBLK - } - if err := syscall.Mknod(dest, mode, int(mkdev(devMajor, devMinor))); err != nil { - return err - } - } - - // Handle regular files - if si.Mode().IsRegular() { - err = copyInternal(source, dest) - if err != nil { - return err - } - } - - // Chown the file - if err := os.Lchown(dest, uid, gid); err != nil { - return err - } - - // Chmod the file - if !(modeType == os.ModeSymlink) { - if err := os.Chmod(dest, si.Mode()); err != nil { - return err - } - } - - return nil -} - -func copyInternal(source, dest string) (retErr error) { - sf, err := os.Open(source) - if err != nil { - return err - } - defer sf.Close() - - df, err := os.Create(dest) - if err != nil { - return err - } - defer func() { - err := df.Close() - if retErr == nil { - retErr = err - } - }() - - _, err = io.Copy(df, sf) - return err -} - -// CopyDirectory copies the files under the source directory -// to dest directory. The dest directory is created if it -// does not exist. -func CopyDirectory(source string, dest string) error { - fi, err := os.Stat(source) - if err != nil { - return err - } - - // Get owner. - st, ok := fi.Sys().(*syscall.Stat_t) - if !ok { - return fmt.Errorf("could not convert to syscall.Stat_t") - } - - // We have to pick an owner here anyway. - if err := MkdirAllNewAs(dest, fi.Mode(), int(st.Uid), int(st.Gid)); err != nil { - return err - } - - return filepath.Walk(source, func(path string, info os.FileInfo, err error) error { - if err != nil { - return err - } - - // Get the relative path - relPath, err := filepath.Rel(source, path) - if err != nil { - return nil - } - - if info.IsDir() { - // Skip the source directory. - if path != source { - // Get the owner. 
- st, ok := info.Sys().(*syscall.Stat_t) - if !ok { - return fmt.Errorf("could not convert to syscall.Stat_t") - } - - uid := int(st.Uid) - gid := int(st.Gid) - - if err := os.Mkdir(filepath.Join(dest, relPath), info.Mode()); err != nil { - return err - } - - if err := os.Lchown(filepath.Join(dest, relPath), uid, gid); err != nil { - return err - } - } - return nil - } - - return CopyFile(path, filepath.Join(dest, relPath)) - }) -} - -// Gives a number indicating the device driver to be used to access the passed device -func major(device uint64) uint64 { - return (device >> 8) & 0xfff -} - -// Gives a number that serves as a flag to the device driver for the passed device -func minor(device uint64) uint64 { - return (device & 0xff) | ((device >> 12) & 0xfff00) -} - -func mkdev(major int64, minor int64) uint32 { - return uint32(((minor & 0xfff00) << 12) | ((major & 0xfff) << 8) | (minor & 0xff)) -} diff --git a/src/runtime/vendor/github.com/mrunalp/fileutils/go.mod b/src/runtime/vendor/github.com/mrunalp/fileutils/go.mod deleted file mode 100644 index d8971cabc4..0000000000 --- a/src/runtime/vendor/github.com/mrunalp/fileutils/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module github.com/mrunalp/fileutils - -go 1.13 diff --git a/src/runtime/vendor/github.com/mrunalp/fileutils/idtools.go b/src/runtime/vendor/github.com/mrunalp/fileutils/idtools.go deleted file mode 100644 index bad6539df5..0000000000 --- a/src/runtime/vendor/github.com/mrunalp/fileutils/idtools.go +++ /dev/null @@ -1,54 +0,0 @@ -package fileutils - -import ( - "os" - "path/filepath" - "syscall" -) - -// MkdirAllNewAs creates a directory (include any along the path) and then modifies -// ownership ONLY of newly created directories to the requested uid/gid. If the -// directories along the path exist, no change of ownership will be performed -func MkdirAllNewAs(path string, mode os.FileMode, ownerUID, ownerGID int) error { - // make an array containing the original path asked for, plus (for mkAll == true) - // all path components leading up to the complete path that don't exist before we MkdirAll - // so that we can chown all of them properly at the end. 
If chownExisting is false, we won't - // chown the full directory path if it exists - var paths []string - st, err := os.Stat(path) - if err != nil && os.IsNotExist(err) { - paths = []string{path} - } else if err == nil { - if !st.IsDir() { - return &os.PathError{Op: "mkdir", Path: path, Err: syscall.ENOTDIR} - } - // nothing to do; directory path fully exists already - return nil - } - - // walk back to "/" looking for directories which do not exist - // and add them to the paths array for chown after creation - dirPath := path - for { - dirPath = filepath.Dir(dirPath) - if dirPath == "/" { - break - } - if _, err := os.Stat(dirPath); err != nil && os.IsNotExist(err) { - paths = append(paths, dirPath) - } - } - - if err := os.MkdirAll(path, mode); err != nil { - return err - } - - // even if it existed, we will chown the requested path + any subpaths that - // didn't exist when we called MkdirAll - for _, pathComponent := range paths { - if err := os.Chown(pathComponent, ownerUID, ownerGID); err != nil { - return err - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/README.md b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/README.md deleted file mode 100644 index 13eee49d4b..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/README.md +++ /dev/null @@ -1,334 +0,0 @@ -# libcontainer - -[![GoDoc](https://godoc.org/github.com/opencontainers/runc/libcontainer?status.svg)](https://godoc.org/github.com/opencontainers/runc/libcontainer) - -Libcontainer provides a native Go implementation for creating containers -with namespaces, cgroups, capabilities, and filesystem access controls. -It allows you to manage the lifecycle of the container performing additional operations -after the container is created. - - -#### Container -A container is a self contained execution environment that shares the kernel of the -host system and which is (optionally) isolated from other containers in the system. - -#### Using libcontainer - -Because containers are spawned in a two step process you will need a binary that -will be executed as the init process for the container. In libcontainer, we use -the current binary (/proc/self/exe) to be executed as the init process, and use -arg "init", we call the first step process "bootstrap", so you always need a "init" -function as the entry of "bootstrap". - -In addition to the go init function the early stage bootstrap is handled by importing -[nsenter](https://github.com/opencontainers/runc/blob/master/libcontainer/nsenter/README.md). - -```go -import ( - _ "github.com/opencontainers/runc/libcontainer/nsenter" -) - -func init() { - if len(os.Args) > 1 && os.Args[1] == "init" { - runtime.GOMAXPROCS(1) - runtime.LockOSThread() - factory, _ := libcontainer.New("") - if err := factory.StartInitialization(); err != nil { - logrus.Fatal(err) - } - panic("--this line should have never been executed, congratulations--") - } -} -``` - -Then to create a container you first have to initialize an instance of a factory -that will handle the creation and initialization for a container. - -```go -factory, err := libcontainer.New("/var/lib/container", libcontainer.Cgroupfs, libcontainer.InitArgs(os.Args[0], "init")) -if err != nil { - logrus.Fatal(err) - return -} -``` - -Once you have an instance of the factory created we can create a configuration -struct describing how the container is to be created. 
A sample would look similar to this: - -```go -defaultMountFlags := unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV -var devices []*configs.DeviceRule -for _, device := range specconv.AllowedDevices { - devices = append(devices, &device.Rule) -} -config := &configs.Config{ - Rootfs: "/your/path/to/rootfs", - Capabilities: &configs.Capabilities{ - Bounding: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Effective: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Inheritable: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Permitted: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - Ambient: []string{ - "CAP_CHOWN", - "CAP_DAC_OVERRIDE", - "CAP_FSETID", - "CAP_FOWNER", - "CAP_MKNOD", - "CAP_NET_RAW", - "CAP_SETGID", - "CAP_SETUID", - "CAP_SETFCAP", - "CAP_SETPCAP", - "CAP_NET_BIND_SERVICE", - "CAP_SYS_CHROOT", - "CAP_KILL", - "CAP_AUDIT_WRITE", - }, - }, - Namespaces: configs.Namespaces([]configs.Namespace{ - {Type: configs.NEWNS}, - {Type: configs.NEWUTS}, - {Type: configs.NEWIPC}, - {Type: configs.NEWPID}, - {Type: configs.NEWUSER}, - {Type: configs.NEWNET}, - {Type: configs.NEWCGROUP}, - }), - Cgroups: &configs.Cgroup{ - Name: "test-container", - Parent: "system", - Resources: &configs.Resources{ - MemorySwappiness: nil, - Devices: devices, - }, - }, - MaskPaths: []string{ - "/proc/kcore", - "/sys/firmware", - }, - ReadonlyPaths: []string{ - "/proc/sys", "/proc/sysrq-trigger", "/proc/irq", "/proc/bus", - }, - Devices: specconv.AllowedDevices, - Hostname: "testing", - Mounts: []*configs.Mount{ - { - Source: "proc", - Destination: "/proc", - Device: "proc", - Flags: defaultMountFlags, - }, - { - Source: "tmpfs", - Destination: "/dev", - Device: "tmpfs", - Flags: unix.MS_NOSUID | unix.MS_STRICTATIME, - Data: "mode=755", - }, - { - Source: "devpts", - Destination: "/dev/pts", - Device: "devpts", - Flags: unix.MS_NOSUID | unix.MS_NOEXEC, - Data: "newinstance,ptmxmode=0666,mode=0620,gid=5", - }, - { - Device: "tmpfs", - Source: "shm", - Destination: "/dev/shm", - Data: "mode=1777,size=65536k", - Flags: defaultMountFlags, - }, - { - Source: "mqueue", - Destination: "/dev/mqueue", - Device: "mqueue", - Flags: defaultMountFlags, - }, - { - Source: "sysfs", - Destination: "/sys", - Device: "sysfs", - Flags: defaultMountFlags | unix.MS_RDONLY, - }, - }, - UidMappings: []configs.IDMap{ - { - ContainerID: 0, - HostID: 1000, - Size: 65536, - }, - }, - GidMappings: []configs.IDMap{ - { - ContainerID: 0, - HostID: 1000, - Size: 65536, - }, - }, - Networks: []*configs.Network{ - { - Type: "loopback", - Address: "127.0.0.1/0", - Gateway: "localhost", - }, - }, - Rlimits: []configs.Rlimit{ - { - Type: unix.RLIMIT_NOFILE, - Hard: uint64(1025), - Soft: 
uint64(1025), - }, - }, -} -``` - -Once you have the configuration populated you can create a container: - -```go -container, err := factory.Create("container-id", config) -if err != nil { - logrus.Fatal(err) - return -} -``` - -To spawn bash as the initial process inside the container and have the -processes pid returned in order to wait, signal, or kill the process: - -```go -process := &libcontainer.Process{ - Args: []string{"/bin/bash"}, - Env: []string{"PATH=/bin"}, - User: "daemon", - Stdin: os.Stdin, - Stdout: os.Stdout, - Stderr: os.Stderr, - Init: true, -} - -err := container.Run(process) -if err != nil { - container.Destroy() - logrus.Fatal(err) - return -} - -// wait for the process to finish. -_, err := process.Wait() -if err != nil { - logrus.Fatal(err) -} - -// destroy the container. -container.Destroy() -``` - -Additional ways to interact with a running container are: - -```go -// return all the pids for all processes running inside the container. -processes, err := container.Processes() - -// get detailed cpu, memory, io, and network statistics for the container and -// it's processes. -stats, err := container.Stats() - -// pause all processes inside the container. -container.Pause() - -// resume all paused processes. -container.Resume() - -// send signal to container's init process. -container.Signal(signal) - -// update container resource constraints. -container.Set(config) - -// get current status of the container. -status, err := container.Status() - -// get current container's state information. -state, err := container.State() -``` - - -#### Checkpoint & Restore - -libcontainer now integrates [CRIU](http://criu.org/) for checkpointing and restoring containers. -This lets you save the state of a process running inside a container to disk, and then restore -that state into a new process, on the same machine or on another machine. - -`criu` version 1.5.2 or higher is required to use checkpoint and restore. -If you don't already have `criu` installed, you can build it from source, following the -[online instructions](http://criu.org/Installation). `criu` is also installed in the docker image -generated when building libcontainer with docker. - - -## Copyright and license - -Code and documentation copyright 2014 Docker, inc. -The code and documentation are released under the [Apache 2.0 license](../LICENSE). -The documentation is also released under Creative Commons Attribution 4.0 International License. -You may obtain a copy of the license, titled CC-BY-4.0, at http://creativecommons.org/licenses/by/4.0/. diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md deleted file mode 100644 index 07ebdc1215..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/SPEC.md +++ /dev/null @@ -1,465 +0,0 @@ -## Container Specification - v1 - -This is the standard configuration for version 1 containers. It includes -namespaces, standard filesystem setup, a default Linux capability set, and -information about resource reservations. It also has information about any -populated environment settings for the processes running inside a container. - -Along with the configuration of how a container is created the standard also -discusses actions that can be performed on a container to manage and inspect -information about the processes running inside. 
- -The v1 profile is meant to be able to accommodate the majority of applications -with a strong security configuration. - -### System Requirements and Compatibility - -Minimum requirements: -* Kernel version - 3.10 recommended 2.6.2x minimum(with backported patches) -* Mounted cgroups with each subsystem in its own hierarchy - - -### Namespaces - -| Flag | Enabled | -| --------------- | ------- | -| CLONE_NEWPID | 1 | -| CLONE_NEWUTS | 1 | -| CLONE_NEWIPC | 1 | -| CLONE_NEWNET | 1 | -| CLONE_NEWNS | 1 | -| CLONE_NEWUSER | 1 | -| CLONE_NEWCGROUP | 1 | - -Namespaces are created for the container via the `unshare` syscall. - - -### Filesystem - -A root filesystem must be provided to a container for execution. The container -will use this root filesystem (rootfs) to jail and spawn processes inside where -the binaries and system libraries are local to that directory. Any binaries -to be executed must be contained within this rootfs. - -Mounts that happen inside the container are automatically cleaned up when the -container exits as the mount namespace is destroyed and the kernel will -unmount all the mounts that were setup within that namespace. - -For a container to execute properly there are certain filesystems that -are required to be mounted within the rootfs that the runtime will setup. - -| Path | Type | Flags | Data | -| ----------- | ------ | -------------------------------------- | ---------------------------------------- | -| /proc | proc | MS_NOEXEC,MS_NOSUID,MS_NODEV | | -| /dev | tmpfs | MS_NOEXEC,MS_STRICTATIME | mode=755 | -| /dev/shm | tmpfs | MS_NOEXEC,MS_NOSUID,MS_NODEV | mode=1777,size=65536k | -| /dev/mqueue | mqueue | MS_NOEXEC,MS_NOSUID,MS_NODEV | | -| /dev/pts | devpts | MS_NOEXEC,MS_NOSUID | newinstance,ptmxmode=0666,mode=620,gid=5 | -| /sys | sysfs | MS_NOEXEC,MS_NOSUID,MS_NODEV,MS_RDONLY | | - - -After a container's filesystems are mounted within the newly created -mount namespace `/dev` will need to be populated with a set of device nodes. -It is expected that a rootfs does not need to have any device nodes specified -for `/dev` within the rootfs as the container will setup the correct devices -that are required for executing a container's process. - -| Path | Mode | Access | -| ------------ | ---- | ---------- | -| /dev/null | 0666 | rwm | -| /dev/zero | 0666 | rwm | -| /dev/full | 0666 | rwm | -| /dev/tty | 0666 | rwm | -| /dev/random | 0666 | rwm | -| /dev/urandom | 0666 | rwm | - - -**ptmx** -`/dev/ptmx` will need to be a symlink to the host's `/dev/ptmx` within -the container. - -The use of a pseudo TTY is optional within a container and it should support both. -If a pseudo is provided to the container `/dev/console` will need to be -setup by binding the console in `/dev/` after it has been populated and mounted -in tmpfs. - -| Source | Destination | UID GID | Mode | Type | -| --------------- | ------------ | ------- | ---- | ---- | -| *pty host path* | /dev/console | 0 0 | 0600 | bind | - - -After `/dev/null` has been setup we check for any external links between -the container's io, STDIN, STDOUT, STDERR. If the container's io is pointing -to `/dev/null` outside the container we close and `dup2` the `/dev/null` -that is local to the container's rootfs. - - -After the container has `/proc` mounted a few standard symlinks are setup -within `/dev/` for the io. 
- -| Source | Destination | -| --------------- | ----------- | -| /proc/self/fd | /dev/fd | -| /proc/self/fd/0 | /dev/stdin | -| /proc/self/fd/1 | /dev/stdout | -| /proc/self/fd/2 | /dev/stderr | - -A `pivot_root` is used to change the root for the process, effectively -jailing the process inside the rootfs. - -```c -put_old = mkdir(...); -pivot_root(rootfs, put_old); -chdir("/"); -unmount(put_old, MS_DETACH); -rmdir(put_old); -``` - -For container's running with a rootfs inside `ramfs` a `MS_MOVE` combined -with a `chroot` is required as `pivot_root` is not supported in `ramfs`. - -```c -mount(rootfs, "/", NULL, MS_MOVE, NULL); -chroot("."); -chdir("/"); -``` - -The `umask` is set back to `0022` after the filesystem setup has been completed. - -### Resources - -Cgroups are used to handle resource allocation for containers. This includes -system resources like cpu, memory, and device access. - -| Subsystem | Enabled | -| ---------- | ------- | -| devices | 1 | -| memory | 1 | -| cpu | 1 | -| cpuacct | 1 | -| cpuset | 1 | -| blkio | 1 | -| perf_event | 1 | -| freezer | 1 | -| hugetlb | 1 | -| pids | 1 | - - -All cgroup subsystem are joined so that statistics can be collected from -each of the subsystems. Freezer does not expose any stats but is joined -so that containers can be paused and resumed. - -The parent process of the container's init must place the init pid inside -the correct cgroups before the initialization begins. This is done so -that no processes or threads escape the cgroups. This sync is -done via a pipe ( specified in the runtime section below ) that the container's -init process will block waiting for the parent to finish setup. - -### IntelRdt - -Intel platforms with new Xeon CPU support Resource Director Technology (RDT). -Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are -two sub-features of RDT. - -Cache Allocation Technology (CAT) provides a way for the software to restrict -cache allocation to a defined 'subset' of L3 cache which may be overlapping -with other 'subsets'. The different subsets are identified by class of -service (CLOS) and each CLOS has a capacity bitmask (CBM). - -Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle -over memory bandwidth for the software. A user controls the resource by -indicating the percentage of maximum memory bandwidth or memory bandwidth limit -in MBps unit if MBA Software Controller is enabled. - -It can be used to handle L3 cache and memory bandwidth resources allocation -for containers if hardware and kernel support Intel RDT CAT and MBA features. - -In Linux 4.10 kernel or newer, the interface is defined and exposed via -"resource control" filesystem, which is a "cgroup-like" interface. - -Comparing with cgroups, it has similar process management lifecycle and -interfaces in a container. But unlike cgroups' hierarchy, it has single level -filesystem layout. - -CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via -"resource control" filesystem. - -Intel RDT "resource control" filesystem hierarchy: -``` -mount -t resctrl resctrl /sys/fs/resctrl -tree /sys/fs/resctrl -/sys/fs/resctrl/ -|-- info -| |-- L3 -| | |-- cbm_mask -| | |-- min_cbm_bits -| | |-- num_closids -| |-- MB -| |-- bandwidth_gran -| |-- delay_linear -| |-- min_bandwidth -| |-- num_closids -|-- ... -|-- schemata -|-- tasks -|-- - |-- ... - |-- schemata - |-- tasks -``` - -For runc, we can make use of `tasks` and `schemata` configuration for L3 -cache and memory bandwidth resources constraints. 
- -The file `tasks` has a list of tasks that belongs to this group (e.g., -" group). Tasks can be added to a group by writing the task ID -to the "tasks" file (which will automatically remove them from the previous -group to which they belonged). New tasks created by fork(2) and clone(2) are -added to the same group as their parent. - -The file `schemata` has a list of all the resources available to this group. -Each resource (L3 cache, memory bandwidth) has its own line and format. - -L3 cache schema: -It has allocation bitmasks/values for L3 cache on each socket, which -contains L3 cache id and capacity bitmask (CBM). -``` - Format: "L3:=;=;..." -``` -For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" -which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. - -The valid L3 cache CBM is a *contiguous bits set* and number of bits that can -be set is less than the max bit. The max bits in the CBM is varied among -supported Intel CPU models. Kernel will check if it is valid when writing. -e.g., default value 0xfffff in root indicates the max bits of CBM is 20 -bits, which mapping to entire L3 cache capacity. Some valid CBM values to -set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. - -Memory bandwidth schema: -It has allocation values for memory bandwidth on each socket, which contains -L3 cache id and memory bandwidth. -``` - Format: "MB:=bandwidth0;=bandwidth1;..." -``` -For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" - -The minimum bandwidth percentage value for each CPU model is predefined and -can be looked up through "info/MB/min_bandwidth". The bandwidth granularity -that is allocated is also dependent on the CPU model and can be looked up at -"info/MB/bandwidth_gran". The available bandwidth control steps are: -min_bw + N * bw_gran. Intermediate values are rounded to the next control -step available on the hardware. - -If MBA Software Controller is enabled through mount option "-o mba_MBps" -mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl -We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit -instead of "percentages". The kernel underneath would use a software feedback -mechanism or a "Software Controller" which reads the actual bandwidth using -MBM counters and adjust the memory bandwidth percentages to ensure: -"actual memory bandwidth < user specified memory bandwidth". - -For example, on a two-socket machine, the schema line could be -"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 -and 7000 MBps memory bandwidth limit on socket 1. - -For more information about Intel RDT kernel interface: -https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt - -``` -An example for runc: -Consider a two-socket machine with two L3 caches where the default CBM is -0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% -with a memory bandwidth granularity of 10%. - -Tasks inside the container only have access to the "upper" 7/11 of L3 cache -on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a -maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. - -"linux": { - "intelRdt": { - "closID": "guaranteed_group", - "l3CacheSchema": "L3:0=7f0;1=1f", - "memBwSchema": "MB:0=20;1=70" - } -} -``` - -### Security - -The standard set of Linux capabilities that are set in a container -provide a good default for security and flexibility for the applications. 
- - -| Capability | Enabled | -| -------------------- | ------- | -| CAP_NET_RAW | 1 | -| CAP_NET_BIND_SERVICE | 1 | -| CAP_AUDIT_READ | 1 | -| CAP_AUDIT_WRITE | 1 | -| CAP_DAC_OVERRIDE | 1 | -| CAP_SETFCAP | 1 | -| CAP_SETPCAP | 1 | -| CAP_SETGID | 1 | -| CAP_SETUID | 1 | -| CAP_MKNOD | 1 | -| CAP_CHOWN | 1 | -| CAP_FOWNER | 1 | -| CAP_FSETID | 1 | -| CAP_KILL | 1 | -| CAP_SYS_CHROOT | 1 | -| CAP_NET_BROADCAST | 0 | -| CAP_SYS_MODULE | 0 | -| CAP_SYS_RAWIO | 0 | -| CAP_SYS_PACCT | 0 | -| CAP_SYS_ADMIN | 0 | -| CAP_SYS_NICE | 0 | -| CAP_SYS_RESOURCE | 0 | -| CAP_SYS_TIME | 0 | -| CAP_SYS_TTY_CONFIG | 0 | -| CAP_AUDIT_CONTROL | 0 | -| CAP_MAC_OVERRIDE | 0 | -| CAP_MAC_ADMIN | 0 | -| CAP_NET_ADMIN | 0 | -| CAP_SYSLOG | 0 | -| CAP_DAC_READ_SEARCH | 0 | -| CAP_LINUX_IMMUTABLE | 0 | -| CAP_IPC_LOCK | 0 | -| CAP_IPC_OWNER | 0 | -| CAP_SYS_PTRACE | 0 | -| CAP_SYS_BOOT | 0 | -| CAP_LEASE | 0 | -| CAP_WAKE_ALARM | 0 | -| CAP_BLOCK_SUSPEND | 0 | - - -Additional security layers like [apparmor](https://wiki.ubuntu.com/AppArmor) -and [selinux](http://selinuxproject.org/page/Main_Page) can be used with -the containers. A container should support setting an apparmor profile or -selinux process and mount labels if provided in the configuration. - -Standard apparmor profile: -```c -#include -profile flags=(attach_disconnected,mediate_deleted) { - #include - network, - capability, - file, - umount, - - deny @{PROC}/sys/fs/** wklx, - deny @{PROC}/sysrq-trigger rwklx, - deny @{PROC}/mem rwklx, - deny @{PROC}/kmem rwklx, - deny @{PROC}/sys/kernel/[^s][^h][^m]* wklx, - deny @{PROC}/sys/kernel/*/** wklx, - - deny mount, - - deny /sys/[^f]*/** wklx, - deny /sys/f[^s]*/** wklx, - deny /sys/fs/[^c]*/** wklx, - deny /sys/fs/c[^g]*/** wklx, - deny /sys/fs/cg[^r]*/** wklx, - deny /sys/firmware/efi/efivars/** rwklx, - deny /sys/kernel/security/** rwklx, -} -``` - -*TODO: seccomp work is being done to find a good default config* - -### Runtime and Init Process - -During container creation the parent process needs to talk to the container's init -process and have a form of synchronization. This is accomplished by creating -a pipe that is passed to the container's init. When the init process first spawns -it will block on its side of the pipe until the parent closes its side. This -allows the parent to have time to set the new process inside a cgroup hierarchy -and/or write any uid/gid mappings required for user namespaces. -The pipe is passed to the init process via FD 3. - -The application consuming libcontainer should be compiled statically. libcontainer -does not define any init process and the arguments provided are used to `exec` the -process inside the application. There should be no long running init within the -container spec. - -If a pseudo tty is provided to a container it will open and `dup2` the console -as the container's STDIN, STDOUT, STDERR as well as mounting the console -as `/dev/console`. - -An extra set of mounts are provided to a container and setup for use. A container's -rootfs can contain some non portable files inside that can cause side effects during -execution of a process. These files are usually created and populated with the container -specific information via the runtime. - -**Extra runtime files:** -* /etc/hosts -* /etc/resolv.conf -* /etc/hostname -* /etc/localtime - - -#### Defaults - -There are a few defaults that can be overridden by users, but in their omission -these apply to processes within a container. 
- -| Type | Value | -| ------------------- | ------------------------------ | -| Parent Death Signal | SIGKILL | -| UID | 0 | -| GID | 0 | -| GROUPS | 0, NULL | -| CWD | "/" | -| $HOME | Current user's home dir or "/" | -| Readonly rootfs | false | -| Pseudo TTY | false | - - -## Actions - -After a container is created there is a standard set of actions that can -be done to the container. These actions are part of the public API for -a container. - -| Action | Description | -| -------------- | ------------------------------------------------------------------ | -| Get processes | Return all the pids for processes running inside a container | -| Get Stats | Return resource statistics for the container as a whole | -| Wait | Waits on the container's init process ( pid 1 ) | -| Wait Process | Wait on any of the container's processes returning the exit status | -| Destroy | Kill the container's init process and remove any filesystem state | -| Signal | Send a signal to the container's init process | -| Signal Process | Send a signal to any of the container's processes | -| Pause | Pause all processes inside the container | -| Resume | Resume all processes inside the container if paused | -| Exec | Execute a new process inside of the container ( requires setns ) | -| Set | Setup configs of the container after it's created | - -### Execute a new process inside of a running container - -User can execute a new process inside of a running container. Any binaries to be -executed must be accessible within the container's rootfs. - -The started process will run inside the container's rootfs. Any changes -made by the process to the container's filesystem will persist after the -process finished executing. - -The started process will join all the container's existing namespaces. When the -container is paused, the process will also be paused and will resume when -the container is unpaused. The started process will only run when the container's -primary process (PID 1) is running, and will not be restarted when the container -is restarted. - -#### Planned additions - -The started process will have its own cgroups nested inside the container's -cgroups. This is used for process tracking and optionally resource allocation -handling for the new process. Freezer cgroup is required, the rest of the cgroups -are optional. The process executor must place its pid inside the correct -cgroups before starting the process. This is done so that no child processes or -threads can escape the cgroups. - -When the process is stopped, the process executor will try (in a best-effort way) -to stop all its children and remove the sub-cgroups. diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go deleted file mode 100644 index 4b03d4c715..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor.go +++ /dev/null @@ -1,16 +0,0 @@ -package apparmor - -import "errors" - -var ( - // IsEnabled returns true if apparmor is enabled for the host. - IsEnabled = isEnabled - - // ApplyProfile will apply the profile with the specified name to the process after - // the next exec. It is only supported on Linux and produces an ErrApparmorNotEnabled - // on other platforms. - ApplyProfile = applyProfile - - // ErrApparmorNotEnabled indicates that AppArmor is not enabled or not supported. 
- ErrApparmorNotEnabled = errors.New("apparmor: config provided but apparmor not supported") -) diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go deleted file mode 100644 index 744d4e5705..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_linux.go +++ /dev/null @@ -1,69 +0,0 @@ -package apparmor - -import ( - "errors" - "fmt" - "io/ioutil" - "os" - "sync" - - "github.com/opencontainers/runc/libcontainer/utils" -) - -var ( - appArmorEnabled bool - checkAppArmor sync.Once -) - -// isEnabled returns true if apparmor is enabled for the host. -func isEnabled() bool { - checkAppArmor.Do(func() { - if _, err := os.Stat("/sys/kernel/security/apparmor"); err == nil { - buf, err := ioutil.ReadFile("/sys/module/apparmor/parameters/enabled") - appArmorEnabled = err == nil && len(buf) > 1 && buf[0] == 'Y' - } - }) - return appArmorEnabled -} - -func setProcAttr(attr, value string) error { - // Under AppArmor you can only change your own attr, so use /proc/self/ - // instead of /proc// like libapparmor does - attrPath := "/proc/self/attr/apparmor/" + attr - if _, err := os.Stat(attrPath); errors.Is(err, os.ErrNotExist) { - // fall back to the old convention - attrPath = "/proc/self/attr/" + attr - } - - f, err := os.OpenFile(attrPath, os.O_WRONLY, 0) - if err != nil { - return err - } - defer f.Close() - - if err := utils.EnsureProcHandle(f); err != nil { - return err - } - - _, err = f.WriteString(value) - return err -} - -// changeOnExec reimplements aa_change_onexec from libapparmor in Go -func changeOnExec(name string) error { - if err := setProcAttr("exec", "exec "+name); err != nil { - return fmt.Errorf("apparmor failed to apply profile: %s", err) - } - return nil -} - -// applyProfile will apply the profile with the specified name to the process after -// the next exec. It is only supported on Linux and produces an error on other -// platforms. 
-func applyProfile(name string) error { - if name == "" { - return nil - } - - return changeOnExec(name) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go deleted file mode 100644 index 1adadafec8..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build !linux - -package apparmor - -func isEnabled() bool { - return false -} - -func applyProfile(name string) error { - if name != "" { - return ErrApparmorNotEnabled - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go deleted file mode 100644 index 1099d32b14..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities.go +++ /dev/null @@ -1,111 +0,0 @@ -// +build linux - -package capabilities - -import ( - "sort" - "strings" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/sirupsen/logrus" - "github.com/syndtr/gocapability/capability" -) - -const allCapabilityTypes = capability.CAPS | capability.BOUNDING | capability.AMBIENT - -var ( - capabilityMap map[string]capability.Cap - capTypes = []capability.CapType{ - capability.BOUNDING, - capability.PERMITTED, - capability.INHERITABLE, - capability.EFFECTIVE, - capability.AMBIENT, - } -) - -func init() { - capabilityMap = make(map[string]capability.Cap, capability.CAP_LAST_CAP+1) - for _, c := range capability.List() { - if c > capability.CAP_LAST_CAP { - continue - } - capabilityMap["CAP_"+strings.ToUpper(c.String())] = c - } -} - -// New creates a new Caps from the given Capabilities config. Unknown Capabilities -// or Capabilities that are unavailable in the current environment are ignored, -// printing a warning instead. -func New(capConfig *configs.Capabilities) (*Caps, error) { - var ( - err error - c Caps - ) - - unknownCaps := make(map[string]struct{}) - c.caps = map[capability.CapType][]capability.Cap{ - capability.BOUNDING: capSlice(capConfig.Bounding, unknownCaps), - capability.EFFECTIVE: capSlice(capConfig.Effective, unknownCaps), - capability.INHERITABLE: capSlice(capConfig.Inheritable, unknownCaps), - capability.PERMITTED: capSlice(capConfig.Permitted, unknownCaps), - capability.AMBIENT: capSlice(capConfig.Ambient, unknownCaps), - } - if c.pid, err = capability.NewPid2(0); err != nil { - return nil, err - } - if err = c.pid.Load(); err != nil { - return nil, err - } - if len(unknownCaps) > 0 { - logrus.Warn("ignoring unknown or unavailable capabilities: ", mapKeys(unknownCaps)) - } - return &c, nil -} - -// capSlice converts the slice of capability names in caps, to their numeric -// equivalent, and returns them as a slice. Unknown or unavailable capabilities -// are not returned, but appended to unknownCaps. 
-func capSlice(caps []string, unknownCaps map[string]struct{}) []capability.Cap { - var out []capability.Cap - for _, c := range caps { - if v, ok := capabilityMap[c]; !ok { - unknownCaps[c] = struct{}{} - } else { - out = append(out, v) - } - } - return out -} - -// mapKeys returns the keys of input in sorted order -func mapKeys(input map[string]struct{}) []string { - var keys []string - for c := range input { - keys = append(keys, c) - } - sort.Strings(keys) - return keys -} - -// Caps holds the capabilities for a container. -type Caps struct { - pid capability.Capabilities - caps map[capability.CapType][]capability.Cap -} - -// ApplyBoundingSet sets the capability bounding set to those specified in the whitelist. -func (c *Caps) ApplyBoundingSet() error { - c.pid.Clear(capability.BOUNDING) - c.pid.Set(capability.BOUNDING, c.caps[capability.BOUNDING]...) - return c.pid.Apply(capability.BOUNDING) -} - -// Apply sets all the capabilities for the current process in the config. -func (c *Caps) ApplyCaps() error { - c.pid.Clear(allCapabilityTypes) - for _, g := range capTypes { - c.pid.Set(g, c.caps[g]...) - } - return c.pid.Apply(allCapabilityTypes) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go deleted file mode 100644 index a3e82ac1fd..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/capabilities/capabilities_unsupported.go +++ /dev/null @@ -1,3 +0,0 @@ -// +build !linux - -package capabilities diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go deleted file mode 100644 index 68a346ca53..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go +++ /dev/null @@ -1,61 +0,0 @@ -// +build linux - -package cgroups - -import ( - "github.com/opencontainers/runc/libcontainer/configs" -) - -type Manager interface { - // Apply creates a cgroup, if not yet created, and adds a process - // with the specified pid into that cgroup. A special value of -1 - // can be used to merely create a cgroup. - Apply(pid int) error - - // GetPids returns the PIDs of all processes inside the cgroup. - GetPids() ([]int, error) - - // GetAllPids returns the PIDs of all processes inside the cgroup - // any all its sub-cgroups. - GetAllPids() ([]int, error) - - // GetStats returns cgroups statistics. - GetStats() (*Stats, error) - - // Freeze sets the freezer cgroup to the specified state. - Freeze(state configs.FreezerState) error - - // Destroy removes cgroup. - Destroy() error - - // Path returns a cgroup path to the specified controller/subsystem. - // For cgroupv2, the argument is unused and can be empty. - Path(string) string - - // Set sets cgroup resources parameters/limits. If the argument is nil, - // the resources specified during Manager creation (or the previous call - // to Set) are used. - Set(r *configs.Resources) error - - // GetPaths returns cgroup path(s) to save in a state file in order to - // restore later. - // - // For cgroup v1, a key is cgroup subsystem name, and the value is the - // path to the cgroup for this subsystem. - // - // For cgroup v2 unified hierarchy, a key is "", and the value is the - // unified path. - GetPaths() map[string]string - - // GetCgroups returns the cgroup data as configured. 
- GetCgroups() (*configs.Cgroup, error) - - // GetFreezerState retrieves the current FreezerState of the cgroup. - GetFreezerState() (configs.FreezerState, error) - - // Exists returns whether the cgroup path exists or not. - Exists() bool - - // OOMKillCount reports OOM kill count for the cgroup. - OOMKillCount() (uint64, error) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go deleted file mode 100644 index 278d507e28..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups_unsupported.go +++ /dev/null @@ -1,3 +0,0 @@ -// +build !linux - -package cgroups diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go deleted file mode 100644 index c08477cbb3..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/devices/devices_emulator.go +++ /dev/null @@ -1,382 +0,0 @@ -// +build linux - -// SPDX-License-Identifier: Apache-2.0 -/* - * Copyright (C) 2020 Aleksa Sarai - * Copyright (C) 2020 SUSE LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package devices - -import ( - "bufio" - "io" - "regexp" - "sort" - "strconv" - - "github.com/opencontainers/runc/libcontainer/devices" - - "github.com/pkg/errors" -) - -// deviceMeta is a Rule without the Allow or Permissions fields, and no -// wildcard-type support. It's effectively the "match" portion of a metadata -// rule, for the purposes of our emulation. -type deviceMeta struct { - node devices.Type - major int64 - minor int64 -} - -// deviceRule is effectively the tuple (deviceMeta, Permissions). -type deviceRule struct { - meta deviceMeta - perms devices.Permissions -} - -// deviceRules is a mapping of device metadata rules to the associated -// permissions in the ruleset. -type deviceRules map[deviceMeta]devices.Permissions - -func (r deviceRules) orderedEntries() []deviceRule { - var rules []deviceRule - for meta, perms := range r { - rules = append(rules, deviceRule{meta: meta, perms: perms}) - } - sort.Slice(rules, func(i, j int) bool { - // Sort by (major, minor, type). 
- a, b := rules[i].meta, rules[j].meta - return a.major < b.major || - (a.major == b.major && a.minor < b.minor) || - (a.major == b.major && a.minor == b.minor && a.node < b.node) - }) - return rules -} - -type Emulator struct { - defaultAllow bool - rules deviceRules -} - -func (e *Emulator) IsBlacklist() bool { - return e.defaultAllow -} - -func (e *Emulator) IsAllowAll() bool { - return e.IsBlacklist() && len(e.rules) == 0 -} - -var devicesListRegexp = regexp.MustCompile(`^([abc])\s+(\d+|\*):(\d+|\*)\s+([rwm]+)$`) - -func parseLine(line string) (*deviceRule, error) { - matches := devicesListRegexp.FindStringSubmatch(line) - if matches == nil { - return nil, errors.Errorf("line doesn't match devices.list format") - } - var ( - rule deviceRule - node = matches[1] - major = matches[2] - minor = matches[3] - perms = matches[4] - ) - - // Parse the node type. - switch node { - case "a": - // Super-special case -- "a" always means every device with every - // access mode. In fact, for devices.list this actually indicates that - // the cgroup is in black-list mode. - // TODO: Double-check that the entire file is "a *:* rwm". - return nil, nil - case "b": - rule.meta.node = devices.BlockDevice - case "c": - rule.meta.node = devices.CharDevice - default: - // Should never happen! - return nil, errors.Errorf("unknown device type %q", node) - } - - // Parse the major number. - if major == "*" { - rule.meta.major = devices.Wildcard - } else { - val, err := strconv.ParseUint(major, 10, 32) - if err != nil { - return nil, errors.Wrap(err, "parse major number") - } - rule.meta.major = int64(val) - } - - // Parse the minor number. - if minor == "*" { - rule.meta.minor = devices.Wildcard - } else { - val, err := strconv.ParseUint(minor, 10, 32) - if err != nil { - return nil, errors.Wrap(err, "parse minor number") - } - rule.meta.minor = int64(val) - } - - // Parse the access permissions. - rule.perms = devices.Permissions(perms) - if !rule.perms.IsValid() || rule.perms.IsEmpty() { - // Should never happen! - return nil, errors.Errorf("parse access mode: contained unknown modes or is empty: %q", perms) - } - return &rule, nil -} - -func (e *Emulator) addRule(rule deviceRule) error { - if e.rules == nil { - e.rules = make(map[deviceMeta]devices.Permissions) - } - - // Merge with any pre-existing permissions. - oldPerms := e.rules[rule.meta] - newPerms := rule.perms.Union(oldPerms) - e.rules[rule.meta] = newPerms - return nil -} - -func (e *Emulator) rmRule(rule deviceRule) error { - // Give an error if any of the permissions requested to be removed are - // present in a partially-matching wildcard rule, because such rules will - // be ignored by cgroupv1. - // - // This is a diversion from cgroupv1, but is necessary to avoid leading - // users into a false sense of security. cgroupv1 will silently(!) ignore - // requests to remove partial exceptions, but we really shouldn't do that. - // - // It may seem like we could just "split" wildcard rules which hit this - // issue, but unfortunately there are 2^32 possible major and minor - // numbers, which would exhaust kernel memory quickly if we did this. Not - // to mention it'd be really slow (the kernel side is implemented as a - // linked-list of exceptions). 
- for _, partialMeta := range []deviceMeta{ - {node: rule.meta.node, major: devices.Wildcard, minor: rule.meta.minor}, - {node: rule.meta.node, major: rule.meta.major, minor: devices.Wildcard}, - {node: rule.meta.node, major: devices.Wildcard, minor: devices.Wildcard}, - } { - // This wildcard rule is equivalent to the requested rule, so skip it. - if rule.meta == partialMeta { - continue - } - // Only give an error if the set of permissions overlap. - partialPerms := e.rules[partialMeta] - if !partialPerms.Intersection(rule.perms).IsEmpty() { - return errors.Errorf("requested rule [%v %v] not supported by devices cgroupv1 (cannot punch hole in existing wildcard rule [%v %v])", rule.meta, rule.perms, partialMeta, partialPerms) - } - } - - // Subtract all of the permissions listed from the full match rule. If the - // rule didn't exist, all of this is a no-op. - newPerms := e.rules[rule.meta].Difference(rule.perms) - if newPerms.IsEmpty() { - delete(e.rules, rule.meta) - } else { - e.rules[rule.meta] = newPerms - } - // TODO: The actual cgroup code doesn't care if an exception didn't exist - // during removal, so not erroring out here is /accurate/ but quite - // worrying. Maybe we should do additional validation, but again we - // have to worry about backwards-compatibility. - return nil -} - -func (e *Emulator) allow(rule *deviceRule) error { - // This cgroup is configured as a black-list. Reset the entire emulator, - // and put is into black-list mode. - if rule == nil || rule.meta.node == devices.WildcardDevice { - *e = Emulator{ - defaultAllow: true, - rules: nil, - } - return nil - } - - var err error - if e.defaultAllow { - err = errors.Wrap(e.rmRule(*rule), "remove 'deny' exception") - } else { - err = errors.Wrap(e.addRule(*rule), "add 'allow' exception") - } - return err -} - -func (e *Emulator) deny(rule *deviceRule) error { - // This cgroup is configured as a white-list. Reset the entire emulator, - // and put is into white-list mode. - if rule == nil || rule.meta.node == devices.WildcardDevice { - *e = Emulator{ - defaultAllow: false, - rules: nil, - } - return nil - } - - var err error - if e.defaultAllow { - err = errors.Wrap(e.addRule(*rule), "add 'deny' exception") - } else { - err = errors.Wrap(e.rmRule(*rule), "remove 'allow' exception") - } - return err -} - -func (e *Emulator) Apply(rule devices.Rule) error { - if !rule.Type.CanCgroup() { - return errors.Errorf("cannot add rule [%#v] with non-cgroup type %q", rule, rule.Type) - } - - innerRule := &deviceRule{ - meta: deviceMeta{ - node: rule.Type, - major: rule.Major, - minor: rule.Minor, - }, - perms: rule.Permissions, - } - if innerRule.meta.node == devices.WildcardDevice { - innerRule = nil - } - - if rule.Allow { - return e.allow(innerRule) - } - - return e.deny(innerRule) -} - -// EmulatorFromList takes a reader to a "devices.list"-like source, and returns -// a new Emulator that represents the state of the devices cgroup. Note that -// black-list devices cgroups cannot be fully reconstructed, due to limitations -// in the devices cgroup API. Instead, such cgroups are always treated as -// "allow all" cgroups. -func EmulatorFromList(list io.Reader) (*Emulator, error) { - // Normally cgroups are in black-list mode by default, but the way we - // figure out the current mode is whether or not devices.list has an - // allow-all rule. So we default to a white-list, and the existence of an - // "a *:* rwm" entry will tell us otherwise. - e := &Emulator{ - defaultAllow: false, - } - - // Parse the "devices.list". 
- s := bufio.NewScanner(list) - for s.Scan() { - line := s.Text() - deviceRule, err := parseLine(line) - if err != nil { - return nil, errors.Wrapf(err, "parsing line %q", line) - } - // "devices.list" is an allow list. Note that this means that in - // black-list mode, we have no idea what rules are in play. As a - // result, we need to be very careful in Transition(). - if err := e.allow(deviceRule); err != nil { - return nil, errors.Wrapf(err, "adding devices.list rule") - } - } - if err := s.Err(); err != nil { - return nil, errors.Wrap(err, "reading devices.list lines") - } - return e, nil -} - -// Transition calculates what is the minimally-disruptive set of rules need to -// be applied to a devices cgroup in order to transition to the given target. -// This means that any already-existing rules will not be applied, and -// disruptive rules (like denying all device access) will only be applied if -// necessary. -// -// This function is the sole reason for all of Emulator -- to allow us -// to figure out how to update a containers' cgroups without causing spurrious -// device errors (if possible). -func (source *Emulator) Transition(target *Emulator) ([]*devices.Rule, error) { - var transitionRules []*devices.Rule - oldRules := source.rules - - // If the default policy doesn't match, we need to include a "disruptive" - // rule (either allow-all or deny-all) in order to switch the cgroup to the - // correct default policy. - // - // However, due to a limitation in "devices.list" we cannot be sure what - // deny rules are in place in a black-list cgroup. Thus if the source is a - // black-list we also have to include a disruptive rule. - if source.IsBlacklist() || source.defaultAllow != target.defaultAllow { - transitionRules = append(transitionRules, &devices.Rule{ - Type: 'a', - Major: -1, - Minor: -1, - Permissions: devices.Permissions("rwm"), - Allow: target.defaultAllow, - }) - // The old rules are only relevant if we aren't starting out with a - // disruptive rule. - oldRules = nil - } - - // NOTE: We traverse through the rules in a sorted order so we always write - // the same set of rules (this is to aid testing). - - // First, we create inverse rules for any old rules not in the new set. - // This includes partial-inverse rules for specific permissions. This is a - // no-op if we added a disruptive rule, since oldRules will be empty. - for _, rule := range oldRules.orderedEntries() { - meta, oldPerms := rule.meta, rule.perms - newPerms := target.rules[meta] - droppedPerms := oldPerms.Difference(newPerms) - if !droppedPerms.IsEmpty() { - transitionRules = append(transitionRules, &devices.Rule{ - Type: meta.node, - Major: meta.major, - Minor: meta.minor, - Permissions: droppedPerms, - Allow: target.defaultAllow, - }) - } - } - - // Add any additional rules which weren't in the old set. We happen to - // filter out rules which are present in both sets, though this isn't - // strictly necessary. 
- for _, rule := range target.rules.orderedEntries() { - meta, newPerms := rule.meta, rule.perms - oldPerms := oldRules[meta] - gainedPerms := newPerms.Difference(oldPerms) - if !gainedPerms.IsEmpty() { - transitionRules = append(transitionRules, &devices.Rule{ - Type: meta.node, - Major: meta.major, - Minor: meta.minor, - Permissions: gainedPerms, - Allow: !target.defaultAllow, - }) - } - } - return transitionRules, nil -} - -// Rules returns the minimum set of rules necessary to convert a *deny-all* -// cgroup to the emulated filter state (note that this is not the same as a -// default cgroupv1 cgroup -- which is allow-all). This is effectively just a -// wrapper around Transition() with the source emulator being an empty cgroup. -func (e *Emulator) Rules() ([]*devices.Rule, error) { - defaultCgroup := &Emulator{defaultAllow: false} - return defaultCgroup.Transition(e) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go deleted file mode 100644 index 96cbca3916..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter/devicefilter.go +++ /dev/null @@ -1,208 +0,0 @@ -// Package devicefilter contains eBPF device filter program -// -// The implementation is based on https://github.com/containers/crun/blob/0.10.2/src/libcrun/ebpf.c -// -// Although ebpf.c is originally licensed under LGPL-3.0-or-later, the author (Giuseppe Scrivano) -// agreed to relicense the file in Apache License 2.0: https://github.com/opencontainers/runc/issues/2144#issuecomment-543116397 -package devicefilter - -import ( - "math" - "strconv" - - "github.com/cilium/ebpf/asm" - devicesemulator "github.com/opencontainers/runc/libcontainer/cgroups/devices" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -const ( - // license string format is same as kernel MODULE_LICENSE macro - license = "Apache" -) - -// DeviceFilter returns eBPF device filter program and its license string -func DeviceFilter(rules []*devices.Rule) (asm.Instructions, string, error) { - // Generate the minimum ruleset for the device rules we are given. While we - // don't care about minimum transitions in cgroupv2, using the emulator - // gives us a guarantee that the behaviour of devices filtering is the same - // as cgroupv1, including security hardenings to avoid misconfiguration - // (such as punching holes in wildcard rules). - emu := new(devicesemulator.Emulator) - for _, rule := range rules { - if err := emu.Apply(*rule); err != nil { - return nil, "", err - } - } - cleanRules, err := emu.Rules() - if err != nil { - return nil, "", err - } - - p := &program{ - defaultAllow: emu.IsBlacklist(), - } - p.init() - - for idx, rule := range cleanRules { - if rule.Type == devices.WildcardDevice { - // We can safely skip over wildcard entries because there should - // only be one (at most) at the very start to instruct cgroupv1 to - // go into allow-list mode. However we do double-check this here. - if idx != 0 || rule.Allow != emu.IsBlacklist() { - return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had bad wildcard at idx %v (%s)", idx, rule.CgroupString()) - } - continue - } - if rule.Allow == p.defaultAllow { - // There should be no rules which have an action equal to the - // default action, the emulator removes those. 
- return nil, "", errors.Errorf("[internal error] emulated cgroupv2 devices ruleset had no-op rule at idx %v (%s)", idx, rule.CgroupString()) - } - if err := p.appendRule(rule); err != nil { - return nil, "", err - } - } - insts, err := p.finalize() - return insts, license, err -} - -type program struct { - insts asm.Instructions - defaultAllow bool - blockID int -} - -func (p *program) init() { - // struct bpf_cgroup_dev_ctx: https://elixir.bootlin.com/linux/v5.3.6/source/include/uapi/linux/bpf.h#L3423 - /* - u32 access_type - u32 major - u32 minor - */ - // R2 <- type (lower 16 bit of u32 access_type at R1[0]) - p.insts = append(p.insts, - asm.LoadMem(asm.R2, asm.R1, 0, asm.Word), - asm.And.Imm32(asm.R2, 0xFFFF)) - - // R3 <- access (upper 16 bit of u32 access_type at R1[0]) - p.insts = append(p.insts, - asm.LoadMem(asm.R3, asm.R1, 0, asm.Word), - // RSh: bitwise shift right - asm.RSh.Imm32(asm.R3, 16)) - - // R4 <- major (u32 major at R1[4]) - p.insts = append(p.insts, - asm.LoadMem(asm.R4, asm.R1, 4, asm.Word)) - - // R5 <- minor (u32 minor at R1[8]) - p.insts = append(p.insts, - asm.LoadMem(asm.R5, asm.R1, 8, asm.Word)) -} - -// appendRule rule converts an OCI rule to the relevant eBPF block and adds it -// to the in-progress filter program. In order to operate properly, it must be -// called with a "clean" rule list (generated by devices.Emulator.Rules() -- -// with any "a" rules removed). -func (p *program) appendRule(rule *devices.Rule) error { - if p.blockID < 0 { - return errors.New("the program is finalized") - } - - var bpfType int32 - switch rule.Type { - case devices.CharDevice: - bpfType = int32(unix.BPF_DEVCG_DEV_CHAR) - case devices.BlockDevice: - bpfType = int32(unix.BPF_DEVCG_DEV_BLOCK) - default: - // We do not permit 'a', nor any other types we don't know about. - return errors.Errorf("invalid type %q", string(rule.Type)) - } - if rule.Major > math.MaxUint32 { - return errors.Errorf("invalid major %d", rule.Major) - } - if rule.Minor > math.MaxUint32 { - return errors.Errorf("invalid minor %d", rule.Major) - } - hasMajor := rule.Major >= 0 // if not specified in OCI json, major is set to -1 - hasMinor := rule.Minor >= 0 - bpfAccess := int32(0) - for _, r := range rule.Permissions { - switch r { - case 'r': - bpfAccess |= unix.BPF_DEVCG_ACC_READ - case 'w': - bpfAccess |= unix.BPF_DEVCG_ACC_WRITE - case 'm': - bpfAccess |= unix.BPF_DEVCG_ACC_MKNOD - default: - return errors.Errorf("unknown device access %v", r) - } - } - // If the access is rwm, skip the check. - hasAccess := bpfAccess != (unix.BPF_DEVCG_ACC_READ | unix.BPF_DEVCG_ACC_WRITE | unix.BPF_DEVCG_ACC_MKNOD) - - var ( - blockSym = "block-" + strconv.Itoa(p.blockID) - nextBlockSym = "block-" + strconv.Itoa(p.blockID+1) - prevBlockLastIdx = len(p.insts) - 1 - ) - p.insts = append(p.insts, - // if (R2 != bpfType) goto next - asm.JNE.Imm(asm.R2, bpfType, nextBlockSym), - ) - if hasAccess { - p.insts = append(p.insts, - // if (R3 & bpfAccess != R3 /* use R1 as a temp var */) goto next - asm.Mov.Reg32(asm.R1, asm.R3), - asm.And.Imm32(asm.R1, bpfAccess), - asm.JNE.Reg(asm.R1, asm.R3, nextBlockSym), - ) - } - if hasMajor { - p.insts = append(p.insts, - // if (R4 != major) goto next - asm.JNE.Imm(asm.R4, int32(rule.Major), nextBlockSym), - ) - } - if hasMinor { - p.insts = append(p.insts, - // if (R5 != minor) goto next - asm.JNE.Imm(asm.R5, int32(rule.Minor), nextBlockSym), - ) - } - p.insts = append(p.insts, acceptBlock(rule.Allow)...) 
- // set blockSym to the first instruction we added in this iteration - p.insts[prevBlockLastIdx+1] = p.insts[prevBlockLastIdx+1].Sym(blockSym) - p.blockID++ - return nil -} - -func (p *program) finalize() (asm.Instructions, error) { - var v int32 - if p.defaultAllow { - v = 1 - } - blockSym := "block-" + strconv.Itoa(p.blockID) - p.insts = append(p.insts, - // R0 <- v - asm.Mov.Imm32(asm.R0, v).Sym(blockSym), - asm.Return(), - ) - p.blockID = -1 - return p.insts, nil -} - -func acceptBlock(accept bool) asm.Instructions { - var v int32 - if accept { - v = 1 - } - return []asm.Instruction{ - // R0 <- v - asm.Mov.Imm32(asm.R0, v), - asm.Return(), - } -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go deleted file mode 100644 index 6c8de80dd8..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/ebpf/ebpf_linux.go +++ /dev/null @@ -1,253 +0,0 @@ -package ebpf - -import ( - "fmt" - "os" - "runtime" - "sync" - "unsafe" - - "github.com/cilium/ebpf" - "github.com/cilium/ebpf/asm" - "github.com/cilium/ebpf/link" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -func nilCloser() error { - return nil -} - -func findAttachedCgroupDeviceFilters(dirFd int) ([]*ebpf.Program, error) { - type bpfAttrQuery struct { - TargetFd uint32 - AttachType uint32 - QueryType uint32 - AttachFlags uint32 - ProgIds uint64 // __aligned_u64 - ProgCnt uint32 - } - - // Currently you can only have 64 eBPF programs attached to a cgroup. - size := 64 - retries := 0 - for retries < 10 { - progIds := make([]uint32, size) - query := bpfAttrQuery{ - TargetFd: uint32(dirFd), - AttachType: uint32(unix.BPF_CGROUP_DEVICE), - ProgIds: uint64(uintptr(unsafe.Pointer(&progIds[0]))), - ProgCnt: uint32(len(progIds)), - } - - // Fetch the list of program ids. - _, _, errno := unix.Syscall(unix.SYS_BPF, - uintptr(unix.BPF_PROG_QUERY), - uintptr(unsafe.Pointer(&query)), - unsafe.Sizeof(query)) - size = int(query.ProgCnt) - runtime.KeepAlive(query) - if errno != 0 { - // On ENOSPC we get the correct number of programs. - if errno == unix.ENOSPC { - retries++ - continue - } - return nil, fmt.Errorf("bpf_prog_query(BPF_CGROUP_DEVICE) failed: %w", errno) - } - - // Convert the ids to program handles. - progIds = progIds[:size] - programs := make([]*ebpf.Program, 0, len(progIds)) - for _, progId := range progIds { - program, err := ebpf.NewProgramFromID(ebpf.ProgramID(progId)) - if err != nil { - // We skip over programs that give us -EACCES or -EPERM. This - // is necessary because there may be BPF programs that have - // been attached (such as with --systemd-cgroup) which have an - // LSM label that blocks us from interacting with the program. - // - // Because additional BPF_CGROUP_DEVICE programs only can add - // restrictions, there's no real issue with just ignoring these - // programs (and stops runc from breaking on distributions with - // very strict SELinux policies). 
- if errors.Is(err, os.ErrPermission) { - logrus.Debugf("ignoring existing CGROUP_DEVICE program (prog_id=%v) which cannot be accessed by runc -- likely due to LSM policy: %v", progId, err) - continue - } - return nil, fmt.Errorf("cannot fetch program from id: %w", err) - } - programs = append(programs, program) - } - runtime.KeepAlive(progIds) - return programs, nil - } - - return nil, errors.New("could not get complete list of CGROUP_DEVICE programs") -} - -var ( - haveBpfProgReplaceBool bool - haveBpfProgReplaceOnce sync.Once -) - -// Loosely based on the BPF_F_REPLACE support check in -// . -// -// TODO: move this logic to cilium/ebpf -func haveBpfProgReplace() bool { - haveBpfProgReplaceOnce.Do(func() { - prog, err := ebpf.NewProgram(&ebpf.ProgramSpec{ - Type: ebpf.CGroupDevice, - License: "MIT", - Instructions: asm.Instructions{ - asm.Mov.Imm(asm.R0, 0), - asm.Return(), - }, - }) - if err != nil { - logrus.Debugf("checking for BPF_F_REPLACE support: ebpf.NewProgram failed: %v", err) - return - } - defer prog.Close() - - devnull, err := os.Open("/dev/null") - if err != nil { - logrus.Debugf("checking for BPF_F_REPLACE support: open dummy target fd: %v", err) - return - } - defer devnull.Close() - - // We know that we have BPF_PROG_ATTACH since we can load - // BPF_CGROUP_DEVICE programs. If passing BPF_F_REPLACE gives us EINVAL - // we know that the feature isn't present. - err = link.RawAttachProgram(link.RawAttachProgramOptions{ - // We rely on this fd being checked after attachFlags. - Target: int(devnull.Fd()), - // Attempt to "replace" bad fds with this program. - Program: prog, - Attach: ebpf.AttachCGroupDevice, - Flags: unix.BPF_F_ALLOW_MULTI | unix.BPF_F_REPLACE, - }) - if errors.Is(err, unix.EINVAL) { - // not supported - return - } - // attach_flags test succeded. - if !errors.Is(err, unix.EBADF) { - logrus.Debugf("checking for BPF_F_REPLACE: got unexpected (not EBADF or EINVAL) error: %v", err) - } - haveBpfProgReplaceBool = true - }) - return haveBpfProgReplaceBool -} - -// LoadAttachCgroupDeviceFilter installs eBPF device filter program to /sys/fs/cgroup/ directory. -// -// Requires the system to be running in cgroup2 unified-mode with kernel >= 4.15 . -// -// https://github.com/torvalds/linux/commit/ebc614f687369f9df99828572b1d85a7c2de3d92 -func LoadAttachCgroupDeviceFilter(insts asm.Instructions, license string, dirFd int) (func() error, error) { - // Increase `ulimit -l` limit to avoid BPF_PROG_LOAD error (#2167). - // This limit is not inherited into the container. - memlockLimit := &unix.Rlimit{ - Cur: unix.RLIM_INFINITY, - Max: unix.RLIM_INFINITY, - } - _ = unix.Setrlimit(unix.RLIMIT_MEMLOCK, memlockLimit) - - // Get the list of existing programs. - oldProgs, err := findAttachedCgroupDeviceFilters(dirFd) - if err != nil { - return nilCloser, err - } - useReplaceProg := haveBpfProgReplace() && len(oldProgs) == 1 - - // Generate new program. - spec := &ebpf.ProgramSpec{ - Type: ebpf.CGroupDevice, - Instructions: insts, - License: license, - } - prog, err := ebpf.NewProgram(spec) - if err != nil { - return nilCloser, err - } - - // If there is only one old program, we can just replace it directly. 
- var ( - replaceProg *ebpf.Program - attachFlags uint32 = unix.BPF_F_ALLOW_MULTI - ) - if useReplaceProg { - replaceProg = oldProgs[0] - attachFlags |= unix.BPF_F_REPLACE - } - err = link.RawAttachProgram(link.RawAttachProgramOptions{ - Target: dirFd, - Program: prog, - Replace: replaceProg, - Attach: ebpf.AttachCGroupDevice, - Flags: attachFlags, - }) - if err != nil { - return nilCloser, fmt.Errorf("failed to call BPF_PROG_ATTACH (BPF_CGROUP_DEVICE, BPF_F_ALLOW_MULTI): %w", err) - } - closer := func() error { - err = link.RawDetachProgram(link.RawDetachProgramOptions{ - Target: dirFd, - Program: prog, - Attach: ebpf.AttachCGroupDevice, - }) - if err != nil { - return fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE): %w", err) - } - // TODO: Should we attach the old filters back in this case? Otherwise - // we fail-open on a security feature, which is a bit scary. - return nil - } - if !useReplaceProg { - logLevel := logrus.DebugLevel - // If there was more than one old program, give a warning (since this - // really shouldn't happen with runc-managed cgroups) and then detach - // all the old programs. - if len(oldProgs) > 1 { - // NOTE: Ideally this should be a warning but it turns out that - // systemd-managed cgroups trigger this warning (apparently - // systemd doesn't delete old non-systemd programs when - // setting properties). - logrus.Infof("found more than one filter (%d) attached to a cgroup -- removing extra filters!", len(oldProgs)) - logLevel = logrus.InfoLevel - } - for idx, oldProg := range oldProgs { - // Output some extra debug info. - if info, err := oldProg.Info(); err == nil { - fields := logrus.Fields{ - "type": info.Type.String(), - "tag": info.Tag, - "name": info.Name, - } - if id, ok := info.ID(); ok { - fields["id"] = id - } - if runCount, ok := info.RunCount(); ok { - fields["run_count"] = runCount - } - if runtime, ok := info.Runtime(); ok { - fields["runtime"] = runtime.String() - } - logrus.WithFields(fields).Logf(logLevel, "removing old filter %d from cgroup", idx) - } - err = link.RawDetachProgram(link.RawDetachProgramOptions{ - Target: dirFd, - Program: oldProg, - Attach: ebpf.AttachCGroupDevice, - }) - if err != nil { - return closer, fmt.Errorf("failed to call BPF_PROG_DETACH (BPF_CGROUP_DEVICE) on old filter program: %w", err) - } - } - } - return closer, nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go deleted file mode 100644 index 5f6ab9fd69..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go +++ /dev/null @@ -1,166 +0,0 @@ -package cgroups - -import ( - "bytes" - "os" - "strings" - "sync" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -// OpenFile opens a cgroup file in a given dir with given flags. -// It is supposed to be used for cgroup files only. -func OpenFile(dir, file string, flags int) (*os.File, error) { - if dir == "" { - return nil, errors.Errorf("no directory specified for %s", file) - } - return openFile(dir, file, flags) -} - -// ReadFile reads data from a cgroup file in dir. -// It is supposed to be used for cgroup files only. 
-func ReadFile(dir, file string) (string, error) { - fd, err := OpenFile(dir, file, unix.O_RDONLY) - if err != nil { - return "", err - } - defer fd.Close() - var buf bytes.Buffer - - _, err = buf.ReadFrom(fd) - return buf.String(), err -} - -// WriteFile writes data to a cgroup file in dir. -// It is supposed to be used for cgroup files only. -func WriteFile(dir, file, data string) error { - fd, err := OpenFile(dir, file, unix.O_WRONLY) - if err != nil { - return err - } - defer fd.Close() - if err := retryingWriteFile(fd, data); err != nil { - return errors.Wrapf(err, "failed to write %q", data) - } - return nil -} - -func retryingWriteFile(fd *os.File, data string) error { - for { - _, err := fd.Write([]byte(data)) - if errors.Is(err, unix.EINTR) { - logrus.Infof("interrupted while writing %s to %s", data, fd.Name()) - continue - } - return err - } -} - -const ( - cgroupfsDir = "/sys/fs/cgroup" - cgroupfsPrefix = cgroupfsDir + "/" -) - -var ( - // TestMode is set to true by unit tests that need "fake" cgroupfs. - TestMode bool - - cgroupFd int = -1 - prepOnce sync.Once - prepErr error - resolveFlags uint64 -) - -func prepareOpenat2() error { - prepOnce.Do(func() { - fd, err := unix.Openat2(-1, cgroupfsDir, &unix.OpenHow{ - Flags: unix.O_DIRECTORY | unix.O_PATH, - }) - if err != nil { - prepErr = &os.PathError{Op: "openat2", Path: cgroupfsDir, Err: err} - if err != unix.ENOSYS { - logrus.Warnf("falling back to securejoin: %s", prepErr) - } else { - logrus.Debug("openat2 not available, falling back to securejoin") - } - return - } - var st unix.Statfs_t - if err = unix.Fstatfs(fd, &st); err != nil { - prepErr = &os.PathError{Op: "statfs", Path: cgroupfsDir, Err: err} - logrus.Warnf("falling back to securejoin: %s", prepErr) - return - } - - cgroupFd = fd - - resolveFlags = unix.RESOLVE_BENEATH | unix.RESOLVE_NO_MAGICLINKS - if st.Type == unix.CGROUP2_SUPER_MAGIC { - // cgroupv2 has a single mountpoint and no "cpu,cpuacct" symlinks - resolveFlags |= unix.RESOLVE_NO_XDEV | unix.RESOLVE_NO_SYMLINKS - } - }) - - return prepErr -} - -// OpenFile opens a cgroup file in a given dir with given flags. -// It is supposed to be used for cgroup files only. -func openFile(dir, file string, flags int) (*os.File, error) { - mode := os.FileMode(0) - if TestMode && flags&os.O_WRONLY != 0 { - // "emulate" cgroup fs for unit tests - flags |= os.O_TRUNC | os.O_CREATE - mode = 0o600 - } - if prepareOpenat2() != nil { - return openFallback(dir, file, flags, mode) - } - reldir := strings.TrimPrefix(dir, cgroupfsPrefix) - if len(reldir) == len(dir) { // non-standard path, old system? - return openFallback(dir, file, flags, mode) - } - - relname := reldir + "/" + file - fd, err := unix.Openat2(cgroupFd, relname, - &unix.OpenHow{ - Resolve: resolveFlags, - Flags: uint64(flags) | unix.O_CLOEXEC, - Mode: uint64(mode), - }) - if err != nil { - return nil, &os.PathError{Op: "openat2", Path: dir + "/" + file, Err: err} - } - - return os.NewFile(uintptr(fd), cgroupfsPrefix+relname), nil -} - -var errNotCgroupfs = errors.New("not a cgroup file") - -// openFallback is used when openat2(2) is not available. It checks the opened -// file is on cgroupfs, returning an error otherwise. -func openFallback(dir, file string, flags int, mode os.FileMode) (*os.File, error) { - path := dir + "/" + file - fd, err := os.OpenFile(path, flags, mode) - if err != nil { - return nil, err - } - if TestMode { - return fd, nil - } - // Check this is a cgroupfs file. 
- var st unix.Statfs_t - if err := unix.Fstatfs(int(fd.Fd()), &st); err != nil { - _ = fd.Close() - return nil, &os.PathError{Op: "statfs", Path: path, Err: err} - } - if st.Type != unix.CGROUP_SUPER_MAGIC && st.Type != unix.CGROUP2_SUPER_MAGIC { - _ = fd.Close() - return nil, &os.PathError{Op: "open", Path: path, Err: errNotCgroupfs} - } - - return fd, nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go deleted file mode 100644 index 88012a2f5f..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/blkio.go +++ /dev/null @@ -1,311 +0,0 @@ -// +build linux - -package fs - -import ( - "bufio" - "fmt" - "os" - "path/filepath" - "strconv" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type BlkioGroup struct { - weightFilename string - weightDeviceFilename string -} - -func (s *BlkioGroup) Name() string { - return "blkio" -} - -func (s *BlkioGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *BlkioGroup) Set(path string, r *configs.Resources) error { - s.detectWeightFilenames(path) - if r.BlkioWeight != 0 { - if err := cgroups.WriteFile(path, s.weightFilename, strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { - return err - } - } - - if r.BlkioLeafWeight != 0 { - if err := cgroups.WriteFile(path, "blkio.leaf_weight", strconv.FormatUint(uint64(r.BlkioLeafWeight), 10)); err != nil { - return err - } - } - for _, wd := range r.BlkioWeightDevice { - if wd.Weight != 0 { - if err := cgroups.WriteFile(path, s.weightDeviceFilename, wd.WeightString()); err != nil { - return err - } - } - if wd.LeafWeight != 0 { - if err := cgroups.WriteFile(path, "blkio.leaf_weight_device", wd.LeafWeightString()); err != nil { - return err - } - } - } - for _, td := range r.BlkioThrottleReadBpsDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.read_bps_device", td.String()); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleWriteBpsDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.write_bps_device", td.String()); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleReadIOPSDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.read_iops_device", td.String()); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleWriteIOPSDevice { - if err := cgroups.WriteFile(path, "blkio.throttle.write_iops_device", td.String()); err != nil { - return err - } - } - - return nil -} - -/* -examples: - - blkio.sectors - 8:0 6792 - - blkio.io_service_bytes - 8:0 Read 1282048 - 8:0 Write 2195456 - 8:0 Sync 2195456 - 8:0 Async 1282048 - 8:0 Total 3477504 - Total 3477504 - - blkio.io_serviced - 8:0 Read 124 - 8:0 Write 104 - 8:0 Sync 104 - 8:0 Async 124 - 8:0 Total 228 - Total 228 - - blkio.io_queued - 8:0 Read 0 - 8:0 Write 0 - 8:0 Sync 0 - 8:0 Async 0 - 8:0 Total 0 - Total 0 -*/ - -func splitBlkioStatLine(r rune) bool { - return r == ' ' || r == ':' -} - -func getBlkioStat(dir, file string) ([]cgroups.BlkioStatEntry, error) { - var blkioStats []cgroups.BlkioStatEntry - f, err := cgroups.OpenFile(dir, file, os.O_RDONLY) - if err != nil { - if os.IsNotExist(err) { - return blkioStats, nil - } - return nil, err - } - defer f.Close() - - sc := bufio.NewScanner(f) - for sc.Scan() { - // format: dev type amount - fields := strings.FieldsFunc(sc.Text(), 
splitBlkioStatLine) - if len(fields) < 3 { - if len(fields) == 2 && fields[0] == "Total" { - // skip total line - continue - } else { - return nil, fmt.Errorf("Invalid line found while parsing %s/%s: %s", dir, file, sc.Text()) - } - } - - v, err := strconv.ParseUint(fields[0], 10, 64) - if err != nil { - return nil, err - } - major := v - - v, err = strconv.ParseUint(fields[1], 10, 64) - if err != nil { - return nil, err - } - minor := v - - op := "" - valueField := 2 - if len(fields) == 4 { - op = fields[2] - valueField = 3 - } - v, err = strconv.ParseUint(fields[valueField], 10, 64) - if err != nil { - return nil, err - } - blkioStats = append(blkioStats, cgroups.BlkioStatEntry{Major: major, Minor: minor, Op: op, Value: v}) - } - - return blkioStats, nil -} - -func (s *BlkioGroup) GetStats(path string, stats *cgroups.Stats) error { - type blkioStatInfo struct { - filename string - blkioStatEntriesPtr *[]cgroups.BlkioStatEntry - } - bfqDebugStats := []blkioStatInfo{ - { - filename: "blkio.bfq.sectors_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, - }, - { - filename: "blkio.bfq.io_service_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive, - }, - { - filename: "blkio.bfq.io_wait_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive, - }, - { - filename: "blkio.bfq.io_merged_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive, - }, - { - filename: "blkio.bfq.io_queued_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive, - }, - { - filename: "blkio.bfq.time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive, - }, - { - filename: "blkio.bfq.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.bfq.io_service_bytes_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - bfqStats := []blkioStatInfo{ - { - filename: "blkio.bfq.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.bfq.io_service_bytes_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - cfqStats := []blkioStatInfo{ - { - filename: "blkio.sectors_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.SectorsRecursive, - }, - { - filename: "blkio.io_service_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceTimeRecursive, - }, - { - filename: "blkio.io_wait_time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoWaitTimeRecursive, - }, - { - filename: "blkio.io_merged_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoMergedRecursive, - }, - { - filename: "blkio.io_queued_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoQueuedRecursive, - }, - { - filename: "blkio.time_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoTimeRecursive, - }, - { - filename: "blkio.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.io_service_bytes_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - throttleRecursiveStats := []blkioStatInfo{ - { - filename: "blkio.throttle.io_serviced_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.throttle.io_service_bytes_recursive", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - baseStats := []blkioStatInfo{ - { - filename: "blkio.throttle.io_serviced", - blkioStatEntriesPtr: 
&stats.BlkioStats.IoServicedRecursive, - }, - { - filename: "blkio.throttle.io_service_bytes", - blkioStatEntriesPtr: &stats.BlkioStats.IoServiceBytesRecursive, - }, - } - orderedStats := [][]blkioStatInfo{ - bfqDebugStats, - bfqStats, - cfqStats, - throttleRecursiveStats, - baseStats, - } - - var blkioStats []cgroups.BlkioStatEntry - var err error - - for _, statGroup := range orderedStats { - for i, statInfo := range statGroup { - if blkioStats, err = getBlkioStat(path, statInfo.filename); err != nil || blkioStats == nil { - // if error occurs on first file, move to next group - if i == 0 { - break - } - return err - } - *statInfo.blkioStatEntriesPtr = blkioStats - // finish if all stats are gathered - if i == len(statGroup)-1 { - return nil - } - } - } - return nil -} - -func (s *BlkioGroup) detectWeightFilenames(path string) { - if s.weightFilename != "" { - // Already detected. - return - } - if cgroups.PathExists(filepath.Join(path, "blkio.weight")) { - s.weightFilename = "blkio.weight" - s.weightDeviceFilename = "blkio.weight_device" - } else { - s.weightFilename = "blkio.bfq.weight" - s.weightDeviceFilename = "blkio.bfq.weight_device" - } -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go deleted file mode 100644 index 31c1c874ea..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go +++ /dev/null @@ -1,115 +0,0 @@ -// +build linux - -package fs - -import ( - "bufio" - "fmt" - "os" - "strconv" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type CpuGroup struct{} - -func (s *CpuGroup) Name() string { - return "cpu" -} - -func (s *CpuGroup) Apply(path string, d *cgroupData) error { - // This might happen if we have no cpu cgroup mounted. - // Just do nothing and don't fail. - if path == "" { - return nil - } - if err := os.MkdirAll(path, 0o755); err != nil { - return err - } - // We should set the real-Time group scheduling settings before moving - // in the process because if the process is already in SCHED_RR mode - // and no RT bandwidth is set, adding it will fail. - if err := s.SetRtSched(path, d.config.Resources); err != nil { - return err - } - // Since we are not using join(), we need to place the pid - // into the procs file unlike other subsystems. - return cgroups.WriteCgroupProc(path, d.pid) -} - -func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error { - if r.CpuRtPeriod != 0 { - if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil { - return err - } - } - if r.CpuRtRuntime != 0 { - if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil { - return err - } - } - return nil -} - -func (s *CpuGroup) Set(path string, r *configs.Resources) error { - if r.CpuShares != 0 { - shares := r.CpuShares - if err := cgroups.WriteFile(path, "cpu.shares", strconv.FormatUint(shares, 10)); err != nil { - return err - } - // read it back - sharesRead, err := fscommon.GetCgroupParamUint(path, "cpu.shares") - if err != nil { - return err - } - // ... 
and check - if shares > sharesRead { - return fmt.Errorf("the maximum allowed cpu-shares is %d", sharesRead) - } else if shares < sharesRead { - return fmt.Errorf("the minimum allowed cpu-shares is %d", sharesRead) - } - } - if r.CpuPeriod != 0 { - if err := cgroups.WriteFile(path, "cpu.cfs_period_us", strconv.FormatUint(r.CpuPeriod, 10)); err != nil { - return err - } - } - if r.CpuQuota != 0 { - if err := cgroups.WriteFile(path, "cpu.cfs_quota_us", strconv.FormatInt(r.CpuQuota, 10)); err != nil { - return err - } - } - return s.SetRtSched(path, r) -} - -func (s *CpuGroup) GetStats(path string, stats *cgroups.Stats) error { - f, err := cgroups.OpenFile(path, "cpu.stat", os.O_RDONLY) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - defer f.Close() - - sc := bufio.NewScanner(f) - for sc.Scan() { - t, v, err := fscommon.ParseKeyValue(sc.Text()) - if err != nil { - return err - } - switch t { - case "nr_periods": - stats.CpuStats.ThrottlingData.Periods = v - - case "nr_throttled": - stats.CpuStats.ThrottlingData.ThrottledPeriods = v - - case "throttled_time": - stats.CpuStats.ThrottlingData.ThrottledTime = v - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go deleted file mode 100644 index 4fbf078494..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuacct.go +++ /dev/null @@ -1,172 +0,0 @@ -// +build linux - -package fs - -import ( - "bufio" - "fmt" - "os" - "path/filepath" - "strconv" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" -) - -const ( - cgroupCpuacctStat = "cpuacct.stat" - cgroupCpuacctUsageAll = "cpuacct.usage_all" - - nanosecondsInSecond = 1000000000 - - userModeColumn = 1 - kernelModeColumn = 2 - cuacctUsageAllColumnsNumber = 3 - - // The value comes from `C.sysconf(C._SC_CLK_TCK)`, and - // on Linux it's a constant which is safe to be hard coded, - // so we can avoid using cgo here. 
For details, see: - // https://github.com/containerd/cgroups/pull/12 - clockTicks uint64 = 100 -) - -type CpuacctGroup struct{} - -func (s *CpuacctGroup) Name() string { - return "cpuacct" -} - -func (s *CpuacctGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *CpuacctGroup) Set(_ string, _ *configs.Resources) error { - return nil -} - -func (s *CpuacctGroup) GetStats(path string, stats *cgroups.Stats) error { - if !cgroups.PathExists(path) { - return nil - } - userModeUsage, kernelModeUsage, err := getCpuUsageBreakdown(path) - if err != nil { - return err - } - - totalUsage, err := fscommon.GetCgroupParamUint(path, "cpuacct.usage") - if err != nil { - return err - } - - percpuUsage, err := getPercpuUsage(path) - if err != nil { - return err - } - - percpuUsageInKernelmode, percpuUsageInUsermode, err := getPercpuUsageInModes(path) - if err != nil { - return err - } - - stats.CpuStats.CpuUsage.TotalUsage = totalUsage - stats.CpuStats.CpuUsage.PercpuUsage = percpuUsage - stats.CpuStats.CpuUsage.PercpuUsageInKernelmode = percpuUsageInKernelmode - stats.CpuStats.CpuUsage.PercpuUsageInUsermode = percpuUsageInUsermode - stats.CpuStats.CpuUsage.UsageInUsermode = userModeUsage - stats.CpuStats.CpuUsage.UsageInKernelmode = kernelModeUsage - return nil -} - -// Returns user and kernel usage breakdown in nanoseconds. -func getCpuUsageBreakdown(path string) (uint64, uint64, error) { - var userModeUsage, kernelModeUsage uint64 - const ( - userField = "user" - systemField = "system" - ) - - // Expected format: - // user - // system - data, err := cgroups.ReadFile(path, cgroupCpuacctStat) - if err != nil { - return 0, 0, err - } - fields := strings.Fields(data) - if len(fields) < 4 { - return 0, 0, fmt.Errorf("failure - %s is expected to have at least 4 fields", filepath.Join(path, cgroupCpuacctStat)) - } - if fields[0] != userField { - return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[0], cgroupCpuacctStat, userField) - } - if fields[2] != systemField { - return 0, 0, fmt.Errorf("unexpected field %q in %q, expected %q", fields[2], cgroupCpuacctStat, systemField) - } - if userModeUsage, err = strconv.ParseUint(fields[1], 10, 64); err != nil { - return 0, 0, err - } - if kernelModeUsage, err = strconv.ParseUint(fields[3], 10, 64); err != nil { - return 0, 0, err - } - - return (userModeUsage * nanosecondsInSecond) / clockTicks, (kernelModeUsage * nanosecondsInSecond) / clockTicks, nil -} - -func getPercpuUsage(path string) ([]uint64, error) { - percpuUsage := []uint64{} - data, err := cgroups.ReadFile(path, "cpuacct.usage_percpu") - if err != nil { - return percpuUsage, err - } - for _, value := range strings.Fields(data) { - value, err := strconv.ParseUint(value, 10, 64) - if err != nil { - return percpuUsage, fmt.Errorf("Unable to convert param value to uint64: %s", err) - } - percpuUsage = append(percpuUsage, value) - } - return percpuUsage, nil -} - -func getPercpuUsageInModes(path string) ([]uint64, []uint64, error) { - usageKernelMode := []uint64{} - usageUserMode := []uint64{} - - file, err := cgroups.OpenFile(path, cgroupCpuacctUsageAll, os.O_RDONLY) - if os.IsNotExist(err) { - return usageKernelMode, usageUserMode, nil - } else if err != nil { - return nil, nil, err - } - defer file.Close() - - scanner := bufio.NewScanner(file) - scanner.Scan() // skipping header line - - for scanner.Scan() { - lineFields := strings.SplitN(scanner.Text(), " ", cuacctUsageAllColumnsNumber+1) - if len(lineFields) != cuacctUsageAllColumnsNumber { - 
continue - } - - usageInKernelMode, err := strconv.ParseUint(lineFields[kernelModeColumn], 10, 64) - if err != nil { - return nil, nil, fmt.Errorf("Unable to convert CPU usage in kernel mode to uint64: %s", err) - } - usageKernelMode = append(usageKernelMode, usageInKernelMode) - - usageInUserMode, err := strconv.ParseUint(lineFields[userModeColumn], 10, 64) - if err != nil { - return nil, nil, fmt.Errorf("Unable to convert CPU usage in user mode to uint64: %s", err) - } - usageUserMode = append(usageUserMode, usageInUserMode) - } - - if err := scanner.Err(); err != nil { - return nil, nil, fmt.Errorf("Problem in reading %s line by line, %s", cgroupCpuacctUsageAll, err) - } - - return usageKernelMode, usageUserMode, nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go deleted file mode 100644 index 58a0f04064..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpuset.go +++ /dev/null @@ -1,248 +0,0 @@ -// +build linux - -package fs - -import ( - "fmt" - "os" - "path/filepath" - "strconv" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -type CpusetGroup struct{} - -func (s *CpusetGroup) Name() string { - return "cpuset" -} - -func (s *CpusetGroup) Apply(path string, d *cgroupData) error { - return s.ApplyDir(path, d.config.Resources, d.pid) -} - -func (s *CpusetGroup) Set(path string, r *configs.Resources) error { - if r.CpusetCpus != "" { - if err := cgroups.WriteFile(path, "cpuset.cpus", r.CpusetCpus); err != nil { - return err - } - } - if r.CpusetMems != "" { - if err := cgroups.WriteFile(path, "cpuset.mems", r.CpusetMems); err != nil { - return err - } - } - return nil -} - -func getCpusetStat(path string, filename string) ([]uint16, error) { - var extracted []uint16 - fileContent, err := fscommon.GetCgroupParamString(path, filename) - if err != nil { - return extracted, err - } - if len(fileContent) == 0 { - return extracted, fmt.Errorf("%s found to be empty", filepath.Join(path, filename)) - } - - for _, s := range strings.Split(fileContent, ",") { - splitted := strings.SplitN(s, "-", 3) - switch len(splitted) { - case 3: - return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename)) - case 2: - min, err := strconv.ParseUint(splitted[0], 10, 16) - if err != nil { - return extracted, err - } - max, err := strconv.ParseUint(splitted[1], 10, 16) - if err != nil { - return extracted, err - } - if min > max { - return extracted, fmt.Errorf("invalid values in %s", filepath.Join(path, filename)) - } - for i := min; i <= max; i++ { - extracted = append(extracted, uint16(i)) - } - case 1: - value, err := strconv.ParseUint(s, 10, 16) - if err != nil { - return extracted, err - } - extracted = append(extracted, uint16(value)) - } - } - - return extracted, nil -} - -func (s *CpusetGroup) GetStats(path string, stats *cgroups.Stats) error { - var err error - - stats.CPUSetStats.CPUs, err = getCpusetStat(path, "cpuset.cpus") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.CPUExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.cpu_exclusive") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.Mems, err = 
getCpusetStat(path, "cpuset.mems") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemHardwall, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_hardwall") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemExclusive, err = fscommon.GetCgroupParamUint(path, "cpuset.mem_exclusive") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemoryMigrate, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_migrate") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemorySpreadPage, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_page") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemorySpreadSlab, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_spread_slab") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.MemoryPressure, err = fscommon.GetCgroupParamUint(path, "cpuset.memory_pressure") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.SchedLoadBalance, err = fscommon.GetCgroupParamUint(path, "cpuset.sched_load_balance") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - stats.CPUSetStats.SchedRelaxDomainLevel, err = fscommon.GetCgroupParamInt(path, "cpuset.sched_relax_domain_level") - if err != nil && !errors.Is(err, os.ErrNotExist) { - return err - } - - return nil -} - -func (s *CpusetGroup) ApplyDir(dir string, r *configs.Resources, pid int) error { - // This might happen if we have no cpuset cgroup mounted. - // Just do nothing and don't fail. - if dir == "" { - return nil - } - // 'ensureParent' start with parent because we don't want to - // explicitly inherit from parent, it could conflict with - // 'cpuset.cpu_exclusive'. - if err := cpusetEnsureParent(filepath.Dir(dir)); err != nil { - return err - } - if err := os.Mkdir(dir, 0o755); err != nil && !os.IsExist(err) { - return err - } - // We didn't inherit cpuset configs from parent, but we have - // to ensure cpuset configs are set before moving task into the - // cgroup. - // The logic is, if user specified cpuset configs, use these - // specified configs, otherwise, inherit from parent. This makes - // cpuset configs work correctly with 'cpuset.cpu_exclusive', and - // keep backward compatibility. - if err := s.ensureCpusAndMems(dir, r); err != nil { - return err - } - - // because we are not using d.join we need to place the pid into the procs file - // unlike the other subsystems - return cgroups.WriteCgroupProc(dir, pid) -} - -func getCpusetSubsystemSettings(parent string) (cpus, mems string, err error) { - if cpus, err = cgroups.ReadFile(parent, "cpuset.cpus"); err != nil { - return - } - if mems, err = cgroups.ReadFile(parent, "cpuset.mems"); err != nil { - return - } - return cpus, mems, nil -} - -// cpusetEnsureParent makes sure that the parent directories of current -// are created and populated with the proper cpus and mems files copied -// from their respective parent. It does that recursively, starting from -// the top of the cpuset hierarchy (i.e. cpuset cgroup mount point). 
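
The cpuset controller in this deleted file is driven through the exported CpusetGroup.Set API, which simply writes the requested cpu and memory lists into cpuset.cpus and cpuset.mems. A minimal usage sketch follows; the cgroup path and the "0-3"/"0" placement are invented for illustration and are not taken from this patch:

    // Sketch: constrain a hypothetical v1 cpuset cgroup to CPUs 0-3 and
    // memory node 0 via the vendored runc fs cpuset subsystem shown above.
    package main

    import (
    	"log"

    	"github.com/opencontainers/runc/libcontainer/cgroups/fs"
    	"github.com/opencontainers/runc/libcontainer/configs"
    )

    func main() {
    	r := &configs.Resources{
    		CpusetCpus: "0-3", // written verbatim to cpuset.cpus
    		CpusetMems: "0",   // written verbatim to cpuset.mems
    	}
    	cpuset := &fs.CpusetGroup{}
    	// "/sys/fs/cgroup/cpuset/example" is an invented path for illustration.
    	if err := cpuset.Set("/sys/fs/cgroup/cpuset/example", r); err != nil {
    		log.Fatalf("cpuset update failed: %v", err)
    	}
    }
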
-func cpusetEnsureParent(current string) error { - var st unix.Statfs_t - - parent := filepath.Dir(current) - err := unix.Statfs(parent, &st) - if err == nil && st.Type != unix.CGROUP_SUPER_MAGIC { - return nil - } - // Treat non-existing directory as cgroupfs as it will be created, - // and the root cpuset directory obviously exists. - if err != nil && err != unix.ENOENT { - return &os.PathError{Op: "statfs", Path: parent, Err: err} - } - - if err := cpusetEnsureParent(parent); err != nil { - return err - } - if err := os.Mkdir(current, 0o755); err != nil && !os.IsExist(err) { - return err - } - return cpusetCopyIfNeeded(current, parent) -} - -// cpusetCopyIfNeeded copies the cpuset.cpus and cpuset.mems from the parent -// directory to the current directory if the file's contents are 0 -func cpusetCopyIfNeeded(current, parent string) error { - currentCpus, currentMems, err := getCpusetSubsystemSettings(current) - if err != nil { - return err - } - parentCpus, parentMems, err := getCpusetSubsystemSettings(parent) - if err != nil { - return err - } - - if isEmptyCpuset(currentCpus) { - if err := cgroups.WriteFile(current, "cpuset.cpus", string(parentCpus)); err != nil { - return err - } - } - if isEmptyCpuset(currentMems) { - if err := cgroups.WriteFile(current, "cpuset.mems", string(parentMems)); err != nil { - return err - } - } - return nil -} - -func isEmptyCpuset(str string) bool { - return str == "" || str == "\n" -} - -func (s *CpusetGroup) ensureCpusAndMems(path string, r *configs.Resources) error { - if err := s.Set(path, r); err != nil { - return err - } - return cpusetCopyIfNeeded(path, filepath.Dir(path)) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go deleted file mode 100644 index dcf69ce13e..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/devices.go +++ /dev/null @@ -1,110 +0,0 @@ -// +build linux - -package fs - -import ( - "bytes" - "errors" - "reflect" - - "github.com/opencontainers/runc/libcontainer/cgroups" - cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runc/libcontainer/userns" -) - -type DevicesGroup struct { - testingSkipFinalCheck bool -} - -func (s *DevicesGroup) Name() string { - return "devices" -} - -func (s *DevicesGroup) Apply(path string, d *cgroupData) error { - if d.config.SkipDevices { - return nil - } - if path == "" { - // Return error here, since devices cgroup - // is a hard requirement for container's security. - return errSubsystemDoesNotExist - } - return join(path, d.pid) -} - -func loadEmulator(path string) (*cgroupdevices.Emulator, error) { - list, err := cgroups.ReadFile(path, "devices.list") - if err != nil { - return nil, err - } - return cgroupdevices.EmulatorFromList(bytes.NewBufferString(list)) -} - -func buildEmulator(rules []*devices.Rule) (*cgroupdevices.Emulator, error) { - // This defaults to a white-list -- which is what we want! 
- emu := &cgroupdevices.Emulator{} - for _, rule := range rules { - if err := emu.Apply(*rule); err != nil { - return nil, err - } - } - return emu, nil -} - -func (s *DevicesGroup) Set(path string, r *configs.Resources) error { - if userns.RunningInUserNS() || r.SkipDevices { - return nil - } - - // Generate two emulators, one for the current state of the cgroup and one - // for the requested state by the user. - current, err := loadEmulator(path) - if err != nil { - return err - } - target, err := buildEmulator(r.Devices) - if err != nil { - return err - } - - // Compute the minimal set of transition rules needed to achieve the - // requested state. - transitionRules, err := current.Transition(target) - if err != nil { - return err - } - for _, rule := range transitionRules { - file := "devices.deny" - if rule.Allow { - file = "devices.allow" - } - if err := cgroups.WriteFile(path, file, rule.CgroupString()); err != nil { - return err - } - } - - // Final safety check -- ensure that the resulting state is what was - // requested. This is only really correct for white-lists, but for - // black-lists we can at least check that the cgroup is in the right mode. - // - // This safety-check is skipped for the unit tests because we cannot - // currently mock devices.list correctly. - if !s.testingSkipFinalCheck { - currentAfter, err := loadEmulator(path) - if err != nil { - return err - } - if !target.IsBlacklist() && !reflect.DeepEqual(currentAfter, target) { - return errors.New("resulting devices cgroup doesn't precisely match target") - } else if target.IsBlacklist() != currentAfter.IsBlacklist() { - return errors.New("resulting devices cgroup doesn't match target mode") - } - } - return nil -} - -func (s *DevicesGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go deleted file mode 100644 index 4baa2798ac..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/freezer.go +++ /dev/null @@ -1,160 +0,0 @@ -// +build linux - -package fs - -import ( - "errors" - "fmt" - "os" - "strings" - "time" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -type FreezerGroup struct{} - -func (s *FreezerGroup) Name() string { - return "freezer" -} - -func (s *FreezerGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *FreezerGroup) Set(path string, r *configs.Resources) (Err error) { - switch r.Freezer { - case configs.Frozen: - defer func() { - if Err != nil { - // Freezing failed, and it is bad and dangerous - // to leave the cgroup in FROZEN or FREEZING - // state, so (try to) thaw it back. - _ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed)) - } - }() - - // As per older kernel docs (freezer-subsystem.txt before - // kernel commit ef9fe980c6fcc1821), if FREEZING is seen, - // userspace should either retry or thaw. While current - // kernel cgroup v1 docs no longer mention a need to retry, - // even a recent kernel (v5.4, Ubuntu 20.04) can't reliably - // freeze a cgroup v1 while new processes keep appearing in it - // (either via fork/clone or by writing new PIDs to - // cgroup.procs). 
- // - // The numbers below are empirically chosen to have a decent - // chance to succeed in various scenarios ("runc pause/unpause - // with parallel runc exec" and "bare freeze/unfreeze on a very - // slow system"), tested on RHEL7 and Ubuntu 20.04 kernels. - // - // Adding any amount of sleep in between retries did not - // increase the chances of successful freeze in "pause/unpause - // with parallel exec" reproducer. OTOH, adding an occasional - // sleep helped for the case where the system is extremely slow - // (CentOS 7 VM on GHA CI). - // - // Alas, this is still a game of chances, since the real fix - // belong to the kernel (cgroup v2 do not have this bug). - - for i := 0; i < 1000; i++ { - if i%50 == 49 { - // Occasional thaw and sleep improves - // the chances to succeed in freezing - // in case new processes keep appearing - // in the cgroup. - _ = cgroups.WriteFile(path, "freezer.state", string(configs.Thawed)) - time.Sleep(10 * time.Millisecond) - } - - if err := cgroups.WriteFile(path, "freezer.state", string(configs.Frozen)); err != nil { - return err - } - - if i%25 == 24 { - // Occasional short sleep before reading - // the state back also improves the chances to - // succeed in freezing in case of a very slow - // system. - time.Sleep(10 * time.Microsecond) - } - state, err := cgroups.ReadFile(path, "freezer.state") - if err != nil { - return err - } - state = strings.TrimSpace(state) - switch state { - case "FREEZING": - continue - case string(configs.Frozen): - if i > 1 { - logrus.Debugf("frozen after %d retries", i) - } - return nil - default: - // should never happen - return fmt.Errorf("unexpected state %s while freezing", strings.TrimSpace(state)) - } - } - // Despite our best efforts, it got stuck in FREEZING. - return errors.New("unable to freeze") - case configs.Thawed: - return cgroups.WriteFile(path, "freezer.state", string(configs.Thawed)) - case configs.Undefined: - return nil - default: - return fmt.Errorf("Invalid argument '%s' to freezer.state", string(r.Freezer)) - } -} - -func (s *FreezerGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} - -func (s *FreezerGroup) GetState(path string) (configs.FreezerState, error) { - for { - state, err := cgroups.ReadFile(path, "freezer.state") - if err != nil { - // If the kernel is too old, then we just treat the freezer as - // being in an "undefined" state. - if os.IsNotExist(err) || errors.Is(err, unix.ENODEV) { - err = nil - } - return configs.Undefined, err - } - switch strings.TrimSpace(state) { - case "THAWED": - return configs.Thawed, nil - case "FROZEN": - // Find out whether the cgroup is frozen directly, - // or indirectly via an ancestor. - self, err := cgroups.ReadFile(path, "freezer.self_freezing") - if err != nil { - // If the kernel is too old, then we just treat - // it as being frozen. - if errors.Is(err, os.ErrNotExist) || errors.Is(err, unix.ENODEV) { - err = nil - } - return configs.Frozen, err - } - switch self { - case "0\n": - return configs.Thawed, nil - case "1\n": - return configs.Frozen, nil - default: - return configs.Undefined, fmt.Errorf(`unknown "freezer.self_freezing" state: %q`, self) - } - case "FREEZING": - // Make sure we get a stable freezer state, so retry if the cgroup - // is still undergoing freezing. This should be a temporary delay. 
- time.Sleep(1 * time.Millisecond) - continue - default: - return configs.Undefined, fmt.Errorf("unknown freezer.state %q", state) - } - } -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go deleted file mode 100644 index 777b94f035..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/fs.go +++ /dev/null @@ -1,440 +0,0 @@ -// +build linux - -package fs - -import ( - "fmt" - "os" - "path/filepath" - "sync" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" - libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -var ( - subsystems = []subsystem{ - &CpusetGroup{}, - &DevicesGroup{}, - &MemoryGroup{}, - &CpuGroup{}, - &CpuacctGroup{}, - &PidsGroup{}, - &BlkioGroup{}, - &HugetlbGroup{}, - &NetClsGroup{}, - &NetPrioGroup{}, - &PerfEventGroup{}, - &FreezerGroup{}, - &NameGroup{GroupName: "name=systemd", Join: true}, - } - HugePageSizes, _ = cgroups.GetHugePageSize() -) - -var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") - -type subsystem interface { - // Name returns the name of the subsystem. - Name() string - // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. - GetStats(path string, stats *cgroups.Stats) error - // Creates and joins the cgroup represented by 'cgroupData'. - Apply(path string, c *cgroupData) error - // Set sets the cgroup resources. - Set(path string, r *configs.Resources) error -} - -type manager struct { - mu sync.Mutex - cgroups *configs.Cgroup - rootless bool // ignore permission-related errors - paths map[string]string -} - -func NewManager(cg *configs.Cgroup, paths map[string]string, rootless bool) cgroups.Manager { - return &manager{ - cgroups: cg, - paths: paths, - rootless: rootless, - } -} - -// The absolute path to the root of the cgroup hierarchies. -var ( - cgroupRootLock sync.Mutex - cgroupRoot string -) - -const defaultCgroupRoot = "/sys/fs/cgroup" - -func tryDefaultCgroupRoot() string { - var st, pst unix.Stat_t - - // (1) it should be a directory... - err := unix.Lstat(defaultCgroupRoot, &st) - if err != nil || st.Mode&unix.S_IFDIR == 0 { - return "" - } - - // (2) ... and a mount point ... - err = unix.Lstat(filepath.Dir(defaultCgroupRoot), &pst) - if err != nil { - return "" - } - - if st.Dev == pst.Dev { - // parent dir has the same dev -- not a mount point - return "" - } - - // (3) ... of 'tmpfs' fs type. - var fst unix.Statfs_t - err = unix.Statfs(defaultCgroupRoot, &fst) - if err != nil || fst.Type != unix.TMPFS_MAGIC { - return "" - } - - // (4) it should have at least 1 entry ... - dir, err := os.Open(defaultCgroupRoot) - if err != nil { - return "" - } - names, err := dir.Readdirnames(1) - if err != nil { - return "" - } - if len(names) < 1 { - return "" - } - // ... which is a cgroup mount point. - err = unix.Statfs(filepath.Join(defaultCgroupRoot, names[0]), &fst) - if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC { - return "" - } - - return defaultCgroupRoot -} - -// Gets the cgroupRoot. 
-func getCgroupRoot() (string, error) { - cgroupRootLock.Lock() - defer cgroupRootLock.Unlock() - - if cgroupRoot != "" { - return cgroupRoot, nil - } - - // fast path - cgroupRoot = tryDefaultCgroupRoot() - if cgroupRoot != "" { - return cgroupRoot, nil - } - - // slow path: parse mountinfo - mi, err := cgroups.GetCgroupMounts(false) - if err != nil { - return "", err - } - if len(mi) < 1 { - return "", errors.New("no cgroup mount found in mountinfo") - } - - // Get the first cgroup mount (e.g. "/sys/fs/cgroup/memory"), - // use its parent directory. - root := filepath.Dir(mi[0].Mountpoint) - - if _, err := os.Stat(root); err != nil { - return "", err - } - - cgroupRoot = root - return cgroupRoot, nil -} - -type cgroupData struct { - root string - innerPath string - config *configs.Cgroup - pid int -} - -// isIgnorableError returns whether err is a permission error (in the loose -// sense of the word). This includes EROFS (which for an unprivileged user is -// basically a permission error) and EACCES (for similar reasons) as well as -// the normal EPERM. -func isIgnorableError(rootless bool, err error) bool { - // We do not ignore errors if we are root. - if !rootless { - return false - } - // TODO: rm errors.Cause once we switch to %w everywhere - err = errors.Cause(err) - // Is it an ordinary EPERM? - if errors.Is(err, os.ErrPermission) { - return true - } - // Handle some specific syscall errors. - var errno unix.Errno - if errors.As(err, &errno) { - return errno == unix.EROFS || errno == unix.EPERM || errno == unix.EACCES - } - return false -} - -func (m *manager) Apply(pid int) (err error) { - if m.cgroups == nil { - return nil - } - m.mu.Lock() - defer m.mu.Unlock() - - c := m.cgroups - if c.Resources.Unified != nil { - return cgroups.ErrV1NoUnified - } - - m.paths = make(map[string]string) - if c.Paths != nil { - cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return err - } - for name, path := range c.Paths { - // XXX(kolyshkin@): why this check is needed? - if _, ok := cgMap[name]; ok { - m.paths[name] = path - } - } - return cgroups.EnterPid(m.paths, pid) - } - - d, err := getCgroupData(m.cgroups, pid) - if err != nil { - return err - } - - for _, sys := range subsystems { - p, err := d.path(sys.Name()) - if err != nil { - // The non-presence of the devices subsystem is - // considered fatal for security reasons. - if cgroups.IsNotFound(err) && (c.SkipDevices || sys.Name() != "devices") { - continue - } - return err - } - m.paths[sys.Name()] = p - - if err := sys.Apply(p, d); err != nil { - // In the case of rootless (including euid=0 in userns), where an - // explicit cgroup path hasn't been set, we don't bail on error in - // case of permission problems. Cases where limits have been set - // (and we couldn't create our own cgroup) are handled by Set. 
- if isIgnorableError(m.rootless, err) && m.cgroups.Path == "" { - delete(m.paths, sys.Name()) - continue - } - return err - } - - } - return nil -} - -func (m *manager) Destroy() error { - if m.cgroups == nil || m.cgroups.Paths != nil { - return nil - } - m.mu.Lock() - defer m.mu.Unlock() - return cgroups.RemovePaths(m.paths) -} - -func (m *manager) Path(subsys string) string { - m.mu.Lock() - defer m.mu.Unlock() - return m.paths[subsys] -} - -func (m *manager) GetStats() (*cgroups.Stats, error) { - m.mu.Lock() - defer m.mu.Unlock() - stats := cgroups.NewStats() - for _, sys := range subsystems { - path := m.paths[sys.Name()] - if path == "" { - continue - } - if err := sys.GetStats(path, stats); err != nil { - return nil, err - } - } - return stats, nil -} - -func (m *manager) Set(r *configs.Resources) error { - if r == nil { - return nil - } - - // If Paths are set, then we are just joining cgroups paths - // and there is no need to set any values. - if m.cgroups != nil && m.cgroups.Paths != nil { - return nil - } - if r.Unified != nil { - return cgroups.ErrV1NoUnified - } - - m.mu.Lock() - defer m.mu.Unlock() - for _, sys := range subsystems { - path := m.paths[sys.Name()] - if err := sys.Set(path, r); err != nil { - if m.rootless && sys.Name() == "devices" { - continue - } - // When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work. - // However, errors from other subsystems are not ignored. - // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" - if path == "" { - // We never created a path for this cgroup, so we cannot set - // limits for it (though we have already tried at this point). - return fmt.Errorf("cannot set %s limit: container could not join or create cgroup", sys.Name()) - } - return err - } - } - - return nil -} - -// Freeze toggles the container's freezer cgroup depending on the state -// provided -func (m *manager) Freeze(state configs.FreezerState) error { - path := m.Path("freezer") - if m.cgroups == nil || path == "" { - return errors.New("cannot toggle freezer: cgroups not configured for container") - } - - prevState := m.cgroups.Resources.Freezer - m.cgroups.Resources.Freezer = state - freezer := &FreezerGroup{} - if err := freezer.Set(path, m.cgroups.Resources); err != nil { - m.cgroups.Resources.Freezer = prevState - return err - } - return nil -} - -func (m *manager) GetPids() ([]int, error) { - return cgroups.GetPids(m.Path("devices")) -} - -func (m *manager) GetAllPids() ([]int, error) { - return cgroups.GetAllPids(m.Path("devices")) -} - -func getCgroupData(c *configs.Cgroup, pid int) (*cgroupData, error) { - root, err := getCgroupRoot() - if err != nil { - return nil, err - } - - if (c.Name != "" || c.Parent != "") && c.Path != "" { - return nil, errors.New("cgroup: either Path or Name and Parent should be used") - } - - // XXX: Do not remove this code. Path safety is important! -- cyphar - cgPath := libcontainerUtils.CleanPath(c.Path) - cgParent := libcontainerUtils.CleanPath(c.Parent) - cgName := libcontainerUtils.CleanPath(c.Name) - - innerPath := cgPath - if innerPath == "" { - innerPath = filepath.Join(cgParent, cgName) - } - - return &cgroupData{ - root: root, - innerPath: innerPath, - config: c, - pid: pid, - }, nil -} - -func (raw *cgroupData) path(subsystem string) (string, error) { - // If the cgroup name/path is absolute do not look relative to the cgroup of the init process. 
- if filepath.IsAbs(raw.innerPath) { - mnt, err := cgroups.FindCgroupMountpoint(raw.root, subsystem) - // If we didn't mount the subsystem, there is no point we make the path. - if err != nil { - return "", err - } - - // Sometimes subsystems can be mounted together as 'cpu,cpuacct'. - return filepath.Join(raw.root, filepath.Base(mnt), raw.innerPath), nil - } - - // Use GetOwnCgroupPath instead of GetInitCgroupPath, because the creating - // process could in container and shared pid namespace with host, and - // /proc/1/cgroup could point to whole other world of cgroups. - parentPath, err := cgroups.GetOwnCgroupPath(subsystem) - if err != nil { - return "", err - } - - return filepath.Join(parentPath, raw.innerPath), nil -} - -func join(path string, pid int) error { - if path == "" { - return nil - } - if err := os.MkdirAll(path, 0o755); err != nil { - return err - } - return cgroups.WriteCgroupProc(path, pid) -} - -func (m *manager) GetPaths() map[string]string { - m.mu.Lock() - defer m.mu.Unlock() - return m.paths -} - -func (m *manager) GetCgroups() (*configs.Cgroup, error) { - return m.cgroups, nil -} - -func (m *manager) GetFreezerState() (configs.FreezerState, error) { - dir := m.Path("freezer") - // If the container doesn't have the freezer cgroup, say it's undefined. - if dir == "" { - return configs.Undefined, nil - } - freezer := &FreezerGroup{} - return freezer.GetState(dir) -} - -func (m *manager) Exists() bool { - return cgroups.PathExists(m.Path("devices")) -} - -func OOMKillCount(path string) (uint64, error) { - return fscommon.GetValueByKey(path, "memory.oom_control", "oom_kill") -} - -func (m *manager) OOMKillCount() (uint64, error) { - c, err := OOMKillCount(m.Path("memory")) - // Ignore ENOENT when rootless as it couldn't create cgroup. - if err != nil && m.rootless && os.IsNotExist(err) { - err = nil - } - - return c, err -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go deleted file mode 100644 index 3cafc5399e..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/hugetlb.go +++ /dev/null @@ -1,65 +0,0 @@ -// +build linux - -package fs - -import ( - "fmt" - "strconv" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type HugetlbGroup struct{} - -func (s *HugetlbGroup) Name() string { - return "hugetlb" -} - -func (s *HugetlbGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *HugetlbGroup) Set(path string, r *configs.Resources) error { - for _, hugetlb := range r.HugetlbLimit { - if err := cgroups.WriteFile(path, "hugetlb."+hugetlb.Pagesize+".limit_in_bytes", strconv.FormatUint(hugetlb.Limit, 10)); err != nil { - return err - } - } - - return nil -} - -func (s *HugetlbGroup) GetStats(path string, stats *cgroups.Stats) error { - hugetlbStats := cgroups.HugetlbStats{} - if !cgroups.PathExists(path) { - return nil - } - for _, pageSize := range HugePageSizes { - usage := "hugetlb." + pageSize + ".usage_in_bytes" - value, err := fscommon.GetCgroupParamUint(path, usage) - if err != nil { - return fmt.Errorf("failed to parse %s - %v", usage, err) - } - hugetlbStats.Usage = value - - maxUsage := "hugetlb." 
+ pageSize + ".max_usage_in_bytes" - value, err = fscommon.GetCgroupParamUint(path, maxUsage) - if err != nil { - return fmt.Errorf("failed to parse %s - %v", maxUsage, err) - } - hugetlbStats.MaxUsage = value - - failcnt := "hugetlb." + pageSize + ".failcnt" - value, err = fscommon.GetCgroupParamUint(path, failcnt) - if err != nil { - return fmt.Errorf("failed to parse %s - %v", failcnt, err) - } - hugetlbStats.Failcnt = value - - stats.HugetlbStats[pageSize] = hugetlbStats - } - - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go deleted file mode 100644 index 33946726f1..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/memory.go +++ /dev/null @@ -1,352 +0,0 @@ -// +build linux - -package fs - -import ( - "bufio" - "fmt" - "math" - "os" - "path/filepath" - "strconv" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -const ( - cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes" - cgroupMemoryLimit = "memory.limit_in_bytes" - cgroupMemoryUsage = "memory.usage_in_bytes" - cgroupMemoryMaxUsage = "memory.max_usage_in_bytes" -) - -type MemoryGroup struct{} - -func (s *MemoryGroup) Name() string { - return "memory" -} - -func (s *MemoryGroup) Apply(path string, d *cgroupData) (err error) { - return join(path, d.pid) -} - -func setMemory(path string, val int64) error { - if val == 0 { - return nil - } - - err := cgroups.WriteFile(path, cgroupMemoryLimit, strconv.FormatInt(val, 10)) - if !errors.Is(err, unix.EBUSY) { - return err - } - - // EBUSY means the kernel can't set new limit as it's too low - // (lower than the current usage). Return more specific error. - usage, err := fscommon.GetCgroupParamUint(path, cgroupMemoryUsage) - if err != nil { - return err - } - max, err := fscommon.GetCgroupParamUint(path, cgroupMemoryMaxUsage) - if err != nil { - return err - } - - return errors.Errorf("unable to set memory limit to %d (current usage: %d, peak usage: %d)", val, usage, max) -} - -func setSwap(path string, val int64) error { - if val == 0 { - return nil - } - - return cgroups.WriteFile(path, cgroupMemorySwapLimit, strconv.FormatInt(val, 10)) -} - -func setMemoryAndSwap(path string, r *configs.Resources) error { - // If the memory update is set to -1 and the swap is not explicitly - // set, we should also set swap to -1, it means unlimited memory. - if r.Memory == -1 && r.MemorySwap == 0 { - // Only set swap if it's enabled in kernel - if cgroups.PathExists(filepath.Join(path, cgroupMemorySwapLimit)) { - r.MemorySwap = -1 - } - } - - // When memory and swap memory are both set, we need to handle the cases - // for updating container. - if r.Memory != 0 && r.MemorySwap != 0 { - curLimit, err := fscommon.GetCgroupParamUint(path, cgroupMemoryLimit) - if err != nil { - return err - } - - // When update memory limit, we should adapt the write sequence - // for memory and swap memory, so it won't fail because the new - // value and the old value don't fit kernel's validation. 
- if r.MemorySwap == -1 || curLimit < uint64(r.MemorySwap) { - if err := setSwap(path, r.MemorySwap); err != nil { - return err - } - if err := setMemory(path, r.Memory); err != nil { - return err - } - return nil - } - } - - if err := setMemory(path, r.Memory); err != nil { - return err - } - if err := setSwap(path, r.MemorySwap); err != nil { - return err - } - - return nil -} - -func (s *MemoryGroup) Set(path string, r *configs.Resources) error { - if err := setMemoryAndSwap(path, r); err != nil { - return err - } - - // ignore KernelMemory and KernelMemoryTCP - - if r.MemoryReservation != 0 { - if err := cgroups.WriteFile(path, "memory.soft_limit_in_bytes", strconv.FormatInt(r.MemoryReservation, 10)); err != nil { - return err - } - } - - if r.OomKillDisable { - if err := cgroups.WriteFile(path, "memory.oom_control", "1"); err != nil { - return err - } - } - if r.MemorySwappiness == nil || int64(*r.MemorySwappiness) == -1 { - return nil - } else if *r.MemorySwappiness <= 100 { - if err := cgroups.WriteFile(path, "memory.swappiness", strconv.FormatUint(*r.MemorySwappiness, 10)); err != nil { - return err - } - } else { - return fmt.Errorf("invalid value:%d. valid memory swappiness range is 0-100", *r.MemorySwappiness) - } - - return nil -} - -func (s *MemoryGroup) GetStats(path string, stats *cgroups.Stats) error { - // Set stats from memory.stat. - statsFile, err := cgroups.OpenFile(path, "memory.stat", os.O_RDONLY) - if err != nil { - if os.IsNotExist(err) { - return nil - } - return err - } - defer statsFile.Close() - - sc := bufio.NewScanner(statsFile) - for sc.Scan() { - t, v, err := fscommon.ParseKeyValue(sc.Text()) - if err != nil { - return fmt.Errorf("failed to parse memory.stat (%q) - %v", sc.Text(), err) - } - stats.MemoryStats.Stats[t] = v - } - stats.MemoryStats.Cache = stats.MemoryStats.Stats["cache"] - - memoryUsage, err := getMemoryData(path, "") - if err != nil { - return err - } - stats.MemoryStats.Usage = memoryUsage - swapUsage, err := getMemoryData(path, "memsw") - if err != nil { - return err - } - stats.MemoryStats.SwapUsage = swapUsage - kernelUsage, err := getMemoryData(path, "kmem") - if err != nil { - return err - } - stats.MemoryStats.KernelUsage = kernelUsage - kernelTCPUsage, err := getMemoryData(path, "kmem.tcp") - if err != nil { - return err - } - stats.MemoryStats.KernelTCPUsage = kernelTCPUsage - - value, err := fscommon.GetCgroupParamUint(path, "memory.use_hierarchy") - if err != nil { - return err - } - if value == 1 { - stats.MemoryStats.UseHierarchy = true - } - - pagesByNUMA, err := getPageUsageByNUMA(path) - if err != nil { - return err - } - stats.MemoryStats.PageUsageByNUMA = pagesByNUMA - - return nil -} - -func getMemoryData(path, name string) (cgroups.MemoryData, error) { - memoryData := cgroups.MemoryData{} - - moduleName := "memory" - if name != "" { - moduleName = "memory." + name - } - var ( - usage = moduleName + ".usage_in_bytes" - maxUsage = moduleName + ".max_usage_in_bytes" - failcnt = moduleName + ".failcnt" - limit = moduleName + ".limit_in_bytes" - ) - - value, err := fscommon.GetCgroupParamUint(path, usage) - if err != nil { - if name != "" && os.IsNotExist(err) { - // Ignore ENOENT as swap and kmem controllers - // are optional in the kernel. 
- return cgroups.MemoryData{}, nil - } - return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", usage, err) - } - memoryData.Usage = value - value, err = fscommon.GetCgroupParamUint(path, maxUsage) - if err != nil { - return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", maxUsage, err) - } - memoryData.MaxUsage = value - value, err = fscommon.GetCgroupParamUint(path, failcnt) - if err != nil { - return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", failcnt, err) - } - memoryData.Failcnt = value - value, err = fscommon.GetCgroupParamUint(path, limit) - if err != nil { - return cgroups.MemoryData{}, fmt.Errorf("failed to parse %s - %v", limit, err) - } - memoryData.Limit = value - - return memoryData, nil -} - -func getPageUsageByNUMA(cgroupPath string) (cgroups.PageUsageByNUMA, error) { - const ( - maxColumns = math.MaxUint8 + 1 - filename = "memory.numa_stat" - ) - stats := cgroups.PageUsageByNUMA{} - - file, err := cgroups.OpenFile(cgroupPath, filename, os.O_RDONLY) - if os.IsNotExist(err) { - return stats, nil - } else if err != nil { - return stats, err - } - defer file.Close() - - // File format is documented in linux/Documentation/cgroup-v1/memory.txt - // and it looks like this: - // - // total= N0= N1= ... - // file= N0= N1= ... - // anon= N0= N1= ... - // unevictable= N0= N1= ... - // hierarchical_= N0= N1= ... - - scanner := bufio.NewScanner(file) - for scanner.Scan() { - var field *cgroups.PageStats - - line := scanner.Text() - columns := strings.SplitN(line, " ", maxColumns) - for i, column := range columns { - byNode := strings.SplitN(column, "=", 2) - // Some custom kernels have non-standard fields, like - // numa_locality 0 0 0 0 0 0 0 0 0 0 - // numa_exectime 0 - if len(byNode) < 2 { - if i == 0 { - // Ignore/skip those. - break - } else { - // The first column was already validated, - // so be strict to the rest. - return stats, fmt.Errorf("malformed line %q in %s", - line, filename) - } - } - key, val := byNode[0], byNode[1] - if i == 0 { // First column: key is name, val is total. - field = getNUMAField(&stats, key) - if field == nil { // unknown field (new kernel?) - break - } - field.Total, err = strconv.ParseUint(val, 0, 64) - if err != nil { - return stats, err - } - field.Nodes = map[uint8]uint64{} - } else { // Subsequent columns: key is N, val is usage. - if len(key) < 2 || key[0] != 'N' { - // This is definitely an error. 
- return stats, fmt.Errorf("malformed line %q in %s", - line, filename) - } - - n, err := strconv.ParseUint(key[1:], 10, 8) - if err != nil { - return cgroups.PageUsageByNUMA{}, err - } - - usage, err := strconv.ParseUint(val, 10, 64) - if err != nil { - return cgroups.PageUsageByNUMA{}, err - } - - field.Nodes[uint8(n)] = usage - } - - } - } - err = scanner.Err() - if err != nil { - return cgroups.PageUsageByNUMA{}, err - } - - return stats, nil -} - -func getNUMAField(stats *cgroups.PageUsageByNUMA, name string) *cgroups.PageStats { - switch name { - case "total": - return &stats.Total - case "file": - return &stats.File - case "anon": - return &stats.Anon - case "unevictable": - return &stats.Unevictable - case "hierarchical_total": - return &stats.Hierarchical.Total - case "hierarchical_file": - return &stats.Hierarchical.File - case "hierarchical_anon": - return &stats.Hierarchical.Anon - case "hierarchical_unevictable": - return &stats.Hierarchical.Unevictable - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go deleted file mode 100644 index 94a94b5e8d..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/name.go +++ /dev/null @@ -1,33 +0,0 @@ -// +build linux - -package fs - -import ( - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type NameGroup struct { - GroupName string - Join bool -} - -func (s *NameGroup) Name() string { - return s.GroupName -} - -func (s *NameGroup) Apply(path string, d *cgroupData) error { - if s.Join { - // ignore errors if the named cgroup does not exist - _ = join(path, d.pid) - } - return nil -} - -func (s *NameGroup) Set(_ string, _ *configs.Resources) error { - return nil -} - -func (s *NameGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go deleted file mode 100644 index f2617aa444..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_cls.go +++ /dev/null @@ -1,34 +0,0 @@ -// +build linux - -package fs - -import ( - "strconv" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type NetClsGroup struct{} - -func (s *NetClsGroup) Name() string { - return "net_cls" -} - -func (s *NetClsGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *NetClsGroup) Set(path string, r *configs.Resources) error { - if r.NetClsClassid != 0 { - if err := cgroups.WriteFile(path, "net_cls.classid", strconv.FormatUint(uint64(r.NetClsClassid), 10)); err != nil { - return err - } - } - - return nil -} - -func (s *NetClsGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go deleted file mode 100644 index d0ac5e66bb..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/net_prio.go +++ /dev/null @@ -1,32 +0,0 @@ -// +build linux - -package fs - -import ( - "github.com/opencontainers/runc/libcontainer/cgroups" - 
"github.com/opencontainers/runc/libcontainer/configs" -) - -type NetPrioGroup struct{} - -func (s *NetPrioGroup) Name() string { - return "net_prio" -} - -func (s *NetPrioGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *NetPrioGroup) Set(path string, r *configs.Resources) error { - for _, prioMap := range r.NetPrioIfpriomap { - if err := cgroups.WriteFile(path, "net_prio.ifpriomap", prioMap.CgroupString()); err != nil { - return err - } - } - - return nil -} - -func (s *NetPrioGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go deleted file mode 100644 index 1a306fbe3f..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/perf_event.go +++ /dev/null @@ -1,26 +0,0 @@ -// +build linux - -package fs - -import ( - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type PerfEventGroup struct{} - -func (s *PerfEventGroup) Name() string { - return "perf_event" -} - -func (s *PerfEventGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *PerfEventGroup) Set(_ string, _ *configs.Resources) error { - return nil -} - -func (s *PerfEventGroup) GetStats(path string, stats *cgroups.Stats) error { - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go deleted file mode 100644 index 1b08433c45..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/pids.go +++ /dev/null @@ -1,68 +0,0 @@ -// +build linux - -package fs - -import ( - "fmt" - "path/filepath" - "strconv" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type PidsGroup struct{} - -func (s *PidsGroup) Name() string { - return "pids" -} - -func (s *PidsGroup) Apply(path string, d *cgroupData) error { - return join(path, d.pid) -} - -func (s *PidsGroup) Set(path string, r *configs.Resources) error { - if r.PidsLimit != 0 { - // "max" is the fallback value. - limit := "max" - - if r.PidsLimit > 0 { - limit = strconv.FormatInt(r.PidsLimit, 10) - } - - if err := cgroups.WriteFile(path, "pids.max", limit); err != nil { - return err - } - } - - return nil -} - -func (s *PidsGroup) GetStats(path string, stats *cgroups.Stats) error { - if !cgroups.PathExists(path) { - return nil - } - current, err := fscommon.GetCgroupParamUint(path, "pids.current") - if err != nil { - return fmt.Errorf("failed to parse pids.current - %s", err) - } - - maxString, err := fscommon.GetCgroupParamString(path, "pids.max") - if err != nil { - return fmt.Errorf("failed to parse pids.max - %s", err) - } - - // Default if pids.max == "max" is 0 -- which represents "no limit". 
- var max uint64 - if maxString != "max" { - max, err = fscommon.ParseUint(maxString, 10, 64) - if err != nil { - return fmt.Errorf("failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", maxString, filepath.Join(path, "pids.max")) - } - } - - stats.PidsStats.Current = current - stats.PidsStats.Limit = max - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go deleted file mode 100644 index 3ef9e03158..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/unsupported.go +++ /dev/null @@ -1,3 +0,0 @@ -// +build !linux - -package fs diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go deleted file mode 100644 index 25c47c9617..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpu.go +++ /dev/null @@ -1,85 +0,0 @@ -// +build linux - -package fs2 - -import ( - "bufio" - "os" - "strconv" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" -) - -func isCpuSet(r *configs.Resources) bool { - return r.CpuWeight != 0 || r.CpuQuota != 0 || r.CpuPeriod != 0 -} - -func setCpu(dirPath string, r *configs.Resources) error { - if !isCpuSet(r) { - return nil - } - - // NOTE: .CpuShares is not used here. Conversion is the caller's responsibility. - if r.CpuWeight != 0 { - if err := cgroups.WriteFile(dirPath, "cpu.weight", strconv.FormatUint(r.CpuWeight, 10)); err != nil { - return err - } - } - - if r.CpuQuota != 0 || r.CpuPeriod != 0 { - str := "max" - if r.CpuQuota > 0 { - str = strconv.FormatInt(r.CpuQuota, 10) - } - period := r.CpuPeriod - if period == 0 { - // This default value is documented in - // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html - period = 100000 - } - str += " " + strconv.FormatUint(period, 10) - if err := cgroups.WriteFile(dirPath, "cpu.max", str); err != nil { - return err - } - } - - return nil -} - -func statCpu(dirPath string, stats *cgroups.Stats) error { - f, err := cgroups.OpenFile(dirPath, "cpu.stat", os.O_RDONLY) - if err != nil { - return err - } - defer f.Close() - - sc := bufio.NewScanner(f) - for sc.Scan() { - t, v, err := fscommon.ParseKeyValue(sc.Text()) - if err != nil { - return err - } - switch t { - case "usage_usec": - stats.CpuStats.CpuUsage.TotalUsage = v * 1000 - - case "user_usec": - stats.CpuStats.CpuUsage.UsageInUsermode = v * 1000 - - case "system_usec": - stats.CpuStats.CpuUsage.UsageInKernelmode = v * 1000 - - case "nr_periods": - stats.CpuStats.ThrottlingData.Periods = v - - case "nr_throttled": - stats.CpuStats.ThrottlingData.ThrottledPeriods = v - - case "throttled_usec": - stats.CpuStats.ThrottlingData.ThrottledTime = v * 1000 - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go deleted file mode 100644 index da29d7f2bb..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/cpuset.go +++ /dev/null @@ -1,30 +0,0 @@ -// +build linux - -package fs2 - -import ( - "github.com/opencontainers/runc/libcontainer/cgroups" - 
"github.com/opencontainers/runc/libcontainer/configs" -) - -func isCpusetSet(r *configs.Resources) bool { - return r.CpusetCpus != "" || r.CpusetMems != "" -} - -func setCpuset(dirPath string, r *configs.Resources) error { - if !isCpusetSet(r) { - return nil - } - - if r.CpusetCpus != "" { - if err := cgroups.WriteFile(dirPath, "cpuset.cpus", r.CpusetCpus); err != nil { - return err - } - } - if r.CpusetMems != "" { - if err := cgroups.WriteFile(dirPath, "cpuset.mems", r.CpusetMems); err != nil { - return err - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go deleted file mode 100644 index 641123a4d8..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/create.go +++ /dev/null @@ -1,152 +0,0 @@ -package fs2 - -import ( - "fmt" - "os" - "path/filepath" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" -) - -func supportedControllers() (string, error) { - return cgroups.ReadFile(UnifiedMountpoint, "/cgroup.controllers") -} - -// needAnyControllers returns whether we enable some supported controllers or not, -// based on (1) controllers available and (2) resources that are being set. -// We don't check "pseudo" controllers such as -// "freezer" and "devices". -func needAnyControllers(r *configs.Resources) (bool, error) { - if r == nil { - return false, nil - } - - // list of all available controllers - content, err := supportedControllers() - if err != nil { - return false, err - } - avail := make(map[string]struct{}) - for _, ctr := range strings.Fields(content) { - avail[ctr] = struct{}{} - } - - // check whether the controller if available or not - have := func(controller string) bool { - _, ok := avail[controller] - return ok - } - - if isPidsSet(r) && have("pids") { - return true, nil - } - if isMemorySet(r) && have("memory") { - return true, nil - } - if isIoSet(r) && have("io") { - return true, nil - } - if isCpuSet(r) && have("cpu") { - return true, nil - } - if isCpusetSet(r) && have("cpuset") { - return true, nil - } - if isHugeTlbSet(r) && have("hugetlb") { - return true, nil - } - - return false, nil -} - -// containsDomainController returns whether the current config contains domain controller or not. -// Refer to: http://man7.org/linux/man-pages/man7/cgroups.7.html -// As at Linux 4.19, the following controllers are threaded: cpu, perf_event, and pids. -func containsDomainController(r *configs.Resources) bool { - return isMemorySet(r) || isIoSet(r) || isCpuSet(r) || isHugeTlbSet(r) -} - -// CreateCgroupPath creates cgroupv2 path, enabling all the supported controllers. 
-func CreateCgroupPath(path string, c *configs.Cgroup) (Err error) { - if !strings.HasPrefix(path, UnifiedMountpoint) { - return fmt.Errorf("invalid cgroup path %s", path) - } - - content, err := supportedControllers() - if err != nil { - return err - } - - const ( - cgTypeFile = "cgroup.type" - cgStCtlFile = "cgroup.subtree_control" - ) - ctrs := strings.Fields(content) - res := "+" + strings.Join(ctrs, " +") - - elements := strings.Split(path, "/") - elements = elements[3:] - current := "/sys/fs" - for i, e := range elements { - current = filepath.Join(current, e) - if i > 0 { - if err := os.Mkdir(current, 0o755); err != nil { - if !os.IsExist(err) { - return err - } - } else { - // If the directory was created, be sure it is not left around on errors. - current := current - defer func() { - if Err != nil { - os.Remove(current) - } - }() - } - cgType, _ := cgroups.ReadFile(current, cgTypeFile) - cgType = strings.TrimSpace(cgType) - switch cgType { - // If the cgroup is in an invalid mode (usually this means there's an internal - // process in the cgroup tree, because we created a cgroup under an - // already-populated-by-other-processes cgroup), then we have to error out if - // the user requested controllers which are not thread-aware. However, if all - // the controllers requested are thread-aware we can simply put the cgroup into - // threaded mode. - case "domain invalid": - if containsDomainController(c.Resources) { - return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in an invalid state", current) - } else { - // Not entirely correct (in theory we'd always want to be a domain -- - // since that means we're a properly delegated cgroup subtree) but in - // this case there's not much we can do and it's better than giving an - // error. - _ = cgroups.WriteFile(current, cgTypeFile, "threaded") - } - // If the cgroup is in (threaded) or (domain threaded) mode, we can only use thread-aware controllers - // (and you cannot usually take a cgroup out of threaded mode). - case "domain threaded": - fallthrough - case "threaded": - if containsDomainController(c.Resources) { - return fmt.Errorf("cannot enter cgroupv2 %q with domain controllers -- it is in %s mode", current, cgType) - } - } - } - // enable all supported controllers - if i < len(elements)-1 { - if err := cgroups.WriteFile(current, cgStCtlFile, res); err != nil { - // try write one by one - allCtrs := strings.Split(res, " ") - for _, ctr := range allCtrs { - _ = cgroups.WriteFile(current, cgStCtlFile, ctr) - } - } - // Some controllers might not be enabled when rootless or containerized, - // but we don't catch the error here. (Caught in setXXX() functions.) - } - } - - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go deleted file mode 100644 index ba81ce0b41..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/defaultpath.go +++ /dev/null @@ -1,103 +0,0 @@ -/* - Copyright The containerd Authors. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. 
- You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -*/ - -package fs2 - -import ( - "bufio" - "io" - "os" - "path/filepath" - "strings" - - "github.com/opencontainers/runc/libcontainer/configs" - libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" - "github.com/pkg/errors" -) - -const UnifiedMountpoint = "/sys/fs/cgroup" - -func defaultDirPath(c *configs.Cgroup) (string, error) { - if (c.Name != "" || c.Parent != "") && c.Path != "" { - return "", errors.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c) - } - if len(c.Paths) != 0 { - // never set by specconv - return "", errors.Errorf("cgroup: Paths is unsupported, use Path, got %+v", c) - } - - // XXX: Do not remove this code. Path safety is important! -- cyphar - cgPath := libcontainerUtils.CleanPath(c.Path) - cgParent := libcontainerUtils.CleanPath(c.Parent) - cgName := libcontainerUtils.CleanPath(c.Name) - - return _defaultDirPath(UnifiedMountpoint, cgPath, cgParent, cgName) -} - -func _defaultDirPath(root, cgPath, cgParent, cgName string) (string, error) { - if (cgName != "" || cgParent != "") && cgPath != "" { - return "", errors.New("cgroup: either Path or Name and Parent should be used") - } - innerPath := cgPath - if innerPath == "" { - innerPath = filepath.Join(cgParent, cgName) - } - if filepath.IsAbs(innerPath) { - return filepath.Join(root, innerPath), nil - } - - ownCgroup, err := parseCgroupFile("/proc/self/cgroup") - if err != nil { - return "", err - } - // The current user scope most probably has tasks in it already, - // making it impossible to enable controllers for its sub-cgroup. - // A parent cgroup (with no tasks in it) is what we need. 
- ownCgroup = filepath.Dir(ownCgroup) - - return filepath.Join(root, ownCgroup, innerPath), nil -} - -// parseCgroupFile parses /proc/PID/cgroup file and return string -func parseCgroupFile(path string) (string, error) { - f, err := os.Open(path) - if err != nil { - return "", err - } - defer f.Close() - return parseCgroupFromReader(f) -} - -func parseCgroupFromReader(r io.Reader) (string, error) { - s := bufio.NewScanner(r) - for s.Scan() { - var ( - text = s.Text() - parts = strings.SplitN(text, ":", 3) - ) - if len(parts) < 3 { - return "", errors.Errorf("invalid cgroup entry: %q", text) - } - // text is like "0::/user.slice/user-1001.slice/session-1.scope" - if parts[0] == "0" && parts[1] == "" { - return parts[2], nil - } - } - if err := s.Err(); err != nil { - return "", err - } - return "", errors.New("cgroup path not found") -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go deleted file mode 100644 index 7c501cad8a..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/devices.go +++ /dev/null @@ -1,76 +0,0 @@ -// +build linux - -package fs2 - -import ( - "github.com/opencontainers/runc/libcontainer/cgroups/ebpf" - "github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runc/libcontainer/userns" - - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -func isRWM(perms devices.Permissions) bool { - var r, w, m bool - for _, perm := range perms { - switch perm { - case 'r': - r = true - case 'w': - w = true - case 'm': - m = true - } - } - return r && w && m -} - -// This is similar to the logic applied in crun for handling errors from bpf(2) -// . -func canSkipEBPFError(r *configs.Resources) bool { - // If we're running in a user namespace we can ignore eBPF rules because we - // usually cannot use bpf(2), as well as rootless containers usually don't - // have the necessary privileges to mknod(2) device inodes or access - // host-level instances (though ideally we would be blocking device access - // for rootless containers anyway). - if userns.RunningInUserNS() { - return true - } - - // We cannot ignore an eBPF load error if any rule if is a block rule or it - // doesn't permit all access modes. - // - // NOTE: This will sometimes trigger in cases where access modes are split - // between different rules but to handle this correctly would require - // using ".../libcontainer/cgroup/devices".Emulator. 
- for _, dev := range r.Devices { - if !dev.Allow || !isRWM(dev.Permissions) { - return false - } - } - return true -} - -func setDevices(dirPath string, r *configs.Resources) error { - if r.SkipDevices { - return nil - } - insts, license, err := devicefilter.DeviceFilter(r.Devices) - if err != nil { - return err - } - dirFD, err := unix.Open(dirPath, unix.O_DIRECTORY|unix.O_RDONLY, 0o600) - if err != nil { - return errors.Errorf("cannot get dir FD for %s", dirPath) - } - defer unix.Close(dirFD) - if _, err := ebpf.LoadAttachCgroupDeviceFilter(insts, license, dirFD); err != nil { - if !canSkipEBPFError(r) { - return err - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go deleted file mode 100644 index e901f7a07b..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/freezer.go +++ /dev/null @@ -1,129 +0,0 @@ -// +build linux - -package fs2 - -import ( - "bufio" - stdErrors "errors" - "fmt" - "os" - "strings" - "time" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -func setFreezer(dirPath string, state configs.FreezerState) error { - var stateStr string - switch state { - case configs.Undefined: - return nil - case configs.Frozen: - stateStr = "1" - case configs.Thawed: - stateStr = "0" - default: - return errors.Errorf("invalid freezer state %q requested", state) - } - - fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDWR) - if err != nil { - // We can ignore this request as long as the user didn't ask us to - // freeze the container (since without the freezer cgroup, that's a - // no-op). - if state != configs.Frozen { - return nil - } - return errors.Wrap(err, "freezer not supported") - } - defer fd.Close() - - if _, err := fd.WriteString(stateStr); err != nil { - return err - } - // Confirm that the cgroup did actually change states. - if actualState, err := readFreezer(dirPath, fd); err != nil { - return err - } else if actualState != state { - return errors.Errorf(`expected "cgroup.freeze" to be in state %q but was in %q`, state, actualState) - } - return nil -} - -func getFreezer(dirPath string) (configs.FreezerState, error) { - fd, err := cgroups.OpenFile(dirPath, "cgroup.freeze", unix.O_RDONLY) - if err != nil { - // If the kernel is too old, then we just treat the freezer as being in - // an "undefined" state. - if os.IsNotExist(err) || stdErrors.Is(err, unix.ENODEV) { - err = nil - } - return configs.Undefined, err - } - defer fd.Close() - - return readFreezer(dirPath, fd) -} - -func readFreezer(dirPath string, fd *os.File) (configs.FreezerState, error) { - if _, err := fd.Seek(0, 0); err != nil { - return configs.Undefined, err - } - state := make([]byte, 2) - if _, err := fd.Read(state); err != nil { - return configs.Undefined, err - } - switch string(state) { - case "0\n": - return configs.Thawed, nil - case "1\n": - return waitFrozen(dirPath) - default: - return configs.Undefined, errors.Errorf(`unknown "cgroup.freeze" state: %q`, state) - } -} - -// waitFrozen polls cgroup.events until it sees "frozen 1" in it. 
-func waitFrozen(dirPath string) (configs.FreezerState, error) { - fd, err := cgroups.OpenFile(dirPath, "cgroup.events", unix.O_RDONLY) - if err != nil { - return configs.Undefined, err - } - defer fd.Close() - - // XXX: Simple wait/read/retry is used here. An implementation - // based on poll(2) or inotify(7) is possible, but it makes the code - // much more complicated. Maybe address this later. - const ( - // Perform maxIter with waitTime in between iterations. - waitTime = 10 * time.Millisecond - maxIter = 1000 - ) - scanner := bufio.NewScanner(fd) - for i := 0; scanner.Scan(); { - if i == maxIter { - return configs.Undefined, fmt.Errorf("timeout of %s reached waiting for the cgroup to freeze", waitTime*maxIter) - } - line := scanner.Text() - val := strings.TrimPrefix(line, "frozen ") - if val != line { // got prefix - if val[0] == '1' { - return configs.Frozen, nil - } - - i++ - // wait, then re-read - time.Sleep(waitTime) - _, err := fd.Seek(0, 0) - if err != nil { - return configs.Undefined, err - } - } - } - // Should only reach here either on read error, - // or if the file does not contain "frozen " line. - return configs.Undefined, scanner.Err() -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go deleted file mode 100644 index afba0ab1c8..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go +++ /dev/null @@ -1,251 +0,0 @@ -// +build linux - -package fs2 - -import ( - "fmt" - "os" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" -) - -type manager struct { - config *configs.Cgroup - // dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" - dirPath string - // controllers is content of "cgroup.controllers" file. - // excludes pseudo-controllers ("devices" and "freezer"). - controllers map[string]struct{} - rootless bool -} - -// NewManager creates a manager for cgroup v2 unified hierarchy. -// dirPath is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope". -// If dirPath is empty, it is automatically set using config. 
-func NewManager(config *configs.Cgroup, dirPath string, rootless bool) (cgroups.Manager, error) { - if config == nil { - config = &configs.Cgroup{} - } - if dirPath == "" { - var err error - dirPath, err = defaultDirPath(config) - if err != nil { - return nil, err - } - } - - m := &manager{ - config: config, - dirPath: dirPath, - rootless: rootless, - } - return m, nil -} - -func (m *manager) getControllers() error { - if m.controllers != nil { - return nil - } - - data, err := cgroups.ReadFile(m.dirPath, "cgroup.controllers") - if err != nil { - if m.rootless && m.config.Path == "" { - return nil - } - return err - } - fields := strings.Fields(data) - m.controllers = make(map[string]struct{}, len(fields)) - for _, c := range fields { - m.controllers[c] = struct{}{} - } - - return nil -} - -func (m *manager) Apply(pid int) error { - if err := CreateCgroupPath(m.dirPath, m.config); err != nil { - // Related tests: - // - "runc create (no limits + no cgrouppath + no permission) succeeds" - // - "runc create (rootless + no limits + cgrouppath + no permission) fails with permission error" - // - "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" - if m.rootless { - if m.config.Path == "" { - if blNeed, nErr := needAnyControllers(m.config.Resources); nErr == nil && !blNeed { - return nil - } - return errors.Wrap(err, "rootless needs no limits + no cgrouppath when no permission is granted for cgroups") - } - } - return err - } - if err := cgroups.WriteCgroupProc(m.dirPath, pid); err != nil { - return err - } - return nil -} - -func (m *manager) GetPids() ([]int, error) { - return cgroups.GetPids(m.dirPath) -} - -func (m *manager) GetAllPids() ([]int, error) { - return cgroups.GetAllPids(m.dirPath) -} - -func (m *manager) GetStats() (*cgroups.Stats, error) { - var errs []error - - st := cgroups.NewStats() - - // pids (since kernel 4.5) - if err := statPids(m.dirPath, st); err != nil { - errs = append(errs, err) - } - // memory (since kernel 4.5) - if err := statMemory(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) - } - // io (since kernel 4.5) - if err := statIo(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) - } - // cpu (since kernel 4.15) - // Note cpu.stat is available even if the controller is not enabled. 
- if err := statCpu(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) - } - // hugetlb (since kernel 5.6) - if err := statHugeTlb(m.dirPath, st); err != nil && !os.IsNotExist(err) { - errs = append(errs, err) - } - if len(errs) > 0 && !m.rootless { - return st, errors.Errorf("error while statting cgroup v2: %+v", errs) - } - return st, nil -} - -func (m *manager) Freeze(state configs.FreezerState) error { - if err := setFreezer(m.dirPath, state); err != nil { - return err - } - m.config.Resources.Freezer = state - return nil -} - -func (m *manager) Destroy() error { - return cgroups.RemovePath(m.dirPath) -} - -func (m *manager) Path(_ string) string { - return m.dirPath -} - -func (m *manager) Set(r *configs.Resources) error { - if err := m.getControllers(); err != nil { - return err - } - // pids (since kernel 4.5) - if err := setPids(m.dirPath, r); err != nil { - return err - } - // memory (since kernel 4.5) - if err := setMemory(m.dirPath, r); err != nil { - return err - } - // io (since kernel 4.5) - if err := setIo(m.dirPath, r); err != nil { - return err - } - // cpu (since kernel 4.15) - if err := setCpu(m.dirPath, r); err != nil { - return err - } - // devices (since kernel 4.15, pseudo-controller) - // - // When m.rootless is true, errors from the device subsystem are ignored because it is really not expected to work. - // However, errors from other subsystems are not ignored. - // see @test "runc create (rootless + limits + no cgrouppath + no permission) fails with informative error" - if err := setDevices(m.dirPath, r); err != nil && !m.rootless { - return err - } - // cpuset (since kernel 5.0) - if err := setCpuset(m.dirPath, r); err != nil { - return err - } - // hugetlb (since kernel 5.6) - if err := setHugeTlb(m.dirPath, r); err != nil { - return err - } - // freezer (since kernel 5.2, pseudo-controller) - if err := setFreezer(m.dirPath, r.Freezer); err != nil { - return err - } - if err := m.setUnified(r.Unified); err != nil { - return err - } - m.config.Resources = r - return nil -} - -func (m *manager) setUnified(res map[string]string) error { - for k, v := range res { - if strings.Contains(k, "/") { - return fmt.Errorf("unified resource %q must be a file name (no slashes)", k) - } - if err := cgroups.WriteFile(m.dirPath, k, v); err != nil { - errC := errors.Cause(err) - // Check for both EPERM and ENOENT since O_CREAT is used by WriteFile. - if errors.Is(errC, os.ErrPermission) || errors.Is(errC, os.ErrNotExist) { - // Check if a controller is available, - // to give more specific error if not. 
- sk := strings.SplitN(k, ".", 2) - if len(sk) != 2 { - return fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k) - } - c := sk[0] - if _, ok := m.controllers[c]; !ok && c != "cgroup" { - return fmt.Errorf("unified resource %q can't be set: controller %q not available", k, c) - } - } - return errors.Wrapf(err, "can't set unified resource %q", k) - } - } - - return nil -} - -func (m *manager) GetPaths() map[string]string { - paths := make(map[string]string, 1) - paths[""] = m.dirPath - return paths -} - -func (m *manager) GetCgroups() (*configs.Cgroup, error) { - return m.config, nil -} - -func (m *manager) GetFreezerState() (configs.FreezerState, error) { - return getFreezer(m.dirPath) -} - -func (m *manager) Exists() bool { - return cgroups.PathExists(m.dirPath) -} - -func OOMKillCount(path string) (uint64, error) { - return fscommon.GetValueByKey(path, "memory.events", "oom_kill") -} - -func (m *manager) OOMKillCount() (uint64, error) { - c, err := OOMKillCount(m.dirPath) - if err != nil && m.rootless && os.IsNotExist(err) { - err = nil - } - - return c, err -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go deleted file mode 100644 index 3f513975bd..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/hugetlb.go +++ /dev/null @@ -1,57 +0,0 @@ -// +build linux - -package fs2 - -import ( - "strconv" - - "github.com/pkg/errors" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" -) - -func isHugeTlbSet(r *configs.Resources) bool { - return len(r.HugetlbLimit) > 0 -} - -func setHugeTlb(dirPath string, r *configs.Resources) error { - if !isHugeTlbSet(r) { - return nil - } - for _, hugetlb := range r.HugetlbLimit { - if err := cgroups.WriteFile(dirPath, "hugetlb."+hugetlb.Pagesize+".max", strconv.FormatUint(hugetlb.Limit, 10)); err != nil { - return err - } - } - - return nil -} - -func statHugeTlb(dirPath string, stats *cgroups.Stats) error { - hugePageSizes, err := cgroups.GetHugePageSize() - if err != nil { - return errors.Wrap(err, "failed to fetch hugetlb info") - } - hugetlbStats := cgroups.HugetlbStats{} - - for _, pagesize := range hugePageSizes { - value, err := fscommon.GetCgroupParamUint(dirPath, "hugetlb."+pagesize+".current") - if err != nil { - return err - } - hugetlbStats.Usage = value - - fileName := "hugetlb." 
+ pagesize + ".events" - value, err = fscommon.GetValueByKey(dirPath, fileName, "max") - if err != nil { - return errors.Wrap(err, "failed to read stats") - } - hugetlbStats.Failcnt = value - - stats.HugetlbStats[pagesize] = hugetlbStats - } - - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go deleted file mode 100644 index fd3f0993ea..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/io.go +++ /dev/null @@ -1,194 +0,0 @@ -// +build linux - -package fs2 - -import ( - "bufio" - "bytes" - "fmt" - "os" - "strconv" - "strings" - - "github.com/sirupsen/logrus" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" -) - -func isIoSet(r *configs.Resources) bool { - return r.BlkioWeight != 0 || - len(r.BlkioWeightDevice) > 0 || - len(r.BlkioThrottleReadBpsDevice) > 0 || - len(r.BlkioThrottleWriteBpsDevice) > 0 || - len(r.BlkioThrottleReadIOPSDevice) > 0 || - len(r.BlkioThrottleWriteIOPSDevice) > 0 -} - -// bfqDeviceWeightSupported checks for per-device BFQ weight support (added -// in kernel v5.4, commit 795fe54c2a8) by reading from "io.bfq.weight". -func bfqDeviceWeightSupported(bfq *os.File) bool { - if bfq == nil { - return false - } - _, _ = bfq.Seek(0, 0) - buf := make([]byte, 32) - _, _ = bfq.Read(buf) - // If only a single number (default weight) if read back, we have older kernel. - _, err := strconv.ParseInt(string(bytes.TrimSpace(buf)), 10, 64) - return err != nil -} - -func setIo(dirPath string, r *configs.Resources) error { - if !isIoSet(r) { - return nil - } - - // If BFQ IO scheduler is available, use it. - var bfq *os.File - if r.BlkioWeight != 0 || len(r.BlkioWeightDevice) > 0 { - var err error - bfq, err = cgroups.OpenFile(dirPath, "io.bfq.weight", os.O_RDWR) - if err == nil { - defer bfq.Close() - } else if !os.IsNotExist(err) { - return err - } - } - - if r.BlkioWeight != 0 { - if bfq != nil { // Use BFQ. - if _, err := bfq.WriteString(strconv.FormatUint(uint64(r.BlkioWeight), 10)); err != nil { - return err - } - } else { - // Fallback to io.weight with a conversion scheme. 
- v := cgroups.ConvertBlkIOToIOWeightValue(r.BlkioWeight) - if err := cgroups.WriteFile(dirPath, "io.weight", strconv.FormatUint(v, 10)); err != nil { - return err - } - } - } - if bfqDeviceWeightSupported(bfq) { - for _, wd := range r.BlkioWeightDevice { - if _, err := bfq.WriteString(wd.WeightString() + "\n"); err != nil { - return fmt.Errorf("setting device weight %q: %w", wd.WeightString(), err) - } - } - } - for _, td := range r.BlkioThrottleReadBpsDevice { - if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("rbps")); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleWriteBpsDevice { - if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wbps")); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleReadIOPSDevice { - if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("riops")); err != nil { - return err - } - } - for _, td := range r.BlkioThrottleWriteIOPSDevice { - if err := cgroups.WriteFile(dirPath, "io.max", td.StringName("wiops")); err != nil { - return err - } - } - - return nil -} - -func readCgroup2MapFile(dirPath string, name string) (map[string][]string, error) { - ret := map[string][]string{} - f, err := cgroups.OpenFile(dirPath, name, os.O_RDONLY) - if err != nil { - return nil, err - } - defer f.Close() - scanner := bufio.NewScanner(f) - for scanner.Scan() { - line := scanner.Text() - parts := strings.Fields(line) - if len(parts) < 2 { - continue - } - ret[parts[0]] = parts[1:] - } - if err := scanner.Err(); err != nil { - return nil, err - } - return ret, nil -} - -func statIo(dirPath string, stats *cgroups.Stats) error { - values, err := readCgroup2MapFile(dirPath, "io.stat") - if err != nil { - return err - } - // more details on the io.stat file format: https://www.kernel.org/doc/Documentation/cgroup-v2.txt - var parsedStats cgroups.BlkioStats - for k, v := range values { - d := strings.Split(k, ":") - if len(d) != 2 { - continue - } - major, err := strconv.ParseUint(d[0], 10, 64) - if err != nil { - return err - } - minor, err := strconv.ParseUint(d[1], 10, 64) - if err != nil { - return err - } - - for _, item := range v { - d := strings.Split(item, "=") - if len(d) != 2 { - continue - } - op := d[0] - - // Map to the cgroupv1 naming and layout (in separate tables). - var targetTable *[]cgroups.BlkioStatEntry - switch op { - // Equivalent to cgroupv1's blkio.io_service_bytes. - case "rbytes": - op = "Read" - targetTable = &parsedStats.IoServiceBytesRecursive - case "wbytes": - op = "Write" - targetTable = &parsedStats.IoServiceBytesRecursive - // Equivalent to cgroupv1's blkio.io_serviced. - case "rios": - op = "Read" - targetTable = &parsedStats.IoServicedRecursive - case "wios": - op = "Write" - targetTable = &parsedStats.IoServicedRecursive - default: - // Skip over entries we cannot map to cgroupv1 stats for now. - // In the future we should expand the stats struct to include - // them. 
- logrus.Debugf("cgroupv2 io stats: skipping over unmappable %s entry", item) - continue - } - - value, err := strconv.ParseUint(d[1], 10, 64) - if err != nil { - return err - } - - entry := cgroups.BlkioStatEntry{ - Op: op, - Major: major, - Minor: minor, - Value: value, - } - *targetTable = append(*targetTable, entry) - } - } - stats.BlkioStats = parsedStats - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go deleted file mode 100644 index 53e8f1e934..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go +++ /dev/null @@ -1,213 +0,0 @@ -// +build linux - -package fs2 - -import ( - "bufio" - "math" - "os" - "strconv" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -// numToStr converts an int64 value to a string for writing to a -// cgroupv2 files with .min, .max, .low, or .high suffix. -// The value of -1 is converted to "max" for cgroupv1 compatibility -// (which used to write -1 to remove the limit). -func numToStr(value int64) (ret string) { - switch { - case value == 0: - ret = "" - case value == -1: - ret = "max" - default: - ret = strconv.FormatInt(value, 10) - } - - return ret -} - -func isMemorySet(r *configs.Resources) bool { - return r.MemoryReservation != 0 || r.Memory != 0 || r.MemorySwap != 0 -} - -func setMemory(dirPath string, r *configs.Resources) error { - if !isMemorySet(r) { - return nil - } - swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) - if err != nil { - return err - } - swapStr := numToStr(swap) - if swapStr == "" && swap == 0 && r.MemorySwap > 0 { - // memory and memorySwap set to the same value -- disable swap - swapStr = "0" - } - // never write empty string to `memory.swap.max`, it means set to 0. - if swapStr != "" { - if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil { - return err - } - } - - if val := numToStr(r.Memory); val != "" { - if err := cgroups.WriteFile(dirPath, "memory.max", val); err != nil { - return err - } - } - - // cgroup.Resources.KernelMemory is ignored - - if val := numToStr(r.MemoryReservation); val != "" { - if err := cgroups.WriteFile(dirPath, "memory.low", val); err != nil { - return err - } - } - - return nil -} - -func statMemory(dirPath string, stats *cgroups.Stats) error { - // Set stats from memory.stat. - statsFile, err := cgroups.OpenFile(dirPath, "memory.stat", os.O_RDONLY) - if err != nil { - return err - } - defer statsFile.Close() - - sc := bufio.NewScanner(statsFile) - for sc.Scan() { - t, v, err := fscommon.ParseKeyValue(sc.Text()) - if err != nil { - return errors.Wrapf(err, "failed to parse memory.stat (%q)", sc.Text()) - } - stats.MemoryStats.Stats[t] = v - } - stats.MemoryStats.Cache = stats.MemoryStats.Stats["file"] - // Unlike cgroup v1 which has memory.use_hierarchy binary knob, - // cgroup v2 is always hierarchical. - stats.MemoryStats.UseHierarchy = true - - memoryUsage, err := getMemoryDataV2(dirPath, "") - if err != nil { - if errors.Is(err, unix.ENOENT) && dirPath == UnifiedMountpoint { - // The root cgroup does not have memory.{current,max} - // so emulate those using data from /proc/meminfo. 
- return statsFromMeminfo(stats) - } - return err - } - stats.MemoryStats.Usage = memoryUsage - swapUsage, err := getMemoryDataV2(dirPath, "swap") - if err != nil { - return err - } - // As cgroup v1 reports SwapUsage values as mem+swap combined, - // while in cgroup v2 swap values do not include memory, - // report combined mem+swap for v1 compatibility. - swapUsage.Usage += memoryUsage.Usage - if swapUsage.Limit != math.MaxUint64 { - swapUsage.Limit += memoryUsage.Limit - } - stats.MemoryStats.SwapUsage = swapUsage - - return nil -} - -func getMemoryDataV2(path, name string) (cgroups.MemoryData, error) { - memoryData := cgroups.MemoryData{} - - moduleName := "memory" - if name != "" { - moduleName = "memory." + name - } - usage := moduleName + ".current" - limit := moduleName + ".max" - - value, err := fscommon.GetCgroupParamUint(path, usage) - if err != nil { - if name != "" && os.IsNotExist(err) { - // Ignore EEXIST as there's no swap accounting - // if kernel CONFIG_MEMCG_SWAP is not set or - // swapaccount=0 kernel boot parameter is given. - return cgroups.MemoryData{}, nil - } - return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", usage) - } - memoryData.Usage = value - - value, err = fscommon.GetCgroupParamUint(path, limit) - if err != nil { - return cgroups.MemoryData{}, errors.Wrapf(err, "failed to parse %s", limit) - } - memoryData.Limit = value - - return memoryData, nil -} - -func statsFromMeminfo(stats *cgroups.Stats) error { - f, err := os.Open("/proc/meminfo") - if err != nil { - return err - } - defer f.Close() - - // Fields we are interested in. - var ( - swap_free uint64 - swap_total uint64 - main_total uint64 - main_free uint64 - ) - mem := map[string]*uint64{ - "SwapFree": &swap_free, - "SwapTotal": &swap_total, - "MemTotal": &main_total, - "MemFree": &main_free, - } - - found := 0 - sc := bufio.NewScanner(f) - for sc.Scan() { - parts := strings.SplitN(sc.Text(), ":", 3) - if len(parts) != 2 { - // Should not happen. - continue - } - k := parts[0] - p, ok := mem[k] - if !ok { - // Unknown field -- not interested. - continue - } - vStr := strings.TrimSpace(strings.TrimSuffix(parts[1], " kB")) - *p, err = strconv.ParseUint(vStr, 10, 64) - if err != nil { - return errors.Wrap(err, "parsing /proc/meminfo "+k) - } - - found++ - if found == len(mem) { - // Got everything we need -- skip the rest. 
- break - } - } - if sc.Err() != nil { - return sc.Err() - } - - stats.MemoryStats.SwapUsage.Usage = (swap_total - swap_free) * 1024 - stats.MemoryStats.SwapUsage.Limit = math.MaxUint64 - - stats.MemoryStats.Usage.Usage = (main_total - main_free) * 1024 - stats.MemoryStats.Usage.Limit = math.MaxUint64 - - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go deleted file mode 100644 index e2050002d0..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/pids.go +++ /dev/null @@ -1,77 +0,0 @@ -// +build linux - -package fs2 - -import ( - "os" - "path/filepath" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" - "golang.org/x/sys/unix" -) - -func isPidsSet(r *configs.Resources) bool { - return r.PidsLimit != 0 -} - -func setPids(dirPath string, r *configs.Resources) error { - if !isPidsSet(r) { - return nil - } - if val := numToStr(r.PidsLimit); val != "" { - if err := cgroups.WriteFile(dirPath, "pids.max", val); err != nil { - return err - } - } - - return nil -} - -func statPidsFromCgroupProcs(dirPath string, stats *cgroups.Stats) error { - // if the controller is not enabled, let's read PIDS from cgroups.procs - // (or threads if cgroup.threads is enabled) - contents, err := cgroups.ReadFile(dirPath, "cgroup.procs") - if errors.Is(err, unix.ENOTSUP) { - contents, err = cgroups.ReadFile(dirPath, "cgroup.threads") - } - if err != nil { - return err - } - pids := strings.Count(contents, "\n") - stats.PidsStats.Current = uint64(pids) - stats.PidsStats.Limit = 0 - return nil -} - -func statPids(dirPath string, stats *cgroups.Stats) error { - current, err := fscommon.GetCgroupParamUint(dirPath, "pids.current") - if err != nil { - if os.IsNotExist(err) { - return statPidsFromCgroupProcs(dirPath, stats) - } - return errors.Wrap(err, "failed to parse pids.current") - } - - maxString, err := fscommon.GetCgroupParamString(dirPath, "pids.max") - if err != nil { - return errors.Wrap(err, "failed to parse pids.max") - } - - // Default if pids.max == "max" is 0 -- which represents "no limit". - var max uint64 - if maxString != "max" { - max, err = fscommon.ParseUint(maxString, 10, 64) - if err != nil { - return errors.Wrapf(err, "failed to parse pids.max - unable to parse %q as a uint from Cgroup file %q", - maxString, filepath.Join(dirPath, "pids.max")) - } - } - - stats.PidsStats.Current = current - stats.PidsStats.Limit = max - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go deleted file mode 100644 index e31146ae9d..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fscommon/utils.go +++ /dev/null @@ -1,131 +0,0 @@ -// +build linux - -package fscommon - -import ( - "errors" - "fmt" - "math" - "strconv" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" -) - -var ( - ErrNotValidFormat = errors.New("line is not a valid key value format") - - // Deprecated: use cgroups.OpenFile instead. - OpenFile = cgroups.OpenFile - // Deprecated: use cgroups.ReadFile instead. - ReadFile = cgroups.ReadFile - // Deprecated: use cgroups.WriteFile instead. 
- WriteFile = cgroups.WriteFile -) - -// ParseUint converts a string to an uint64 integer. -// Negative values are returned at zero as, due to kernel bugs, -// some of the memory cgroup stats can be negative. -func ParseUint(s string, base, bitSize int) (uint64, error) { - value, err := strconv.ParseUint(s, base, bitSize) - if err != nil { - intValue, intErr := strconv.ParseInt(s, base, bitSize) - // 1. Handle negative values greater than MinInt64 (and) - // 2. Handle negative values lesser than MinInt64 - if intErr == nil && intValue < 0 { - return 0, nil - } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { - return 0, nil - } - - return value, err - } - - return value, nil -} - -// ParseKeyValue parses a space-separated "name value" kind of cgroup -// parameter and returns its key as a string, and its value as uint64 -// (ParseUint is used to convert the value). For example, -// "io_service_bytes 1234" will be returned as "io_service_bytes", 1234. -func ParseKeyValue(t string) (string, uint64, error) { - parts := strings.SplitN(t, " ", 3) - if len(parts) != 2 { - return "", 0, fmt.Errorf("line %q is not in key value format", t) - } - - value, err := ParseUint(parts[1], 10, 64) - if err != nil { - return "", 0, fmt.Errorf("unable to convert to uint64: %v", err) - } - - return parts[0], value, nil -} - -// GetValueByKey reads a key-value pairs from the specified cgroup file, -// and returns a value of the specified key. ParseUint is used for value -// conversion. -func GetValueByKey(path, file, key string) (uint64, error) { - content, err := cgroups.ReadFile(path, file) - if err != nil { - return 0, err - } - - lines := strings.Split(string(content), "\n") - for _, line := range lines { - arr := strings.Split(line, " ") - if len(arr) == 2 && arr[0] == key { - return ParseUint(arr[1], 10, 64) - } - } - - return 0, nil -} - -// GetCgroupParamUint reads a single uint64 value from the specified cgroup file. -// If the value read is "max", the math.MaxUint64 is returned. -func GetCgroupParamUint(path, file string) (uint64, error) { - contents, err := GetCgroupParamString(path, file) - if err != nil { - return 0, err - } - contents = strings.TrimSpace(contents) - if contents == "max" { - return math.MaxUint64, nil - } - - res, err := ParseUint(contents, 10, 64) - if err != nil { - return res, fmt.Errorf("unable to parse file %q", path+"/"+file) - } - return res, nil -} - -// GetCgroupParamInt reads a single int64 value from specified cgroup file. -// If the value read is "max", the math.MaxInt64 is returned. -func GetCgroupParamInt(path, file string) (int64, error) { - contents, err := cgroups.ReadFile(path, file) - if err != nil { - return 0, err - } - contents = strings.TrimSpace(contents) - if contents == "max" { - return math.MaxInt64, nil - } - - res, err := strconv.ParseInt(contents, 10, 64) - if err != nil { - return res, fmt.Errorf("unable to parse %q as a int from Cgroup file %q", contents, path+"/"+file) - } - return res, nil -} - -// GetCgroupParamString reads a string from the specified cgroup file. 
-func GetCgroupParamString(path, file string) (string, error) { - contents, err := cgroups.ReadFile(path, file) - if err != nil { - return "", err - } - - return strings.TrimSpace(contents), nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go deleted file mode 100644 index e7f9c46263..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/stats.go +++ /dev/null @@ -1,163 +0,0 @@ -// +build linux - -package cgroups - -type ThrottlingData struct { - // Number of periods with throttling active - Periods uint64 `json:"periods,omitempty"` - // Number of periods when the container hit its throttling limit. - ThrottledPeriods uint64 `json:"throttled_periods,omitempty"` - // Aggregate time the container was throttled for in nanoseconds. - ThrottledTime uint64 `json:"throttled_time,omitempty"` -} - -// CpuUsage denotes the usage of a CPU. -// All CPU stats are aggregate since container inception. -type CpuUsage struct { - // Total CPU time consumed. - // Units: nanoseconds. - TotalUsage uint64 `json:"total_usage,omitempty"` - // Total CPU time consumed per core. - // Units: nanoseconds. - PercpuUsage []uint64 `json:"percpu_usage,omitempty"` - // CPU time consumed per core in kernel mode - // Units: nanoseconds. - PercpuUsageInKernelmode []uint64 `json:"percpu_usage_in_kernelmode"` - // CPU time consumed per core in user mode - // Units: nanoseconds. - PercpuUsageInUsermode []uint64 `json:"percpu_usage_in_usermode"` - // Time spent by tasks of the cgroup in kernel mode. - // Units: nanoseconds. - UsageInKernelmode uint64 `json:"usage_in_kernelmode"` - // Time spent by tasks of the cgroup in user mode. - // Units: nanoseconds. 
- UsageInUsermode uint64 `json:"usage_in_usermode"` -} - -type CpuStats struct { - CpuUsage CpuUsage `json:"cpu_usage,omitempty"` - ThrottlingData ThrottlingData `json:"throttling_data,omitempty"` -} - -type CPUSetStats struct { - // List of the physical numbers of the CPUs on which processes - // in that cpuset are allowed to execute - CPUs []uint16 `json:"cpus,omitempty"` - // cpu_exclusive flag - CPUExclusive uint64 `json:"cpu_exclusive"` - // List of memory nodes on which processes in that cpuset - // are allowed to allocate memory - Mems []uint16 `json:"mems,omitempty"` - // mem_hardwall flag - MemHardwall uint64 `json:"mem_hardwall"` - // mem_exclusive flag - MemExclusive uint64 `json:"mem_exclusive"` - // memory_migrate flag - MemoryMigrate uint64 `json:"memory_migrate"` - // memory_spread page flag - MemorySpreadPage uint64 `json:"memory_spread_page"` - // memory_spread slab flag - MemorySpreadSlab uint64 `json:"memory_spread_slab"` - // memory_pressure - MemoryPressure uint64 `json:"memory_pressure"` - // sched_load balance flag - SchedLoadBalance uint64 `json:"sched_load_balance"` - // sched_relax_domain_level - SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"` -} - -type MemoryData struct { - Usage uint64 `json:"usage,omitempty"` - MaxUsage uint64 `json:"max_usage,omitempty"` - Failcnt uint64 `json:"failcnt"` - Limit uint64 `json:"limit"` -} - -type MemoryStats struct { - // memory used for cache - Cache uint64 `json:"cache,omitempty"` - // usage of memory - Usage MemoryData `json:"usage,omitempty"` - // usage of memory + swap - SwapUsage MemoryData `json:"swap_usage,omitempty"` - // usage of kernel memory - KernelUsage MemoryData `json:"kernel_usage,omitempty"` - // usage of kernel TCP memory - KernelTCPUsage MemoryData `json:"kernel_tcp_usage,omitempty"` - // usage of memory pages by NUMA node - // see chapter 5.6 of memory controller documentation - PageUsageByNUMA PageUsageByNUMA `json:"page_usage_by_numa,omitempty"` - // if true, memory usage is accounted for throughout a hierarchy of cgroups. - UseHierarchy bool `json:"use_hierarchy"` - - Stats map[string]uint64 `json:"stats,omitempty"` -} - -type PageUsageByNUMA struct { - // Embedding is used as types can't be recursive. 
- PageUsageByNUMAInner - Hierarchical PageUsageByNUMAInner `json:"hierarchical,omitempty"` -} - -type PageUsageByNUMAInner struct { - Total PageStats `json:"total,omitempty"` - File PageStats `json:"file,omitempty"` - Anon PageStats `json:"anon,omitempty"` - Unevictable PageStats `json:"unevictable,omitempty"` -} - -type PageStats struct { - Total uint64 `json:"total,omitempty"` - Nodes map[uint8]uint64 `json:"nodes,omitempty"` -} - -type PidsStats struct { - // number of pids in the cgroup - Current uint64 `json:"current,omitempty"` - // active pids hard limit - Limit uint64 `json:"limit,omitempty"` -} - -type BlkioStatEntry struct { - Major uint64 `json:"major,omitempty"` - Minor uint64 `json:"minor,omitempty"` - Op string `json:"op,omitempty"` - Value uint64 `json:"value,omitempty"` -} - -type BlkioStats struct { - // number of bytes tranferred to and from the block device - IoServiceBytesRecursive []BlkioStatEntry `json:"io_service_bytes_recursive,omitempty"` - IoServicedRecursive []BlkioStatEntry `json:"io_serviced_recursive,omitempty"` - IoQueuedRecursive []BlkioStatEntry `json:"io_queue_recursive,omitempty"` - IoServiceTimeRecursive []BlkioStatEntry `json:"io_service_time_recursive,omitempty"` - IoWaitTimeRecursive []BlkioStatEntry `json:"io_wait_time_recursive,omitempty"` - IoMergedRecursive []BlkioStatEntry `json:"io_merged_recursive,omitempty"` - IoTimeRecursive []BlkioStatEntry `json:"io_time_recursive,omitempty"` - SectorsRecursive []BlkioStatEntry `json:"sectors_recursive,omitempty"` -} - -type HugetlbStats struct { - // current res_counter usage for hugetlb - Usage uint64 `json:"usage,omitempty"` - // maximum usage ever recorded. - MaxUsage uint64 `json:"max_usage,omitempty"` - // number of times hugetlb usage allocation failure. - Failcnt uint64 `json:"failcnt"` -} - -type Stats struct { - CpuStats CpuStats `json:"cpu_stats,omitempty"` - CPUSetStats CPUSetStats `json:"cpuset_stats,omitempty"` - MemoryStats MemoryStats `json:"memory_stats,omitempty"` - PidsStats PidsStats `json:"pids_stats,omitempty"` - BlkioStats BlkioStats `json:"blkio_stats,omitempty"` - // the map is in the format "size of hugepage: stats of the hugepage" - HugetlbStats map[string]HugetlbStats `json:"hugetlb_stats,omitempty"` -} - -func NewStats() *Stats { - memoryStats := MemoryStats{Stats: make(map[string]uint64)} - hugetlbStats := make(map[string]HugetlbStats) - return &Stats{MemoryStats: memoryStats, HugetlbStats: hugetlbStats} -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go deleted file mode 100644 index 3506c82746..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/common.go +++ /dev/null @@ -1,516 +0,0 @@ -package systemd - -import ( - "bufio" - "context" - "fmt" - "math" - "os" - "regexp" - "strconv" - "strings" - "sync" - "time" - - systemdDbus "github.com/coreos/go-systemd/v22/dbus" - dbus "github.com/godbus/dbus/v5" - cgroupdevices "github.com/opencontainers/runc/libcontainer/cgroups/devices" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -const ( - // Default kernel value for cpu quota period is 100000 us (100 ms), same for v1 and v2. 
- // v1: https://www.kernel.org/doc/html/latest/scheduler/sched-bwc.html and - // v2: https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html - defCPUQuotaPeriod = uint64(100000) -) - -var ( - versionOnce sync.Once - version int - - isRunningSystemdOnce sync.Once - isRunningSystemd bool -) - -// NOTE: This function comes from package github.com/coreos/go-systemd/util -// It was borrowed here to avoid a dependency on cgo. -// -// IsRunningSystemd checks whether the host was booted with systemd as its init -// system. This functions similarly to systemd's `sd_booted(3)`: internally, it -// checks whether /run/systemd/system/ exists and is a directory. -// http://www.freedesktop.org/software/systemd/man/sd_booted.html -func IsRunningSystemd() bool { - isRunningSystemdOnce.Do(func() { - fi, err := os.Lstat("/run/systemd/system") - isRunningSystemd = err == nil && fi.IsDir() - }) - return isRunningSystemd -} - -// systemd represents slice hierarchy using `-`, so we need to follow suit when -// generating the path of slice. Essentially, test-a-b.slice becomes -// /test.slice/test-a.slice/test-a-b.slice. -func ExpandSlice(slice string) (string, error) { - suffix := ".slice" - // Name has to end with ".slice", but can't be just ".slice". - if len(slice) < len(suffix) || !strings.HasSuffix(slice, suffix) { - return "", fmt.Errorf("invalid slice name: %s", slice) - } - - // Path-separators are not allowed. - if strings.Contains(slice, "/") { - return "", fmt.Errorf("invalid slice name: %s", slice) - } - - var path, prefix string - sliceName := strings.TrimSuffix(slice, suffix) - // if input was -.slice, we should just return root now - if sliceName == "-" { - return "/", nil - } - for _, component := range strings.Split(sliceName, "-") { - // test--a.slice isn't permitted, nor is -test.slice. - if component == "" { - return "", fmt.Errorf("invalid slice name: %s", slice) - } - - // Append the component to the path and to the prefix. - path += "/" + prefix + component + suffix - prefix += component + "-" - } - return path, nil -} - -func groupPrefix(ruleType devices.Type) (string, error) { - switch ruleType { - case devices.BlockDevice: - return "block-", nil - case devices.CharDevice: - return "char-", nil - default: - return "", errors.Errorf("device type %v has no group prefix", ruleType) - } -} - -// findDeviceGroup tries to find the device group name (as listed in -// /proc/devices) with the type prefixed as required for DeviceAllow, for a -// given (type, major) combination. If more than one device group exists, an -// arbitrary one is chosen. -func findDeviceGroup(ruleType devices.Type, ruleMajor int64) (string, error) { - fh, err := os.Open("/proc/devices") - if err != nil { - return "", err - } - defer fh.Close() - - prefix, err := groupPrefix(ruleType) - if err != nil { - return "", err - } - - scanner := bufio.NewScanner(fh) - var currentType devices.Type - for scanner.Scan() { - // We need to strip spaces because the first number is column-aligned. - line := strings.TrimSpace(scanner.Text()) - - // Handle the "header" lines. - switch line { - case "Block devices:": - currentType = devices.BlockDevice - continue - case "Character devices:": - currentType = devices.CharDevice - continue - case "": - continue - } - - // Skip lines unrelated to our type. - if currentType != ruleType { - continue - } - - // Parse out the (major, name). 
- var ( - currMajor int64 - currName string - ) - if n, err := fmt.Sscanf(line, "%d %s", &currMajor, &currName); err != nil || n != 2 { - if err == nil { - err = errors.Errorf("wrong number of fields") - } - return "", errors.Wrapf(err, "scan /proc/devices line %q", line) - } - - if currMajor == ruleMajor { - return prefix + currName, nil - } - } - if err := scanner.Err(); err != nil { - return "", errors.Wrap(err, "reading /proc/devices") - } - // Couldn't find the device group. - return "", nil -} - -// DeviceAllow is the dbus type "a(ss)" which means we need a struct -// to represent it in Go. -type deviceAllowEntry struct { - Path string - Perms string -} - -func allowAllDevices() []systemdDbus.Property { - // Setting mode to auto and removing all DeviceAllow rules - // results in allowing access to all devices. - return []systemdDbus.Property{ - newProp("DevicePolicy", "auto"), - newProp("DeviceAllow", []deviceAllowEntry{}), - } -} - -// generateDeviceProperties takes the configured device rules and generates a -// corresponding set of systemd properties to configure the devices correctly. -func generateDeviceProperties(r *configs.Resources) ([]systemdDbus.Property, error) { - if r.SkipDevices { - return nil, nil - } - - properties := []systemdDbus.Property{ - // Always run in the strictest white-list mode. - newProp("DevicePolicy", "strict"), - // Empty the DeviceAllow array before filling it. - newProp("DeviceAllow", []deviceAllowEntry{}), - } - - // Figure out the set of rules. - configEmu := &cgroupdevices.Emulator{} - for _, rule := range r.Devices { - if err := configEmu.Apply(*rule); err != nil { - return nil, errors.Wrap(err, "apply rule for systemd") - } - } - // systemd doesn't support blacklists. So we log a warning, and tell - // systemd to act as a deny-all whitelist. This ruleset will be replaced - // with our normal fallback code. This may result in spurrious errors, but - // the only other option is to error out here. - if configEmu.IsBlacklist() { - // However, if we're dealing with an allow-all rule then we can do it. - if configEmu.IsAllowAll() { - return allowAllDevices(), nil - } - logrus.Warn("systemd doesn't support blacklist device rules -- applying temporary deny-all rule") - return properties, nil - } - - // Now generate the set of rules we actually need to apply. Unlike the - // normal devices cgroup, in "strict" mode systemd defaults to a deny-all - // whitelist which is the default for devices.Emulator. - finalRules, err := configEmu.Rules() - if err != nil { - return nil, errors.Wrap(err, "get simplified rules for systemd") - } - var deviceAllowList []deviceAllowEntry - for _, rule := range finalRules { - if !rule.Allow { - // Should never happen. - return nil, errors.Errorf("[internal error] cannot add deny rule to systemd DeviceAllow list: %v", *rule) - } - switch rule.Type { - case devices.BlockDevice, devices.CharDevice: - default: - // Should never happen. - return nil, errors.Errorf("invalid device type for DeviceAllow: %v", rule.Type) - } - - entry := deviceAllowEntry{ - Perms: string(rule.Permissions), - } - - // systemd has a fairly odd (though understandable) syntax here, and - // because of the OCI configuration format we have to do quite a bit of - // trickery to convert things: - // - // * Concrete rules with non-wildcard major/minor numbers have to use - // /dev/{block,char} paths. This is slightly odd because it means - // that we cannot add whitelist rules for devices that don't exist, - // but there's not too much we can do about that. 
- // - // However, path globbing is not support for path-based rules so we - // need to handle wildcards in some other manner. - // - // * Wildcard-minor rules have to specify a "device group name" (the - // second column in /proc/devices). - // - // * Wildcard (major and minor) rules can just specify a glob with the - // type ("char-*" or "block-*"). - // - // The only type of rule we can't handle is wildcard-major rules, and - // so we'll give a warning in that case (note that the fallback code - // will insert any rules systemd couldn't handle). What amazing fun. - - if rule.Major == devices.Wildcard { - // "_ *:n _" rules aren't supported by systemd. - if rule.Minor != devices.Wildcard { - logrus.Warnf("systemd doesn't support '*:n' device rules -- temporarily ignoring rule: %v", *rule) - continue - } - - // "_ *:* _" rules just wildcard everything. - prefix, err := groupPrefix(rule.Type) - if err != nil { - return nil, err - } - entry.Path = prefix + "*" - } else if rule.Minor == devices.Wildcard { - // "_ n:* _" rules require a device group from /proc/devices. - group, err := findDeviceGroup(rule.Type, rule.Major) - if err != nil { - return nil, errors.Wrapf(err, "find device '%v/%d'", rule.Type, rule.Major) - } - if group == "" { - // Couldn't find a group. - logrus.Warnf("could not find device group for '%v/%d' in /proc/devices -- temporarily ignoring rule: %v", rule.Type, rule.Major, *rule) - continue - } - entry.Path = group - } else { - // "_ n:m _" rules are just a path in /dev/{block,char}/. - switch rule.Type { - case devices.BlockDevice: - entry.Path = fmt.Sprintf("/dev/block/%d:%d", rule.Major, rule.Minor) - case devices.CharDevice: - entry.Path = fmt.Sprintf("/dev/char/%d:%d", rule.Major, rule.Minor) - } - } - deviceAllowList = append(deviceAllowList, entry) - } - - properties = append(properties, newProp("DeviceAllow", deviceAllowList)) - return properties, nil -} - -func newProp(name string, units interface{}) systemdDbus.Property { - return systemdDbus.Property{ - Name: name, - Value: dbus.MakeVariant(units), - } -} - -func getUnitName(c *configs.Cgroup) string { - // by default, we create a scope unless the user explicitly asks for a slice. - if !strings.HasSuffix(c.Name, ".slice") { - return c.ScopePrefix + "-" + c.Name + ".scope" - } - return c.Name -} - -// isDbusError returns true if the error is a specific dbus error. -func isDbusError(err error, name string) bool { - if err != nil { - var derr dbus.Error - if errors.As(err, &derr) { - return strings.Contains(derr.Name, name) - } - } - return false -} - -// isUnitExists returns true if the error is that a systemd unit already exists. 
-func isUnitExists(err error) bool { - return isDbusError(err, "org.freedesktop.systemd1.UnitExists") -} - -func startUnit(cm *dbusConnManager, unitName string, properties []systemdDbus.Property) error { - statusChan := make(chan string, 1) - err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { - _, err := c.StartTransientUnitContext(context.TODO(), unitName, "replace", properties, statusChan) - return err - }) - if err == nil { - timeout := time.NewTimer(30 * time.Second) - defer timeout.Stop() - - select { - case s := <-statusChan: - close(statusChan) - // Please refer to https://pkg.go.dev/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit - if s != "done" { - resetFailedUnit(cm, unitName) - return errors.Errorf("error creating systemd unit `%s`: got `%s`", unitName, s) - } - case <-timeout.C: - resetFailedUnit(cm, unitName) - return errors.New("Timeout waiting for systemd to create " + unitName) - } - } else if !isUnitExists(err) { - return err - } - - return nil -} - -func stopUnit(cm *dbusConnManager, unitName string) error { - statusChan := make(chan string, 1) - err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { - _, err := c.StopUnitContext(context.TODO(), unitName, "replace", statusChan) - return err - }) - if err == nil { - timeout := time.NewTimer(30 * time.Second) - defer timeout.Stop() - - select { - case s := <-statusChan: - close(statusChan) - // Please refer to https://godoc.org/github.com/coreos/go-systemd/v22/dbus#Conn.StartUnit - if s != "done" { - logrus.Warnf("error removing unit `%s`: got `%s`. Continuing...", unitName, s) - } - case <-timeout.C: - return errors.New("Timed out while waiting for systemd to remove " + unitName) - } - } - return nil -} - -func resetFailedUnit(cm *dbusConnManager, name string) { - err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { - return c.ResetFailedUnitContext(context.TODO(), name) - }) - if err != nil { - logrus.Warnf("unable to reset failed unit: %v", err) - } -} - -func getUnitProperty(cm *dbusConnManager, unitName string, propertyName string) (*systemdDbus.Property, error) { - var prop *systemdDbus.Property - err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) (Err error) { - prop, Err = c.GetUnitPropertyContext(context.TODO(), unitName, propertyName) - return Err - }) - return prop, err -} - -func setUnitProperties(cm *dbusConnManager, name string, properties ...systemdDbus.Property) error { - return cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { - return c.SetUnitPropertiesContext(context.TODO(), name, true, properties...) - }) -} - -func getManagerProperty(cm *dbusConnManager, name string) (string, error) { - str := "" - err := cm.retryOnDisconnect(func(c *systemdDbus.Conn) error { - var err error - str, err = c.GetManagerProperty(name) - return err - }) - if err != nil { - return "", err - } - return strconv.Unquote(str) -} - -func systemdVersion(cm *dbusConnManager) int { - versionOnce.Do(func() { - version = -1 - verStr, err := getManagerProperty(cm, "Version") - if err == nil { - version, err = systemdVersionAtoi(verStr) - } - - if err != nil { - logrus.WithError(err).Error("unable to get systemd version") - } - }) - - return version -} - -func systemdVersionAtoi(verStr string) (int, error) { - // verStr should be of the form: - // "v245.4-1.fc32", "245", "v245-1.fc32", "245-1.fc32" (without quotes). - // The result for all of the above should be 245. - // Thus, we unconditionally remove the "v" prefix - // and then match on the first integer we can grab. 
- re := regexp.MustCompile(`v?([0-9]+)`) - matches := re.FindStringSubmatch(verStr) - if len(matches) < 2 { - return 0, errors.Errorf("can't parse version %s: incorrect number of matches %v", verStr, matches) - } - ver, err := strconv.Atoi(matches[1]) - return ver, errors.Wrapf(err, "can't parse version %s", verStr) -} - -func addCpuQuota(cm *dbusConnManager, properties *[]systemdDbus.Property, quota int64, period uint64) { - if period != 0 { - // systemd only supports CPUQuotaPeriodUSec since v242 - sdVer := systemdVersion(cm) - if sdVer >= 242 { - *properties = append(*properties, - newProp("CPUQuotaPeriodUSec", period)) - } else { - logrus.Debugf("systemd v%d is too old to support CPUQuotaPeriodSec "+ - " (setting will still be applied to cgroupfs)", sdVer) - } - } - if quota != 0 || period != 0 { - // corresponds to USEC_INFINITY in systemd - cpuQuotaPerSecUSec := uint64(math.MaxUint64) - if quota > 0 { - if period == 0 { - // assume the default - period = defCPUQuotaPeriod - } - // systemd converts CPUQuotaPerSecUSec (microseconds per CPU second) to CPUQuota - // (integer percentage of CPU) internally. This means that if a fractional percent of - // CPU is indicated by Resources.CpuQuota, we need to round up to the nearest - // 10ms (1% of a second) such that child cgroups can set the cpu.cfs_quota_us they expect. - cpuQuotaPerSecUSec = uint64(quota*1000000) / period - if cpuQuotaPerSecUSec%10000 != 0 { - cpuQuotaPerSecUSec = ((cpuQuotaPerSecUSec / 10000) + 1) * 10000 - } - } - *properties = append(*properties, - newProp("CPUQuotaPerSecUSec", cpuQuotaPerSecUSec)) - } -} - -func addCpuset(cm *dbusConnManager, props *[]systemdDbus.Property, cpus, mems string) error { - if cpus == "" && mems == "" { - return nil - } - - // systemd only supports AllowedCPUs/AllowedMemoryNodes since v244 - sdVer := systemdVersion(cm) - if sdVer < 244 { - logrus.Debugf("systemd v%d is too old to support AllowedCPUs/AllowedMemoryNodes"+ - " (settings will still be applied to cgroupfs)", sdVer) - return nil - } - - if cpus != "" { - bits, err := RangeToBits(cpus) - if err != nil { - return fmt.Errorf("resources.CPU.Cpus=%q conversion error: %w", - cpus, err) - } - *props = append(*props, - newProp("AllowedCPUs", bits)) - } - if mems != "" { - bits, err := RangeToBits(mems) - if err != nil { - return fmt.Errorf("resources.CPU.Mems=%q conversion error: %w", - mems, err) - } - *props = append(*props, - newProp("AllowedMemoryNodes", bits)) - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go deleted file mode 100644 index 642a04e7dd..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/cpuset.go +++ /dev/null @@ -1,67 +0,0 @@ -package systemd - -import ( - "encoding/binary" - "strconv" - "strings" - - "github.com/bits-and-blooms/bitset" - "github.com/pkg/errors" -) - -// RangeToBits converts a text representation of a CPU mask (as written to -// or read from cgroups' cpuset.* files, e.g. "1,3-5") to a slice of bytes -// with the corresponding bits set (as consumed by systemd over dbus as -// AllowedCPUs/AllowedMemoryNodes unit property value). 
-func RangeToBits(str string) ([]byte, error) { - bits := &bitset.BitSet{} - - for _, r := range strings.Split(str, ",") { - // allow extra spaces around - r = strings.TrimSpace(r) - // allow empty elements (extra commas) - if r == "" { - continue - } - ranges := strings.SplitN(r, "-", 2) - if len(ranges) > 1 { - start, err := strconv.ParseUint(ranges[0], 10, 32) - if err != nil { - return nil, err - } - end, err := strconv.ParseUint(ranges[1], 10, 32) - if err != nil { - return nil, err - } - if start > end { - return nil, errors.New("invalid range: " + r) - } - for i := uint(start); i <= uint(end); i++ { - bits.Set(i) - } - } else { - val, err := strconv.ParseUint(ranges[0], 10, 32) - if err != nil { - return nil, err - } - bits.Set(uint(val)) - } - } - - val := bits.Bytes() - if len(val) == 0 { - // do not allow empty values - return nil, errors.New("empty value") - } - ret := make([]byte, len(val)*8) - for i := range val { - // bitset uses BigEndian internally - binary.BigEndian.PutUint64(ret[i*8:], val[len(val)-1-i]) - } - // remove upper all-zero bytes - for ret[0] == 0 { - ret = ret[1:] - } - - return ret, nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go deleted file mode 100644 index a70a9df43e..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/dbus.go +++ /dev/null @@ -1,98 +0,0 @@ -// +build linux - -package systemd - -import ( - "context" - "sync" - - systemdDbus "github.com/coreos/go-systemd/v22/dbus" - dbus "github.com/godbus/dbus/v5" -) - -var ( - dbusC *systemdDbus.Conn - dbusMu sync.RWMutex - dbusInited bool - dbusRootless bool -) - -type dbusConnManager struct{} - -// newDbusConnManager initializes systemd dbus connection manager. -func newDbusConnManager(rootless bool) *dbusConnManager { - dbusMu.Lock() - defer dbusMu.Unlock() - if dbusInited && rootless != dbusRootless { - panic("can't have both root and rootless dbus") - } - dbusInited = true - dbusRootless = rootless - return &dbusConnManager{} -} - -// getConnection lazily initializes and returns systemd dbus connection. -func (d *dbusConnManager) getConnection() (*systemdDbus.Conn, error) { - // In the case where dbusC != nil - // Use the read lock the first time to ensure - // that Conn can be acquired at the same time. - dbusMu.RLock() - if conn := dbusC; conn != nil { - dbusMu.RUnlock() - return conn, nil - } - dbusMu.RUnlock() - - // In the case where dbusC == nil - // Use write lock to ensure that only one - // will be created - dbusMu.Lock() - defer dbusMu.Unlock() - if conn := dbusC; conn != nil { - return conn, nil - } - - conn, err := d.newConnection() - if err != nil { - return nil, err - } - dbusC = conn - return conn, nil -} - -func (d *dbusConnManager) newConnection() (*systemdDbus.Conn, error) { - if dbusRootless { - return newUserSystemdDbus() - } - return systemdDbus.NewWithContext(context.TODO()) -} - -// resetConnection resets the connection to its initial state -// (so it can be reconnected if necessary). -func (d *dbusConnManager) resetConnection(conn *systemdDbus.Conn) { - dbusMu.Lock() - defer dbusMu.Unlock() - if dbusC != nil && dbusC == conn { - dbusC.Close() - dbusC = nil - } -} - -var errDbusConnClosed = dbus.ErrClosed.Error() - -// retryOnDisconnect calls op, and if the error it returns is about closed dbus -// connection, the connection is re-established and the op is retried. 
This helps -// with the situation when dbus is restarted and we have a stale connection. -func (d *dbusConnManager) retryOnDisconnect(op func(*systemdDbus.Conn) error) error { - for { - conn, err := d.getConnection() - if err != nil { - return err - } - err = op(conn) - if !isDbusError(err, errDbusConnClosed) { - return err - } - d.resetConnection(conn) - } -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go deleted file mode 100644 index cb4d00c88b..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/unsupported.go +++ /dev/null @@ -1,71 +0,0 @@ -// +build !linux - -package systemd - -import ( - "errors" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type Manager struct { - Cgroups *configs.Cgroup - Paths map[string]string -} - -func IsRunningSystemd() bool { - return false -} - -func NewSystemdCgroupsManager() (func(config *configs.Cgroup, paths map[string]string) cgroups.Manager, error) { - return nil, errors.New("Systemd not supported") -} - -func (m *Manager) Apply(pid int) error { - return errors.New("Systemd not supported") -} - -func (m *Manager) GetPids() ([]int, error) { - return nil, errors.New("Systemd not supported") -} - -func (m *Manager) GetAllPids() ([]int, error) { - return nil, errors.New("Systemd not supported") -} - -func (m *Manager) Destroy() error { - return errors.New("Systemd not supported") -} - -func (m *Manager) GetPaths() map[string]string { - return nil -} - -func (m *Manager) Path(_ string) string { - return "" -} - -func (m *Manager) GetStats() (*cgroups.Stats, error) { - return nil, errors.New("Systemd not supported") -} - -func (m *Manager) Set(container *configs.Config) error { - return errors.New("Systemd not supported") -} - -func (m *Manager) Freeze(state configs.FreezerState) error { - return errors.New("Systemd not supported") -} - -func Freeze(c *configs.Cgroup, state configs.FreezerState) error { - return errors.New("Systemd not supported") -} - -func (m *Manager) GetCgroups() (*configs.Cgroup, error) { - return nil, errors.New("Systemd not supported") -} - -func (m *Manager) Exists() bool { - return false -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go deleted file mode 100644 index ddeaf66426..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/user.go +++ /dev/null @@ -1,106 +0,0 @@ -// +build linux - -package systemd - -import ( - "bufio" - "bytes" - "os" - "os/exec" - "path/filepath" - "strconv" - "strings" - - systemdDbus "github.com/coreos/go-systemd/v22/dbus" - dbus "github.com/godbus/dbus/v5" - "github.com/opencontainers/runc/libcontainer/userns" - "github.com/pkg/errors" -) - -// newUserSystemdDbus creates a connection for systemd user-instance. 
-func newUserSystemdDbus() (*systemdDbus.Conn, error) { - addr, err := DetectUserDbusSessionBusAddress() - if err != nil { - return nil, err - } - uid, err := DetectUID() - if err != nil { - return nil, err - } - - return systemdDbus.NewConnection(func() (*dbus.Conn, error) { - conn, err := dbus.Dial(addr) - if err != nil { - return nil, errors.Wrapf(err, "error while dialing %q", addr) - } - methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))} - err = conn.Auth(methods) - if err != nil { - conn.Close() - return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid) - } - if err = conn.Hello(); err != nil { - conn.Close() - return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid) - } - return conn, nil - }) -} - -// DetectUID detects UID from the OwnerUID field of `busctl --user status` -// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) . -// -// Otherwise returns os.Getuid() . -func DetectUID() (int, error) { - if !userns.RunningInUserNS() { - return os.Getuid(), nil - } - b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput() - if err != nil { - return -1, errors.Wrapf(err, "could not execute `busctl --user --no-pager status`: %q", string(b)) - } - scanner := bufio.NewScanner(bytes.NewReader(b)) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if strings.HasPrefix(s, "OwnerUID=") { - uidStr := strings.TrimPrefix(s, "OwnerUID=") - i, err := strconv.Atoi(uidStr) - if err != nil { - return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s) - } - return i, nil - } - } - if err := scanner.Err(); err != nil { - return -1, err - } - return -1, errors.New("could not detect the OwnerUID") -} - -// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS if set. -// Otherwise returns "unix:path=$XDG_RUNTIME_DIR/bus" if $XDG_RUNTIME_DIR/bus exists. -// Otherwise parses the value from `systemctl --user show-environment` . -func DetectUserDbusSessionBusAddress() (string, error) { - if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" { - return env, nil - } - if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" { - busPath := filepath.Join(xdr, "bus") - if _, err := os.Stat(busPath); err == nil { - busAddress := "unix:path=" + busPath - return busAddress, nil - } - } - b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput() - if err != nil { - return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b)) - } - scanner := bufio.NewScanner(bytes.NewReader(b)) - for scanner.Scan() { - s := strings.TrimSpace(scanner.Text()) - if strings.HasPrefix(s, "DBUS_SESSION_BUS_ADDRESS=") { - return strings.TrimPrefix(s, "DBUS_SESSION_BUS_ADDRESS="), nil - } - } - return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`. 
Make sure you have installed the dbus-user-session or dbus-daemon package and then run: `systemctl --user start dbus`") -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go deleted file mode 100644 index 1a8e1e3c6c..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v1.go +++ /dev/null @@ -1,465 +0,0 @@ -// +build linux - -package systemd - -import ( - "errors" - "os" - "path/filepath" - "strings" - "sync" - - systemdDbus "github.com/coreos/go-systemd/v22/dbus" - "github.com/godbus/dbus/v5" - "github.com/sirupsen/logrus" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/configs" -) - -type legacyManager struct { - mu sync.Mutex - cgroups *configs.Cgroup - paths map[string]string - dbus *dbusConnManager -} - -func NewLegacyManager(cg *configs.Cgroup, paths map[string]string) cgroups.Manager { - return &legacyManager{ - cgroups: cg, - paths: paths, - dbus: newDbusConnManager(false), - } -} - -type subsystem interface { - // Name returns the name of the subsystem. - Name() string - // Returns the stats, as 'stats', corresponding to the cgroup under 'path'. - GetStats(path string, stats *cgroups.Stats) error - // Set sets cgroup resource limits. - Set(path string, r *configs.Resources) error -} - -var errSubsystemDoesNotExist = errors.New("cgroup: subsystem does not exist") - -var legacySubsystems = []subsystem{ - &fs.CpusetGroup{}, - &fs.DevicesGroup{}, - &fs.MemoryGroup{}, - &fs.CpuGroup{}, - &fs.CpuacctGroup{}, - &fs.PidsGroup{}, - &fs.BlkioGroup{}, - &fs.HugetlbGroup{}, - &fs.PerfEventGroup{}, - &fs.FreezerGroup{}, - &fs.NetPrioGroup{}, - &fs.NetClsGroup{}, - &fs.NameGroup{GroupName: "name=systemd"}, -} - -func genV1ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { - var properties []systemdDbus.Property - - deviceProperties, err := generateDeviceProperties(r) - if err != nil { - return nil, err - } - properties = append(properties, deviceProperties...) - - if r.Memory != 0 { - properties = append(properties, - newProp("MemoryLimit", uint64(r.Memory))) - } - - if r.CpuShares != 0 { - properties = append(properties, - newProp("CPUShares", r.CpuShares)) - } - - addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod) - - if r.BlkioWeight != 0 { - properties = append(properties, - newProp("BlockIOWeight", uint64(r.BlkioWeight))) - } - - if r.PidsLimit > 0 || r.PidsLimit == -1 { - properties = append(properties, - newProp("TasksMax", uint64(r.PidsLimit))) - } - - err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) - if err != nil { - return nil, err - } - - return properties, nil -} - -func (m *legacyManager) Apply(pid int) error { - var ( - c = m.cgroups - unitName = getUnitName(c) - slice = "system.slice" - properties []systemdDbus.Property - ) - - if c.Resources.Unified != nil { - return cgroups.ErrV1NoUnified - } - - m.mu.Lock() - defer m.mu.Unlock() - if c.Paths != nil { - paths := make(map[string]string) - cgMap, err := cgroups.ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return err - } - // XXX(kolyshkin@): why this check is needed? 
- for name, path := range c.Paths { - if _, ok := cgMap[name]; ok { - paths[name] = path - } - } - m.paths = paths - return cgroups.EnterPid(m.paths, pid) - } - - if c.Parent != "" { - slice = c.Parent - } - - properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) - - // if we create a slice, the parent is defined via a Wants= - if strings.HasSuffix(unitName, ".slice") { - properties = append(properties, systemdDbus.PropWants(slice)) - } else { - // otherwise, we use Slice= - properties = append(properties, systemdDbus.PropSlice(slice)) - } - - // only add pid if its valid, -1 is used w/ general slice creation. - if pid != -1 { - properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) - } - - // Check if we can delegate. This is only supported on systemd versions 218 and above. - if !strings.HasSuffix(unitName, ".slice") { - // Assume scopes always support delegation. - properties = append(properties, newProp("Delegate", true)) - } - - // Always enable accounting, this gets us the same behaviour as the fs implementation, - // plus the kernel has some problems with joining the memory cgroup at a later time. - properties = append(properties, - newProp("MemoryAccounting", true), - newProp("CPUAccounting", true), - newProp("BlockIOAccounting", true), - newProp("TasksAccounting", true), - ) - - // Assume DefaultDependencies= will always work (the check for it was previously broken.) - properties = append(properties, - newProp("DefaultDependencies", false)) - - properties = append(properties, c.SystemdProps...) - - if err := startUnit(m.dbus, unitName, properties); err != nil { - return err - } - - paths := make(map[string]string) - for _, s := range legacySubsystems { - subsystemPath, err := getSubsystemPath(m.cgroups, s.Name()) - if err != nil { - // Even if it's `not found` error, we'll return err - // because devices cgroup is hard requirement for - // container security. - if s.Name() == "devices" { - return err - } - // Don't fail if a cgroup hierarchy was not found, just skip this subsystem - if cgroups.IsNotFound(err) { - continue - } - return err - } - paths[s.Name()] = subsystemPath - } - m.paths = paths - - if err := m.joinCgroups(pid); err != nil { - return err - } - - return nil -} - -func (m *legacyManager) Destroy() error { - if m.cgroups.Paths != nil { - return nil - } - m.mu.Lock() - defer m.mu.Unlock() - - stopErr := stopUnit(m.dbus, getUnitName(m.cgroups)) - - // Both on success and on error, cleanup all the cgroups - // we are aware of, as some of them were created directly - // by Apply() and are not managed by systemd. 
- if err := cgroups.RemovePaths(m.paths); err != nil && stopErr == nil { - return err - } - - return stopErr -} - -func (m *legacyManager) Path(subsys string) string { - m.mu.Lock() - defer m.mu.Unlock() - return m.paths[subsys] -} - -func (m *legacyManager) joinCgroups(pid int) error { - for _, sys := range legacySubsystems { - name := sys.Name() - switch name { - case "name=systemd": - // let systemd handle this - case "cpuset": - if path, ok := m.paths[name]; ok { - s := &fs.CpusetGroup{} - if err := s.ApplyDir(path, m.cgroups.Resources, pid); err != nil { - return err - } - } - default: - if path, ok := m.paths[name]; ok { - if err := os.MkdirAll(path, 0o755); err != nil { - return err - } - if err := cgroups.WriteCgroupProc(path, pid); err != nil { - return err - } - } - } - } - - return nil -} - -func getSubsystemPath(c *configs.Cgroup, subsystem string) (string, error) { - mountpoint, err := cgroups.FindCgroupMountpoint("", subsystem) - if err != nil { - return "", err - } - - initPath, err := cgroups.GetInitCgroup(subsystem) - if err != nil { - return "", err - } - // if pid 1 is systemd 226 or later, it will be in init.scope, not the root - initPath = strings.TrimSuffix(filepath.Clean(initPath), "init.scope") - - slice := "system.slice" - if c.Parent != "" { - slice = c.Parent - } - - slice, err = ExpandSlice(slice) - if err != nil { - return "", err - } - - return filepath.Join(mountpoint, initPath, slice, getUnitName(c)), nil -} - -func (m *legacyManager) Freeze(state configs.FreezerState) error { - err := m.doFreeze(state) - if err == nil { - m.cgroups.Resources.Freezer = state - } - return err -} - -// doFreeze is the same as Freeze but without -// changing the m.cgroups.Resources.Frozen field. -func (m *legacyManager) doFreeze(state configs.FreezerState) error { - path, ok := m.paths["freezer"] - if !ok { - return errSubsystemDoesNotExist - } - freezer := &fs.FreezerGroup{} - resources := &configs.Resources{Freezer: state} - return freezer.Set(path, resources) -} - -func (m *legacyManager) GetPids() ([]int, error) { - path, ok := m.paths["devices"] - if !ok { - return nil, errSubsystemDoesNotExist - } - return cgroups.GetPids(path) -} - -func (m *legacyManager) GetAllPids() ([]int, error) { - path, ok := m.paths["devices"] - if !ok { - return nil, errSubsystemDoesNotExist - } - return cgroups.GetAllPids(path) -} - -func (m *legacyManager) GetStats() (*cgroups.Stats, error) { - m.mu.Lock() - defer m.mu.Unlock() - stats := cgroups.NewStats() - for _, sys := range legacySubsystems { - path := m.paths[sys.Name()] - if path == "" { - continue - } - if err := sys.GetStats(path, stats); err != nil { - return nil, err - } - } - - return stats, nil -} - -// freezeBeforeSet answers whether there is a need to freeze the cgroup before -// applying its systemd unit properties, and thaw after, while avoiding -// unnecessary freezer state changes. -// -// The reason why we have to freeze is that systemd's application of device -// rules is done disruptively, resulting in spurious errors to common devices -// (unlike our fs driver, they will happily write deny-all rules to running -// containers). So we have to freeze the container to avoid the container get -// an occasional "permission denied" error. -func (m *legacyManager) freezeBeforeSet(unitName string, r *configs.Resources) (needsFreeze, needsThaw bool, err error) { - // Special case for SkipDevices, as used by Kubernetes to create pod - // cgroups with allow-all device policy). 
- if r.SkipDevices { - // No need to freeze if SkipDevices is set, and either - // (1) systemd unit does not (yet) exist, or - // (2) it has DevicePolicy=auto and empty DeviceAllow list. - // - // Interestingly, (1) and (2) are the same here because - // a non-existent unit returns default properties, - // and settings in (2) are the defaults. - // - // Do not return errors from getUnitProperty, as they alone - // should not prevent Set from working. - devPolicy, e := getUnitProperty(m.dbus, unitName, "DevicePolicy") - if e == nil && devPolicy.Value == dbus.MakeVariant("auto") { - devAllow, e := getUnitProperty(m.dbus, unitName, "DeviceAllow") - if e == nil && devAllow.Value == dbus.MakeVariant([]deviceAllowEntry{}) { - needsFreeze = false - needsThaw = false - return - } - } - } - - needsFreeze = true - needsThaw = true - - // Check the current freezer state. - freezerState, err := m.GetFreezerState() - if err != nil { - return - } - if freezerState == configs.Frozen { - // Already frozen, and should stay frozen. - needsFreeze = false - needsThaw = false - } - - if r.Freezer == configs.Frozen { - // Will be frozen anyway -- no need to thaw. - needsThaw = false - } - return -} - -func (m *legacyManager) Set(r *configs.Resources) error { - // If Paths are set, then we are just joining cgroups paths - // and there is no need to set any values. - if m.cgroups.Paths != nil { - return nil - } - if r.Unified != nil { - return cgroups.ErrV1NoUnified - } - properties, err := genV1ResourcesProperties(r, m.dbus) - if err != nil { - return err - } - - unitName := getUnitName(m.cgroups) - needsFreeze, needsThaw, err := m.freezeBeforeSet(unitName, r) - if err != nil { - return err - } - - if needsFreeze { - if err := m.doFreeze(configs.Frozen); err != nil { - // If freezer cgroup isn't supported, we just warn about it. - logrus.Infof("freeze container before SetUnitProperties failed: %v", err) - } - } - setErr := setUnitProperties(m.dbus, unitName, properties...) - if needsThaw { - if err := m.doFreeze(configs.Thawed); err != nil { - logrus.Infof("thaw container after SetUnitProperties failed: %v", err) - } - } - if setErr != nil { - return setErr - } - - for _, sys := range legacySubsystems { - // Get the subsystem path, but don't error out for not found cgroups. 
- path, ok := m.paths[sys.Name()] - if !ok { - continue - } - if err := sys.Set(path, r); err != nil { - return err - } - } - - return nil -} - -func (m *legacyManager) GetPaths() map[string]string { - m.mu.Lock() - defer m.mu.Unlock() - return m.paths -} - -func (m *legacyManager) GetCgroups() (*configs.Cgroup, error) { - return m.cgroups, nil -} - -func (m *legacyManager) GetFreezerState() (configs.FreezerState, error) { - path, ok := m.paths["freezer"] - if !ok { - return configs.Undefined, nil - } - freezer := &fs.FreezerGroup{} - return freezer.GetState(path) -} - -func (m *legacyManager) Exists() bool { - return cgroups.PathExists(m.Path("devices")) -} - -func (m *legacyManager) OOMKillCount() (uint64, error) { - return fs.OOMKillCount(m.Path("memory")) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go deleted file mode 100644 index 8176ce5b2e..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/systemd/v2.go +++ /dev/null @@ -1,460 +0,0 @@ -// +build linux - -package systemd - -import ( - "fmt" - "math" - "os" - "path/filepath" - "strconv" - "strings" - "sync" - - systemdDbus "github.com/coreos/go-systemd/v22/dbus" - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -type unifiedManager struct { - mu sync.Mutex - cgroups *configs.Cgroup - // path is like "/sys/fs/cgroup/user.slice/user-1001.slice/session-1.scope" - path string - rootless bool - dbus *dbusConnManager -} - -func NewUnifiedManager(config *configs.Cgroup, path string, rootless bool) cgroups.Manager { - return &unifiedManager{ - cgroups: config, - path: path, - rootless: rootless, - dbus: newDbusConnManager(rootless), - } -} - -// unifiedResToSystemdProps tries to convert from Cgroup.Resources.Unified -// key/value map (where key is cgroupfs file name) to systemd unit properties. -// This is on a best-effort basis, so the properties that are not known -// (to this function and/or systemd) are ignored (but logged with "debug" -// log level). -// -// For the list of keys, see https://www.kernel.org/doc/Documentation/cgroup-v2.txt -// -// For the list of systemd unit properties, see systemd.resource-control(5). -func unifiedResToSystemdProps(cm *dbusConnManager, res map[string]string) (props []systemdDbus.Property, _ error) { - var err error - - for k, v := range res { - if strings.Contains(k, "/") { - return nil, fmt.Errorf("unified resource %q must be a file name (no slashes)", k) - } - sk := strings.SplitN(k, ".", 2) - if len(sk) != 2 { - return nil, fmt.Errorf("unified resource %q must be in the form CONTROLLER.PARAMETER", k) - } - // Kernel is quite forgiving to extra whitespace - // around the value, and so should we. - v = strings.TrimSpace(v) - // Please keep cases in alphabetical order. 
- switch k { - case "cpu.max": - // value: quota [period] - quota := int64(0) // 0 means "unlimited" for addCpuQuota, if period is set - period := defCPUQuotaPeriod - sv := strings.Fields(v) - if len(sv) < 1 || len(sv) > 2 { - return nil, fmt.Errorf("unified resource %q value invalid: %q", k, v) - } - // quota - if sv[0] != "max" { - quota, err = strconv.ParseInt(sv[0], 10, 64) - if err != nil { - return nil, fmt.Errorf("unified resource %q period value conversion error: %w", k, err) - } - } - // period - if len(sv) == 2 { - period, err = strconv.ParseUint(sv[1], 10, 64) - if err != nil { - return nil, fmt.Errorf("unified resource %q quota value conversion error: %w", k, err) - } - } - addCpuQuota(cm, &props, quota, period) - - case "cpu.weight": - num, err := strconv.ParseUint(v, 10, 64) - if err != nil { - return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err) - } - props = append(props, - newProp("CPUWeight", num)) - - case "cpuset.cpus", "cpuset.mems": - bits, err := RangeToBits(v) - if err != nil { - return nil, fmt.Errorf("unified resource %q=%q conversion error: %w", k, v, err) - } - m := map[string]string{ - "cpuset.cpus": "AllowedCPUs", - "cpuset.mems": "AllowedMemoryNodes", - } - // systemd only supports these properties since v244 - sdVer := systemdVersion(cm) - if sdVer >= 244 { - props = append(props, - newProp(m[k], bits)) - } else { - logrus.Debugf("systemd v%d is too old to support %s"+ - " (setting will still be applied to cgroupfs)", - sdVer, m[k]) - } - - case "memory.high", "memory.low", "memory.min", "memory.max", "memory.swap.max": - num := uint64(math.MaxUint64) - if v != "max" { - num, err = strconv.ParseUint(v, 10, 64) - if err != nil { - return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err) - } - } - m := map[string]string{ - "memory.high": "MemoryHigh", - "memory.low": "MemoryLow", - "memory.min": "MemoryMin", - "memory.max": "MemoryMax", - "memory.swap.max": "MemorySwapMax", - } - props = append(props, - newProp(m[k], num)) - - case "pids.max": - num := uint64(math.MaxUint64) - if v != "max" { - var err error - num, err = strconv.ParseUint(v, 10, 64) - if err != nil { - return nil, fmt.Errorf("unified resource %q value conversion error: %w", k, err) - } - } - props = append(props, - newProp("TasksMax", num)) - - case "memory.oom.group": - // Setting this to 1 is roughly equivalent to OOMPolicy=kill - // (as per systemd.service(5) and - // https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html), - // but it's not clear what to do if it is unset or set - // to 0 in runc update, as there are two other possible - // values for OOMPolicy (continue/stop). - fallthrough - - default: - // Ignore the unknown resource here -- will still be - // applied in Set which calls fs2.Set. - logrus.Debugf("don't know how to convert unified resource %q=%q to systemd unit property; skipping (will still be applied to cgroupfs)", k, v) - } - } - - return props, nil -} - -func genV2ResourcesProperties(r *configs.Resources, cm *dbusConnManager) ([]systemdDbus.Property, error) { - var properties []systemdDbus.Property - - // NOTE: This is of questionable correctness because we insert our own - // devices eBPF program later. Two programs with identical rules - // aren't the end of the world, but it is a bit concerning. However - // it's unclear if systemd removes all eBPF programs attached when - // doing SetUnitProperties... 
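The "cpu.max" case above splits the raw cgroupfs value into a quota and an optional period before handing them to addCpuQuota. A minimal standalone sketch of that parsing, assuming the usual 100000µs default period (the helper name parseCPUMax and the sample values are mine, not part of the vendored code):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseCPUMax mirrors the "cpu.max" handling above: the value is
// "quota [period]", where quota may be the literal "max" (unlimited).
func parseCPUMax(v string) (quota int64, period uint64, err error) {
	period = 100000 // assumed default, analogous to defCPUQuotaPeriod
	fields := strings.Fields(strings.TrimSpace(v))
	if len(fields) < 1 || len(fields) > 2 {
		return 0, 0, fmt.Errorf("invalid cpu.max value %q", v)
	}
	if fields[0] != "max" {
		if quota, err = strconv.ParseInt(fields[0], 10, 64); err != nil {
			return 0, 0, err
		}
	}
	if len(fields) == 2 {
		if period, err = strconv.ParseUint(fields[1], 10, 64); err != nil {
			return 0, 0, err
		}
	}
	return quota, period, nil
}

func main() {
	for _, v := range []string{"max", "200000 1000000", "50000"} {
		q, p, err := parseCPUMax(v)
		fmt.Println(v, "->", q, p, err) // quota 0 means "unlimited" here
	}
}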
- deviceProperties, err := generateDeviceProperties(r) - if err != nil { - return nil, err - } - properties = append(properties, deviceProperties...) - - if r.Memory != 0 { - properties = append(properties, - newProp("MemoryMax", uint64(r.Memory))) - } - if r.MemoryReservation != 0 { - properties = append(properties, - newProp("MemoryLow", uint64(r.MemoryReservation))) - } - - swap, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) - if err != nil { - return nil, err - } - if swap != 0 { - properties = append(properties, - newProp("MemorySwapMax", uint64(swap))) - } - - if r.CpuWeight != 0 { - properties = append(properties, - newProp("CPUWeight", r.CpuWeight)) - } - - addCpuQuota(cm, &properties, r.CpuQuota, r.CpuPeriod) - - if r.PidsLimit > 0 || r.PidsLimit == -1 { - properties = append(properties, - newProp("TasksMax", uint64(r.PidsLimit))) - } - - err = addCpuset(cm, &properties, r.CpusetCpus, r.CpusetMems) - if err != nil { - return nil, err - } - - // ignore r.KernelMemory - - // convert Resources.Unified map to systemd properties - if r.Unified != nil { - unifiedProps, err := unifiedResToSystemdProps(cm, r.Unified) - if err != nil { - return nil, err - } - properties = append(properties, unifiedProps...) - } - - return properties, nil -} - -func (m *unifiedManager) Apply(pid int) error { - var ( - c = m.cgroups - unitName = getUnitName(c) - properties []systemdDbus.Property - ) - - if c.Paths != nil { - return cgroups.WriteCgroupProc(m.path, pid) - } - - slice := "system.slice" - if m.rootless { - slice = "user.slice" - } - if c.Parent != "" { - slice = c.Parent - } - - properties = append(properties, systemdDbus.PropDescription("libcontainer container "+c.Name)) - - // if we create a slice, the parent is defined via a Wants= - if strings.HasSuffix(unitName, ".slice") { - properties = append(properties, systemdDbus.PropWants(slice)) - } else { - // otherwise, we use Slice= - properties = append(properties, systemdDbus.PropSlice(slice)) - } - - // only add pid if its valid, -1 is used w/ general slice creation. - if pid != -1 { - properties = append(properties, newProp("PIDs", []uint32{uint32(pid)})) - } - - // Check if we can delegate. This is only supported on systemd versions 218 and above. - if !strings.HasSuffix(unitName, ".slice") { - // Assume scopes always support delegation. - properties = append(properties, newProp("Delegate", true)) - } - - // Always enable accounting, this gets us the same behaviour as the fs implementation, - // plus the kernel has some problems with joining the memory cgroup at a later time. - properties = append(properties, - newProp("MemoryAccounting", true), - newProp("CPUAccounting", true), - newProp("IOAccounting", true), - newProp("TasksAccounting", true), - ) - - // Assume DefaultDependencies= will always work (the check for it was previously broken.) - properties = append(properties, - newProp("DefaultDependencies", false)) - - properties = append(properties, c.SystemdProps...) 
- - if err := startUnit(m.dbus, unitName, properties); err != nil { - return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties) - } - - if err := m.initPath(); err != nil { - return err - } - if err := fs2.CreateCgroupPath(m.path, m.cgroups); err != nil { - return err - } - return nil -} - -func (m *unifiedManager) Destroy() error { - if m.cgroups.Paths != nil { - return nil - } - m.mu.Lock() - defer m.mu.Unlock() - - unitName := getUnitName(m.cgroups) - if err := stopUnit(m.dbus, unitName); err != nil { - return err - } - - // XXX this is probably not needed, systemd should handle it - err := os.Remove(m.path) - if err != nil && !os.IsNotExist(err) { - return err - } - - return nil -} - -func (m *unifiedManager) Path(_ string) string { - _ = m.initPath() - return m.path -} - -// getSliceFull value is used in initPath. -// The value is incompatible with systemdDbus.PropSlice. -func (m *unifiedManager) getSliceFull() (string, error) { - c := m.cgroups - slice := "system.slice" - if m.rootless { - slice = "user.slice" - } - if c.Parent != "" { - var err error - slice, err = ExpandSlice(c.Parent) - if err != nil { - return "", err - } - } - - if m.rootless { - // managerCG is typically "/user.slice/user-${uid}.slice/user@${uid}.service". - managerCG, err := getManagerProperty(m.dbus, "ControlGroup") - if err != nil { - return "", err - } - slice = filepath.Join(managerCG, slice) - } - - // an example of the final slice in rootless: "/user.slice/user-1001.slice/user@1001.service/user.slice" - // NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/user@1001.service/" prefix NOT to be specified. - return slice, nil -} - -func (m *unifiedManager) initPath() error { - if m.path != "" { - return nil - } - - sliceFull, err := m.getSliceFull() - if err != nil { - return err - } - - c := m.cgroups - path := filepath.Join(sliceFull, getUnitName(c)) - path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path) - if err != nil { - return err - } - - // an example of the final path in rootless: - // "/sys/fs/cgroup/user.slice/user-1001.slice/user@1001.service/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope" - m.path = path - - return nil -} - -func (m *unifiedManager) fsManager() (cgroups.Manager, error) { - if err := m.initPath(); err != nil { - return nil, err - } - return fs2.NewManager(m.cgroups, m.path, m.rootless) -} - -func (m *unifiedManager) Freeze(state configs.FreezerState) error { - fsMgr, err := m.fsManager() - if err != nil { - return err - } - return fsMgr.Freeze(state) -} - -func (m *unifiedManager) GetPids() ([]int, error) { - if err := m.initPath(); err != nil { - return nil, err - } - return cgroups.GetPids(m.path) -} - -func (m *unifiedManager) GetAllPids() ([]int, error) { - if err := m.initPath(); err != nil { - return nil, err - } - return cgroups.GetAllPids(m.path) -} - -func (m *unifiedManager) GetStats() (*cgroups.Stats, error) { - fsMgr, err := m.fsManager() - if err != nil { - return nil, err - } - return fsMgr.GetStats() -} - -func (m *unifiedManager) Set(r *configs.Resources) error { - properties, err := genV2ResourcesProperties(r, m.dbus) - if err != nil { - return err - } - - if err := setUnitProperties(m.dbus, getUnitName(m.cgroups), properties...); err != nil { - return errors.Wrap(err, "error while setting unit properties") - } - - fsMgr, err := m.fsManager() - if err != nil { - return err - } - return fsMgr.Set(r) -} - -func (m *unifiedManager) GetPaths() 
map[string]string { - paths := make(map[string]string, 1) - paths[""] = m.path - return paths -} - -func (m *unifiedManager) GetCgroups() (*configs.Cgroup, error) { - return m.cgroups, nil -} - -func (m *unifiedManager) GetFreezerState() (configs.FreezerState, error) { - fsMgr, err := m.fsManager() - if err != nil { - return configs.Undefined, err - } - return fsMgr.GetFreezerState() -} - -func (m *unifiedManager) Exists() bool { - return cgroups.PathExists(m.path) -} - -func (m *unifiedManager) OOMKillCount() (uint64, error) { - fsMgr, err := m.fsManager() - if err != nil { - return 0, err - } - return fsMgr.OOMKillCount() -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go deleted file mode 100644 index 92606525b4..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ /dev/null @@ -1,450 +0,0 @@ -// +build linux - -package cgroups - -import ( - "bufio" - "errors" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - "sync" - "time" - - "github.com/opencontainers/runc/libcontainer/userns" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -const ( - CgroupProcesses = "cgroup.procs" - unifiedMountpoint = "/sys/fs/cgroup" -) - -var ( - isUnifiedOnce sync.Once - isUnified bool -) - -// IsCgroup2UnifiedMode returns whether we are running in cgroup v2 unified mode. -func IsCgroup2UnifiedMode() bool { - isUnifiedOnce.Do(func() { - var st unix.Statfs_t - err := unix.Statfs(unifiedMountpoint, &st) - if err != nil { - if os.IsNotExist(err) && userns.RunningInUserNS() { - // ignore the "not found" error if running in userns - logrus.WithError(err).Debugf("%s missing, assuming cgroup v1", unifiedMountpoint) - isUnified = false - return - } - panic(fmt.Sprintf("cannot statfs cgroup root: %s", err)) - } - isUnified = st.Type == unix.CGROUP2_SUPER_MAGIC - }) - return isUnified -} - -type Mount struct { - Mountpoint string - Root string - Subsystems []string -} - -// GetCgroupMounts returns the mounts for the cgroup subsystems. -// all indicates whether to return just the first instance or all the mounts. -// This function should not be used from cgroupv2 code, as in this case -// all the controllers are available under the constant unifiedMountpoint. -func GetCgroupMounts(all bool) ([]Mount, error) { - if IsCgroup2UnifiedMode() { - // TODO: remove cgroupv2 case once all external users are converted - availableControllers, err := GetAllSubsystems() - if err != nil { - return nil, err - } - m := Mount{ - Mountpoint: unifiedMountpoint, - Root: unifiedMountpoint, - Subsystems: availableControllers, - } - return []Mount{m}, nil - } - - return getCgroupMountsV1(all) -} - -// GetAllSubsystems returns all the cgroup subsystems supported by the kernel -func GetAllSubsystems() ([]string, error) { - // /proc/cgroups is meaningless for v2 - // https://github.com/torvalds/linux/blob/v5.3/Documentation/admin-guide/cgroup-v2.rst#deprecated-v1-core-features - if IsCgroup2UnifiedMode() { - // "pseudo" controllers do not appear in /sys/fs/cgroup/cgroup.controllers. - // - devices: implemented in kernel 4.15 - // - freezer: implemented in kernel 5.2 - // We assume these are always available, as it is hard to detect availability. 
- pseudo := []string{"devices", "freezer"} - data, err := ReadFile("/sys/fs/cgroup", "cgroup.controllers") - if err != nil { - return nil, err - } - subsystems := append(pseudo, strings.Fields(data)...) - return subsystems, nil - } - f, err := os.Open("/proc/cgroups") - if err != nil { - return nil, err - } - defer f.Close() - - subsystems := []string{} - - s := bufio.NewScanner(f) - for s.Scan() { - text := s.Text() - if text[0] != '#' { - parts := strings.Fields(text) - if len(parts) >= 4 && parts[3] != "0" { - subsystems = append(subsystems, parts[0]) - } - } - } - if err := s.Err(); err != nil { - return nil, err - } - return subsystems, nil -} - -func readProcsFile(file string) ([]int, error) { - f, err := os.Open(file) - if err != nil { - return nil, err - } - defer f.Close() - - var ( - s = bufio.NewScanner(f) - out = []int{} - ) - - for s.Scan() { - if t := s.Text(); t != "" { - pid, err := strconv.Atoi(t) - if err != nil { - return nil, err - } - out = append(out, pid) - } - } - return out, s.Err() -} - -// ParseCgroupFile parses the given cgroup file, typically /proc/self/cgroup -// or /proc//cgroup, into a map of subsystems to cgroup paths, e.g. -// "cpu": "/user.slice/user-1000.slice" -// "pids": "/user.slice/user-1000.slice" -// etc. -// -// Note that for cgroup v2 unified hierarchy, there are no per-controller -// cgroup paths, so the resulting map will have a single element where the key -// is empty string ("") and the value is the cgroup path the is in. -func ParseCgroupFile(path string) (map[string]string, error) { - f, err := os.Open(path) - if err != nil { - return nil, err - } - defer f.Close() - - return parseCgroupFromReader(f) -} - -// helper function for ParseCgroupFile to make testing easier -func parseCgroupFromReader(r io.Reader) (map[string]string, error) { - s := bufio.NewScanner(r) - cgroups := make(map[string]string) - - for s.Scan() { - text := s.Text() - // from cgroups(7): - // /proc/[pid]/cgroup - // ... - // For each cgroup hierarchy ... there is one entry - // containing three colon-separated fields of the form: - // hierarchy-ID:subsystem-list:cgroup-path - parts := strings.SplitN(text, ":", 3) - if len(parts) < 3 { - return nil, fmt.Errorf("invalid cgroup entry: must contain at least two colons: %v", text) - } - - for _, subs := range strings.Split(parts[1], ",") { - cgroups[subs] = parts[2] - } - } - if err := s.Err(); err != nil { - return nil, err - } - - return cgroups, nil -} - -func PathExists(path string) bool { - if _, err := os.Stat(path); err != nil { - return false - } - return true -} - -func EnterPid(cgroupPaths map[string]string, pid int) error { - for _, path := range cgroupPaths { - if PathExists(path) { - if err := WriteCgroupProc(path, pid); err != nil { - return err - } - } - } - return nil -} - -func rmdir(path string) error { - err := unix.Rmdir(path) - if err == nil || err == unix.ENOENT { - return nil - } - return &os.PathError{Op: "rmdir", Path: path, Err: err} -} - -// RemovePath aims to remove cgroup path. It does so recursively, -// by removing any subdirectories (sub-cgroups) first. 
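parseCgroupFromReader above turns each "hierarchy-ID:subsystem-list:cgroup-path" record into map entries keyed by subsystem. A small self-contained illustration of that split; the sample line is hypothetical:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// One cgroup v1 record as found in /proc/self/cgroup (example path only).
	line := "7:cpu,cpuacct:/user.slice/user-1000.slice"

	// hierarchy-ID : subsystem-list : cgroup-path
	parts := strings.SplitN(line, ":", 3)
	cgroups := make(map[string]string)
	for _, subs := range strings.Split(parts[1], ",") {
		cgroups[subs] = parts[2]
	}

	fmt.Println(cgroups["cpu"])     // /user.slice/user-1000.slice
	fmt.Println(cgroups["cpuacct"]) // /user.slice/user-1000.slice
}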
-func RemovePath(path string) error { - // try the fast path first - if err := rmdir(path); err == nil { - return nil - } - - infos, err := ioutil.ReadDir(path) - if err != nil { - if os.IsNotExist(err) { - err = nil - } - return err - } - for _, info := range infos { - if info.IsDir() { - // We should remove subcgroups dir first - if err = RemovePath(filepath.Join(path, info.Name())); err != nil { - break - } - } - } - if err == nil { - err = rmdir(path) - } - return err -} - -// RemovePaths iterates over the provided paths removing them. -// We trying to remove all paths five times with increasing delay between tries. -// If after all there are not removed cgroups - appropriate error will be -// returned. -func RemovePaths(paths map[string]string) (err error) { - const retries = 5 - delay := 10 * time.Millisecond - for i := 0; i < retries; i++ { - if i != 0 { - time.Sleep(delay) - delay *= 2 - } - for s, p := range paths { - if err := RemovePath(p); err != nil { - // do not log intermediate iterations - switch i { - case 0: - logrus.WithError(err).Warnf("Failed to remove cgroup (will retry)") - case retries - 1: - logrus.WithError(err).Error("Failed to remove cgroup") - } - } - _, err := os.Stat(p) - // We need this strange way of checking cgroups existence because - // RemoveAll almost always returns error, even on already removed - // cgroups - if os.IsNotExist(err) { - delete(paths, s) - } - } - if len(paths) == 0 { - //nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 - paths = make(map[string]string) - return nil - } - } - return fmt.Errorf("Failed to remove paths: %v", paths) -} - -func GetHugePageSize() ([]string, error) { - dir, err := os.OpenFile("/sys/kernel/mm/hugepages", unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return nil, err - } - files, err := dir.Readdirnames(0) - dir.Close() - if err != nil { - return nil, err - } - - return getHugePageSizeFromFilenames(files) -} - -func getHugePageSizeFromFilenames(fileNames []string) ([]string, error) { - pageSizes := make([]string, 0, len(fileNames)) - - for _, file := range fileNames { - // example: hugepages-1048576kB - val := strings.TrimPrefix(file, "hugepages-") - if len(val) == len(file) { - // unexpected file name: no prefix found - continue - } - // The suffix is always "kB" (as of Linux 5.9) - eLen := len(val) - 2 - val = strings.TrimSuffix(val, "kB") - if len(val) != eLen { - logrus.Warnf("GetHugePageSize: %s: invalid filename suffix (expected \"kB\")", file) - continue - } - size, err := strconv.Atoi(val) - if err != nil { - return nil, err - } - // Model after https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/mm/hugetlb_cgroup.c?id=eff48ddeab782e35e58ccc8853f7386bbae9dec4#n574 - // but in our case the size is in KB already. - if size >= (1 << 20) { - val = strconv.Itoa(size>>20) + "GB" - } else if size >= (1 << 10) { - val = strconv.Itoa(size>>10) + "MB" - } else { - val += "KB" - } - pageSizes = append(pageSizes, val) - } - - return pageSizes, nil -} - -// GetPids returns all pids, that were added to cgroup at path. -func GetPids(dir string) ([]int, error) { - return readProcsFile(filepath.Join(dir, CgroupProcesses)) -} - -// GetAllPids returns all pids, that were added to cgroup at path and to all its -// subcgroups. 
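getHugePageSizeFromFilenames above rewrites kernel directory names such as hugepages-2048kB into the unit style used by the hugetlb cgroup files. A standalone sketch of the same rescaling, with illustrative inputs (the helper name is mine):

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// humanHugePageSize strips the "hugepages-" prefix and "kB" suffix,
// then rescales the size in KB to GB/MB/KB, mirroring the logic above.
func humanHugePageSize(file string) string {
	val := strings.TrimSuffix(strings.TrimPrefix(file, "hugepages-"), "kB")
	size, err := strconv.Atoi(val)
	if err != nil {
		return ""
	}
	switch {
	case size >= 1<<20:
		return strconv.Itoa(size>>20) + "GB"
	case size >= 1<<10:
		return strconv.Itoa(size>>10) + "MB"
	default:
		return val + "KB"
	}
}

func main() {
	fmt.Println(humanHugePageSize("hugepages-2048kB"))    // 2MB
	fmt.Println(humanHugePageSize("hugepages-1048576kB")) // 1GB
	fmt.Println(humanHugePageSize("hugepages-64kB"))      // 64KB
}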
-func GetAllPids(path string) ([]int, error) { - var pids []int - // collect pids from all sub-cgroups - err := filepath.Walk(path, func(p string, info os.FileInfo, iErr error) error { - if iErr != nil { - return iErr - } - if info.IsDir() || info.Name() != CgroupProcesses { - return nil - } - cPids, err := readProcsFile(p) - if err != nil { - return err - } - pids = append(pids, cPids...) - return nil - }) - return pids, err -} - -// WriteCgroupProc writes the specified pid into the cgroup's cgroup.procs file -func WriteCgroupProc(dir string, pid int) error { - // Normally dir should not be empty, one case is that cgroup subsystem - // is not mounted, we will get empty dir, and we want it fail here. - if dir == "" { - return fmt.Errorf("no such directory for %s", CgroupProcesses) - } - - // Dont attach any pid to the cgroup if -1 is specified as a pid - if pid == -1 { - return nil - } - - file, err := OpenFile(dir, CgroupProcesses, os.O_WRONLY) - if err != nil { - return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) - } - defer file.Close() - - for i := 0; i < 5; i++ { - _, err = file.WriteString(strconv.Itoa(pid)) - if err == nil { - return nil - } - - // EINVAL might mean that the task being added to cgroup.procs is in state - // TASK_NEW. We should attempt to do so again. - if errors.Is(err, unix.EINVAL) { - time.Sleep(30 * time.Millisecond) - continue - } - - return fmt.Errorf("failed to write %v to %v: %v", pid, CgroupProcesses, err) - } - return err -} - -// Since the OCI spec is designed for cgroup v1, in some cases -// there is need to convert from the cgroup v1 configuration to cgroup v2 -// the formula for cpuShares is y = (1 + ((x - 2) * 9999) / 262142) -// convert from [2-262144] to [1-10000] -// 262144 comes from Linux kernel definition "#define MAX_SHARES (1UL << 18)" -func ConvertCPUSharesToCgroupV2Value(cpuShares uint64) uint64 { - if cpuShares == 0 { - return 0 - } - return (1 + ((cpuShares-2)*9999)/262142) -} - -// ConvertMemorySwapToCgroupV2Value converts MemorySwap value from OCI spec -// for use by cgroup v2 drivers. A conversion is needed since Resources.MemorySwap -// is defined as memory+swap combined, while in cgroup v2 swap is a separate value. -func ConvertMemorySwapToCgroupV2Value(memorySwap, memory int64) (int64, error) { - // for compatibility with cgroup1 controller, set swap to unlimited in - // case the memory is set to unlimited, and swap is not explicitly set, - // treating the request as "set both memory and swap to unlimited". 
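ConvertCPUSharesToCgroupV2Value above maps the v1 cpu.shares range [2..262144] onto the v2 cpu.weight range [1..10000]. A quick standalone check of the formula at a few points; the reimplementation below is for illustration only:

package main

import "fmt"

// v1SharesToV2Weight re-states the formula above:
// weight = 1 + ((shares - 2) * 9999) / 262142, with 0 passed through.
func v1SharesToV2Weight(shares uint64) uint64 {
	if shares == 0 {
		return 0
	}
	return 1 + ((shares-2)*9999)/262142
}

func main() {
	fmt.Println(v1SharesToV2Weight(2))      // 1 (minimum)
	fmt.Println(v1SharesToV2Weight(1024))   // 39 (the common default shares value)
	fmt.Println(v1SharesToV2Weight(262144)) // 10000 (maximum)
}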
- if memory == -1 && memorySwap == 0 { - return -1, nil - } - if memorySwap == -1 || memorySwap == 0 { - // -1 is "max", 0 is "unset", so treat as is - return memorySwap, nil - } - // sanity checks - if memory == 0 || memory == -1 { - return 0, errors.New("unable to set swap limit without memory limit") - } - if memory < 0 { - return 0, fmt.Errorf("invalid memory value: %d", memory) - } - if memorySwap < memory { - return 0, errors.New("memory+swap limit should be >= memory limit") - } - - return memorySwap - memory, nil -} - -// Since the OCI spec is designed for cgroup v1, in some cases -// there is need to convert from the cgroup v1 configuration to cgroup v2 -// the formula for BlkIOWeight to IOWeight is y = (1 + (x - 10) * 9999 / 990) -// convert linearly from [10-1000] to [1-10000] -func ConvertBlkIOToIOWeightValue(blkIoWeight uint16) uint64 { - if blkIoWeight == 0 { - return 0 - } - return uint64(1 + (uint64(blkIoWeight)-10)*9999/990) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go deleted file mode 100644 index 95ec9dff02..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/cgroups/v1_utils.go +++ /dev/null @@ -1,283 +0,0 @@ -package cgroups - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - "syscall" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/moby/sys/mountinfo" - "golang.org/x/sys/unix" -) - -// Code in this source file are specific to cgroup v1, -// and must not be used from any cgroup v2 code. - -const ( - CgroupNamePrefix = "name=" - defaultPrefix = "/sys/fs/cgroup" -) - -var ( - errUnified = errors.New("not implemented for cgroup v2 unified hierarchy") - ErrV1NoUnified = errors.New("invalid configuration: cannot use unified on cgroup v1") - - readMountinfoOnce sync.Once - readMountinfoErr error - cgroupMountinfo []*mountinfo.Info -) - -type NotFoundError struct { - Subsystem string -} - -func (e *NotFoundError) Error() string { - return fmt.Sprintf("mountpoint for %s not found", e.Subsystem) -} - -func NewNotFoundError(sub string) error { - return &NotFoundError{ - Subsystem: sub, - } -} - -func IsNotFound(err error) bool { - if err == nil { - return false - } - _, ok := err.(*NotFoundError) - return ok -} - -func tryDefaultPath(cgroupPath, subsystem string) string { - if !strings.HasPrefix(defaultPrefix, cgroupPath) { - return "" - } - - // remove possible prefix - subsystem = strings.TrimPrefix(subsystem, CgroupNamePrefix) - - // Make sure we're still under defaultPrefix, and resolve - // a possible symlink (like cpu -> cpu,cpuacct). - path, err := securejoin.SecureJoin(defaultPrefix, subsystem) - if err != nil { - return "" - } - - // (1) path should be a directory. - st, err := os.Lstat(path) - if err != nil || !st.IsDir() { - return "" - } - - // (2) path should be a mount point. - pst, err := os.Lstat(filepath.Dir(path)) - if err != nil { - return "" - } - - if st.Sys().(*syscall.Stat_t).Dev == pst.Sys().(*syscall.Stat_t).Dev { - // parent dir has the same dev -- path is not a mount point - return "" - } - - // (3) path should have 'cgroup' fs type. - fst := unix.Statfs_t{} - err = unix.Statfs(path, &fst) - if err != nil || fst.Type != unix.CGROUP_SUPER_MAGIC { - return "" - } - - return path -} - -// readCgroupMountinfo returns a list of cgroup v1 mounts (i.e. the ones -// with fstype of "cgroup") for the current running process. 
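The same section converts two more v1 settings: BlkIOWeight [10..1000] is mapped linearly onto IOWeight [1..10000], and MemorySwap (memory+swap combined in the OCI spec) becomes the v2 swap-only limit. A small illustration of both; the byte values are made up and the helpers restate the functions above:

package main

import (
	"errors"
	"fmt"
)

// blkioToIOWeight re-states ConvertBlkIOToIOWeightValue above.
func blkioToIOWeight(w uint16) uint64 {
	if w == 0 {
		return 0
	}
	return uint64(1 + (uint64(w)-10)*9999/990)
}

// swapMax re-states the core of ConvertMemorySwapToCgroupV2Value:
// v1 "memoryswap" is memory+swap, cgroup v2 wants swap alone.
func swapMax(memorySwap, memory int64) (int64, error) {
	if memory == -1 && memorySwap == 0 {
		return -1, nil // both unlimited
	}
	if memorySwap == -1 || memorySwap == 0 {
		return memorySwap, nil // "max" or unset, pass through
	}
	if memory <= 0 {
		return 0, errors.New("unable to set swap limit without memory limit")
	}
	if memorySwap < memory {
		return 0, errors.New("memory+swap limit should be >= memory limit")
	}
	return memorySwap - memory, nil
}

func main() {
	fmt.Println(blkioToIOWeight(10))   // 1
	fmt.Println(blkioToIOWeight(500))  // 4950
	fmt.Println(blkioToIOWeight(1000)) // 10000

	// 512 MiB memory and 768 MiB memory+swap => 256 MiB of swap in cgroup v2.
	s, _ := swapMax(768<<20, 512<<20)
	fmt.Println(s == 256<<20) // true
}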
-// -// The results are cached (to avoid re-reading mountinfo which is relatively -// expensive), so it is assumed that cgroup mounts are not being changed. -func readCgroupMountinfo() ([]*mountinfo.Info, error) { - readMountinfoOnce.Do(func() { - cgroupMountinfo, readMountinfoErr = mountinfo.GetMounts( - mountinfo.FSTypeFilter("cgroup"), - ) - }) - - return cgroupMountinfo, readMountinfoErr -} - -// https://www.kernel.org/doc/Documentation/cgroup-v1/cgroups.txt -func FindCgroupMountpoint(cgroupPath, subsystem string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - - // Avoid parsing mountinfo by trying the default path first, if possible. - if path := tryDefaultPath(cgroupPath, subsystem); path != "" { - return path, nil - } - - mnt, _, err := FindCgroupMountpointAndRoot(cgroupPath, subsystem) - return mnt, err -} - -func FindCgroupMountpointAndRoot(cgroupPath, subsystem string) (string, string, error) { - if IsCgroup2UnifiedMode() { - return "", "", errUnified - } - - mi, err := readCgroupMountinfo() - if err != nil { - return "", "", err - } - - return findCgroupMountpointAndRootFromMI(mi, cgroupPath, subsystem) -} - -func findCgroupMountpointAndRootFromMI(mounts []*mountinfo.Info, cgroupPath, subsystem string) (string, string, error) { - for _, mi := range mounts { - if strings.HasPrefix(mi.Mountpoint, cgroupPath) { - for _, opt := range strings.Split(mi.VFSOptions, ",") { - if opt == subsystem { - return mi.Mountpoint, mi.Root, nil - } - } - } - } - - return "", "", NewNotFoundError(subsystem) -} - -func (m Mount) GetOwnCgroup(cgroups map[string]string) (string, error) { - if len(m.Subsystems) == 0 { - return "", fmt.Errorf("no subsystem for mount") - } - - return getControllerPath(m.Subsystems[0], cgroups) -} - -func getCgroupMountsHelper(ss map[string]bool, mounts []*mountinfo.Info, all bool) ([]Mount, error) { - res := make([]Mount, 0, len(ss)) - numFound := 0 - for _, mi := range mounts { - m := Mount{ - Mountpoint: mi.Mountpoint, - Root: mi.Root, - } - for _, opt := range strings.Split(mi.VFSOptions, ",") { - seen, known := ss[opt] - if !known || (!all && seen) { - continue - } - ss[opt] = true - opt = strings.TrimPrefix(opt, CgroupNamePrefix) - m.Subsystems = append(m.Subsystems, opt) - numFound++ - } - if len(m.Subsystems) > 0 || all { - res = append(res, m) - } - if !all && numFound >= len(ss) { - break - } - } - return res, nil -} - -func getCgroupMountsV1(all bool) ([]Mount, error) { - mi, err := readCgroupMountinfo() - if err != nil { - return nil, err - } - - allSubsystems, err := ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return nil, err - } - - allMap := make(map[string]bool) - for s := range allSubsystems { - allMap[s] = false - } - - return getCgroupMountsHelper(allMap, mi, all) -} - -// GetOwnCgroup returns the relative path to the cgroup docker is running in. 
-func GetOwnCgroup(subsystem string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - cgroups, err := ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return "", err - } - - return getControllerPath(subsystem, cgroups) -} - -func GetOwnCgroupPath(subsystem string) (string, error) { - cgroup, err := GetOwnCgroup(subsystem) - if err != nil { - return "", err - } - - return getCgroupPathHelper(subsystem, cgroup) -} - -func GetInitCgroup(subsystem string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - cgroups, err := ParseCgroupFile("/proc/1/cgroup") - if err != nil { - return "", err - } - - return getControllerPath(subsystem, cgroups) -} - -func GetInitCgroupPath(subsystem string) (string, error) { - cgroup, err := GetInitCgroup(subsystem) - if err != nil { - return "", err - } - - return getCgroupPathHelper(subsystem, cgroup) -} - -func getCgroupPathHelper(subsystem, cgroup string) (string, error) { - mnt, root, err := FindCgroupMountpointAndRoot("", subsystem) - if err != nil { - return "", err - } - - // This is needed for nested containers, because in /proc/self/cgroup we - // see paths from host, which don't exist in container. - relCgroup, err := filepath.Rel(root, cgroup) - if err != nil { - return "", err - } - - return filepath.Join(mnt, relCgroup), nil -} - -func getControllerPath(subsystem string, cgroups map[string]string) (string, error) { - if IsCgroup2UnifiedMode() { - return "", errUnified - } - - if p, ok := cgroups[subsystem]; ok { - return p, nil - } - - if p, ok := cgroups[CgroupNamePrefix+subsystem]; ok { - return p, nil - } - - return "", NewNotFoundError(subsystem) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go deleted file mode 100644 index 9a6e5eb32a..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/rootless.go +++ /dev/null @@ -1,93 +0,0 @@ -package validate - -import ( - "errors" - "fmt" - "strings" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -// rootlessEUID makes sure that the config can be applied when runc -// is being executed as a non-root user (euid != 0) in the current user namespace. -func (v *ConfigValidator) rootlessEUID(config *configs.Config) error { - if !config.RootlessEUID { - return nil - } - if err := rootlessEUIDMappings(config); err != nil { - return err - } - if err := rootlessEUIDMount(config); err != nil { - return err - } - - // XXX: We currently can't verify the user config at all, because - // configs.Config doesn't store the user-related configs. So this - // has to be verified by setupUser() in init_linux.go. - - return nil -} - -func hasIDMapping(id int, mappings []configs.IDMap) bool { - for _, m := range mappings { - if id >= m.ContainerID && id < m.ContainerID+m.Size { - return true - } - } - return false -} - -func rootlessEUIDMappings(config *configs.Config) error { - if !config.Namespaces.Contains(configs.NEWUSER) { - return errors.New("rootless container requires user namespaces") - } - - if len(config.UidMappings) == 0 { - return errors.New("rootless containers requires at least one UID mapping") - } - if len(config.GidMappings) == 0 { - return errors.New("rootless containers requires at least one GID mapping") - } - return nil -} - -// mount verifies that the user isn't trying to set up any mounts they don't have -// the rights to do. 
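getCgroupPathHelper above rebases the cgroup path seen in /proc/self/cgroup (a host view) onto the subsystem mountpoint, which matters for nested containers where the host path does not exist. A standalone sketch of that path arithmetic with hypothetical values:

package main

import (
	"fmt"
	"path/filepath"
)

func main() {
	// Values in the shape returned by FindCgroupMountpointAndRoot and
	// ParseCgroupFile; all three are examples only.
	mnt := "/sys/fs/cgroup/cpu,cpuacct"     // where the subsystem is mounted
	root := "/user.slice"                   // the mount's root, as seen on the host
	cgroup := "/user.slice/user-1000.slice" // our cgroup path from /proc/self/cgroup

	// Rebase the host-view path onto the mountpoint, as getCgroupPathHelper does.
	rel, err := filepath.Rel(root, cgroup)
	if err != nil {
		panic(err)
	}
	fmt.Println(filepath.Join(mnt, rel)) // /sys/fs/cgroup/cpu,cpuacct/user-1000.slice
}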
In addition, it makes sure that no mount has a `uid=` or -// `gid=` option that doesn't resolve to root. -func rootlessEUIDMount(config *configs.Config) error { - // XXX: We could whitelist allowed devices at this point, but I'm not - // convinced that's a good idea. The kernel is the best arbiter of - // access control. - - for _, mount := range config.Mounts { - // Check that the options list doesn't contain any uid= or gid= entries - // that don't resolve to root. - for _, opt := range strings.Split(mount.Data, ",") { - if strings.HasPrefix(opt, "uid=") { - var uid int - n, err := fmt.Sscanf(opt, "uid=%d", &uid) - if n != 1 || err != nil { - // Ignore unknown mount options. - continue - } - if !hasIDMapping(uid, config.UidMappings) { - return errors.New("cannot specify uid= mount options for unmapped uid in rootless containers") - } - } - - if strings.HasPrefix(opt, "gid=") { - var gid int - n, err := fmt.Sscanf(opt, "gid=%d", &gid) - if n != 1 || err != nil { - // Ignore unknown mount options. - continue - } - if !hasIDMapping(gid, config.GidMappings) { - return errors.New("cannot specify gid= mount options for unmapped gid in rootless containers") - } - } - } - } - - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go deleted file mode 100644 index b025460024..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/configs/validate/validator.go +++ /dev/null @@ -1,278 +0,0 @@ -package validate - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "strings" - "sync" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/intelrdt" - selinux "github.com/opencontainers/selinux/go-selinux" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -type Validator interface { - Validate(*configs.Config) error -} - -func New() Validator { - return &ConfigValidator{} -} - -type ConfigValidator struct{} - -type check func(config *configs.Config) error - -func (v *ConfigValidator) Validate(config *configs.Config) error { - checks := []check{ - v.cgroups, - v.rootfs, - v.network, - v.hostname, - v.security, - v.usernamespace, - v.cgroupnamespace, - v.sysctl, - v.intelrdt, - v.rootlessEUID, - } - for _, c := range checks { - if err := c(config); err != nil { - return err - } - } - // Relaxed validation rules for backward compatibility - warns := []check{ - v.mounts, // TODO (runc v1.x.x): make this an error instead of a warning - } - for _, c := range warns { - if err := c(config); err != nil { - logrus.WithError(err).Warnf("invalid configuration") - } - } - return nil -} - -// rootfs validates if the rootfs is an absolute path and is not a symlink -// to the container's root filesystem. 
-func (v *ConfigValidator) rootfs(config *configs.Config) error { - if _, err := os.Stat(config.Rootfs); err != nil { - if os.IsNotExist(err) { - return fmt.Errorf("rootfs (%s) does not exist", config.Rootfs) - } - return err - } - cleaned, err := filepath.Abs(config.Rootfs) - if err != nil { - return err - } - if cleaned, err = filepath.EvalSymlinks(cleaned); err != nil { - return err - } - if filepath.Clean(config.Rootfs) != cleaned { - return fmt.Errorf("%s is not an absolute path or is a symlink", config.Rootfs) - } - return nil -} - -func (v *ConfigValidator) network(config *configs.Config) error { - if !config.Namespaces.Contains(configs.NEWNET) { - if len(config.Networks) > 0 || len(config.Routes) > 0 { - return errors.New("unable to apply network settings without a private NET namespace") - } - } - return nil -} - -func (v *ConfigValidator) hostname(config *configs.Config) error { - if config.Hostname != "" && !config.Namespaces.Contains(configs.NEWUTS) { - return errors.New("unable to set hostname without a private UTS namespace") - } - return nil -} - -func (v *ConfigValidator) security(config *configs.Config) error { - // restrict sys without mount namespace - if (len(config.MaskPaths) > 0 || len(config.ReadonlyPaths) > 0) && - !config.Namespaces.Contains(configs.NEWNS) { - return errors.New("unable to restrict sys entries without a private MNT namespace") - } - if config.ProcessLabel != "" && !selinux.GetEnabled() { - return errors.New("selinux label is specified in config, but selinux is disabled or not supported") - } - - return nil -} - -func (v *ConfigValidator) usernamespace(config *configs.Config) error { - if config.Namespaces.Contains(configs.NEWUSER) { - if _, err := os.Stat("/proc/self/ns/user"); os.IsNotExist(err) { - return errors.New("USER namespaces aren't enabled in the kernel") - } - } else { - if config.UidMappings != nil || config.GidMappings != nil { - return errors.New("User namespace mappings specified, but USER namespace isn't enabled in the config") - } - } - return nil -} - -func (v *ConfigValidator) cgroupnamespace(config *configs.Config) error { - if config.Namespaces.Contains(configs.NEWCGROUP) { - if _, err := os.Stat("/proc/self/ns/cgroup"); os.IsNotExist(err) { - return errors.New("cgroup namespaces aren't enabled in the kernel") - } - } - return nil -} - -// sysctl validates that the specified sysctl keys are valid or not. -// /proc/sys isn't completely namespaced and depending on which namespaces -// are specified, a subset of sysctls are permitted. -func (v *ConfigValidator) sysctl(config *configs.Config) error { - validSysctlMap := map[string]bool{ - "kernel.msgmax": true, - "kernel.msgmnb": true, - "kernel.msgmni": true, - "kernel.sem": true, - "kernel.shmall": true, - "kernel.shmmax": true, - "kernel.shmmni": true, - "kernel.shm_rmid_forced": true, - } - - var ( - netOnce sync.Once - hostnet bool - hostnetErr error - ) - - for s := range config.Sysctl { - if validSysctlMap[s] || strings.HasPrefix(s, "fs.mqueue.") { - if config.Namespaces.Contains(configs.NEWIPC) { - continue - } else { - return fmt.Errorf("sysctl %q is not allowed in the hosts ipc namespace", s) - } - } - if strings.HasPrefix(s, "net.") { - // Is container using host netns? - // Here "host" means "current", not "initial". 
- netOnce.Do(func() { - if !config.Namespaces.Contains(configs.NEWNET) { - hostnet = true - return - } - path := config.Namespaces.PathOf(configs.NEWNET) - if path == "" { - // own netns, so hostnet = false - return - } - hostnet, hostnetErr = isHostNetNS(path) - }) - if hostnetErr != nil { - return hostnetErr - } - if hostnet { - return fmt.Errorf("sysctl %q not allowed in host network namespace", s) - } - continue - } - if config.Namespaces.Contains(configs.NEWUTS) { - switch s { - case "kernel.domainname": - // This is namespaced and there's no explicit OCI field for it. - continue - case "kernel.hostname": - // This is namespaced but there's a conflicting (dedicated) OCI field for it. - return fmt.Errorf("sysctl %q is not allowed as it conflicts with the OCI %q field", s, "hostname") - } - } - return fmt.Errorf("sysctl %q is not in a separate kernel namespace", s) - } - - return nil -} - -func (v *ConfigValidator) intelrdt(config *configs.Config) error { - if config.IntelRdt != nil { - if !intelrdt.IsCATEnabled() && !intelrdt.IsMBAEnabled() { - return errors.New("intelRdt is specified in config, but Intel RDT is not supported or enabled") - } - - if !intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema != "" { - return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled") - } - if !intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema != "" { - return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled") - } - - if intelrdt.IsCATEnabled() && config.IntelRdt.L3CacheSchema == "" { - return errors.New("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty") - } - if intelrdt.IsMBAEnabled() && config.IntelRdt.MemBwSchema == "" { - return errors.New("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty") - } - } - - return nil -} - -func (v *ConfigValidator) cgroups(config *configs.Config) error { - c := config.Cgroups - if c == nil { - return nil - } - - if (c.Name != "" || c.Parent != "") && c.Path != "" { - return fmt.Errorf("cgroup: either Path or Name and Parent should be used, got %+v", c) - } - - r := c.Resources - if r == nil { - return nil - } - - if !cgroups.IsCgroup2UnifiedMode() && r.Unified != nil { - return cgroups.ErrV1NoUnified - } - - if cgroups.IsCgroup2UnifiedMode() { - _, err := cgroups.ConvertMemorySwapToCgroupV2Value(r.MemorySwap, r.Memory) - if err != nil { - return err - } - } - - return nil -} - -func (v *ConfigValidator) mounts(config *configs.Config) error { - for _, m := range config.Mounts { - if !filepath.IsAbs(m.Destination) { - return fmt.Errorf("invalid mount %+v: mount destination not absolute", m) - } - } - - return nil -} - -func isHostNetNS(path string) (bool, error) { - const currentProcessNetns = "/proc/self/ns/net" - - var st1, st2 unix.Stat_t - - if err := unix.Stat(currentProcessNetns, &st1); err != nil { - return false, fmt.Errorf("unable to stat %q: %s", currentProcessNetns, err) - } - if err := unix.Stat(path, &st2); err != nil { - return false, fmt.Errorf("unable to stat %q: %s", path, err) - } - - return (st1.Dev == st2.Dev) && (st1.Ino == st2.Ino), nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go deleted file mode 100644 index 7bfff026ac..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/console_linux.go +++ 
/dev/null @@ -1,41 +0,0 @@ -package libcontainer - -import ( - "os" - - "golang.org/x/sys/unix" -) - -// mount initializes the console inside the rootfs mounting with the specified mount label -// and applying the correct ownership of the console. -func mountConsole(slavePath string) error { - oldMask := unix.Umask(0o000) - defer unix.Umask(oldMask) - f, err := os.Create("/dev/console") - if err != nil && !os.IsExist(err) { - return err - } - if f != nil { - f.Close() - } - return unix.Mount(slavePath, "/dev/console", "bind", unix.MS_BIND, "") -} - -// dupStdio opens the slavePath for the console and dups the fds to the current -// processes stdio, fd 0,1,2. -func dupStdio(slavePath string) error { - fd, err := unix.Open(slavePath, unix.O_RDWR, 0) - if err != nil { - return &os.PathError{ - Op: "open", - Path: slavePath, - Err: err, - } - } - for _, i := range []int{0, 1, 2} { - if err := unix.Dup3(fd, i, 0); err != nil { - return err - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container.go deleted file mode 100644 index ba7541c5fd..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container.go +++ /dev/null @@ -1,173 +0,0 @@ -// Package libcontainer provides a native Go implementation for creating containers -// with namespaces, cgroups, capabilities, and filesystem access controls. -// It allows you to manage the lifecycle of the container performing additional operations -// after the container is created. -package libcontainer - -import ( - "os" - "time" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runtime-spec/specs-go" -) - -// Status is the status of a container. -type Status int - -const ( - // Created is the status that denotes the container exists but has not been run yet. - Created Status = iota - // Running is the status that denotes the container exists and is running. - Running - // Pausing is the status that denotes the container exists, it is in the process of being paused. - Pausing - // Paused is the status that denotes the container exists, but all its processes are paused. - Paused - // Stopped is the status that denotes the container does not have a created or running process. - Stopped -) - -func (s Status) String() string { - switch s { - case Created: - return "created" - case Running: - return "running" - case Pausing: - return "pausing" - case Paused: - return "paused" - case Stopped: - return "stopped" - default: - return "unknown" - } -} - -// BaseState represents the platform agnostic pieces relating to a -// running container's state -type BaseState struct { - // ID is the container ID. - ID string `json:"id"` - - // InitProcessPid is the init process id in the parent namespace. - InitProcessPid int `json:"init_process_pid"` - - // InitProcessStartTime is the init process start time in clock cycles since boot time. - InitProcessStartTime uint64 `json:"init_process_start"` - - // Created is the unix timestamp for the creation time of the container in UTC - Created time.Time `json:"created"` - - // Config is the container's configuration. - Config configs.Config `json:"config"` -} - -// BaseContainer is a libcontainer container object. -// -// Each container is thread-safe within the same process. Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. 
BaseContainer includes methods that are platform agnostic. -type BaseContainer interface { - // Returns the ID of the container - ID() string - - // Returns the current status of the container. - // - // errors: - // ContainerNotExists - Container no longer exists, - // Systemerror - System error. - Status() (Status, error) - - // State returns the current container's state information. - // - // errors: - // SystemError - System error. - State() (*State, error) - - // OCIState returns the current container's state information. - // - // errors: - // SystemError - System error. - OCIState() (*specs.State, error) - - // Returns the current config of the container. - Config() configs.Config - - // Returns the PIDs inside this container. The PIDs are in the namespace of the calling process. - // - // errors: - // ContainerNotExists - Container no longer exists, - // Systemerror - System error. - // - // Some of the returned PIDs may no longer refer to processes in the Container, unless - // the Container state is PAUSED in which case every PID in the slice is valid. - Processes() ([]int, error) - - // Returns statistics for the container. - // - // errors: - // ContainerNotExists - Container no longer exists, - // Systemerror - System error. - Stats() (*Stats, error) - - // Set resources of container as configured - // - // We can use this to change resources when containers are running. - // - // errors: - // SystemError - System error. - Set(config configs.Config) error - - // Start a process inside the container. Returns error if process fails to - // start. You can track process lifecycle with passed Process structure. - // - // errors: - // ContainerNotExists - Container no longer exists, - // ConfigInvalid - config is invalid, - // ContainerPaused - Container is paused, - // SystemError - System error. - Start(process *Process) (err error) - - // Run immediately starts the process inside the container. Returns error if process - // fails to start. It does not block waiting for the exec fifo after start returns but - // opens the fifo after start returns. - // - // errors: - // ContainerNotExists - Container no longer exists, - // ConfigInvalid - config is invalid, - // ContainerPaused - Container is paused, - // SystemError - System error. - Run(process *Process) (err error) - - // Destroys the container, if its in a valid state, after killing any - // remaining running processes. - // - // Any event registrations are removed before the container is destroyed. - // No error is returned if the container is already destroyed. - // - // Running containers must first be stopped using Signal(..). - // Paused containers must first be resumed using Resume(..). - // - // errors: - // ContainerNotStopped - Container is still running, - // ContainerPaused - Container is paused, - // SystemError - System error. - Destroy() error - - // Signal sends the provided signal code to the container's initial process. - // - // If all is specified the signal is sent to all processes in the container - // including the initial process. - // - // errors: - // SystemError - System error. - Signal(s os.Signal, all bool) error - - // Exec signals the container to exec the users process at the end of the init. - // - // errors: - // SystemError - System error. 
- Exec() error -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go deleted file mode 100644 index 6ce1854f68..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/container_linux.go +++ /dev/null @@ -1,2154 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "bytes" - "encoding/json" - "errors" - "fmt" - "io" - "io/ioutil" - "net" - "os" - "os/exec" - "path/filepath" - "reflect" - "strconv" - "strings" - "sync" - "time" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/intelrdt" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/runc/libcontainer/utils" - "github.com/opencontainers/runtime-spec/specs-go" - - "github.com/checkpoint-restore/go-criu/v5" - criurpc "github.com/checkpoint-restore/go-criu/v5/rpc" - errorsf "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "github.com/vishvananda/netlink/nl" - "golang.org/x/sys/unix" - "google.golang.org/protobuf/proto" -) - -const stdioFdCount = 3 - -type linuxContainer struct { - id string - root string - config *configs.Config - cgroupManager cgroups.Manager - intelRdtManager intelrdt.Manager - initPath string - initArgs []string - initProcess parentProcess - initProcessStartTime uint64 - criuPath string - newuidmapPath string - newgidmapPath string - m sync.Mutex - criuVersion int - state containerState - created time.Time - fifo *os.File -} - -// State represents a running container's state -type State struct { - BaseState - - // Platform specific fields below here - - // Specified if the container was started under the rootless mode. - // Set to true if BaseState.Config.RootlessEUID && BaseState.Config.RootlessCgroups - Rootless bool `json:"rootless"` - - // Paths to all the container's cgroups, as returned by (*cgroups.Manager).GetPaths - // - // For cgroup v1, a key is cgroup subsystem name, and the value is the path - // to the cgroup for this subsystem. - // - // For cgroup v2 unified hierarchy, a key is "", and the value is the unified path. - CgroupPaths map[string]string `json:"cgroup_paths"` - - // NamespacePaths are filepaths to the container's namespaces. Key is the namespace type - // with the value as the path. - NamespacePaths map[configs.NamespaceType]string `json:"namespace_paths"` - - // Container's standard descriptors (std{in,out,err}), needed for checkpoint and restore - ExternalDescriptors []string `json:"external_descriptors,omitempty"` - - // Intel RDT "resource control" filesystem path - IntelRdtPath string `json:"intel_rdt_path"` -} - -// Container is a libcontainer container object. -// -// Each container is thread-safe within the same process. Since a container can -// be destroyed by a separate process, any function may return that the container -// was not found. -type Container interface { - BaseContainer - - // Methods below here are platform specific - - // Checkpoint checkpoints the running container's state to disk using the criu(8) utility. - // - // errors: - // Systemerror - System error. - Checkpoint(criuOpts *CriuOpts) error - - // Restore restores the checkpointed container to a running state using the criu(8) utility. - // - // errors: - // Systemerror - System error. 
- Restore(process *Process, criuOpts *CriuOpts) error - - // If the Container state is RUNNING or CREATED, sets the Container state to PAUSING and pauses - // the execution of any user processes. Asynchronously, when the container finished being paused the - // state is changed to PAUSED. - // If the Container state is PAUSED, do nothing. - // - // errors: - // ContainerNotExists - Container no longer exists, - // ContainerNotRunning - Container not running or created, - // Systemerror - System error. - Pause() error - - // If the Container state is PAUSED, resumes the execution of any user processes in the - // Container before setting the Container state to RUNNING. - // If the Container state is RUNNING, do nothing. - // - // errors: - // ContainerNotExists - Container no longer exists, - // ContainerNotPaused - Container is not paused, - // Systemerror - System error. - Resume() error - - // NotifyOOM returns a read-only channel signaling when the container receives an OOM notification. - // - // errors: - // Systemerror - System error. - NotifyOOM() (<-chan struct{}, error) - - // NotifyMemoryPressure returns a read-only channel signaling when the container reaches a given pressure level - // - // errors: - // Systemerror - System error. - NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) -} - -// ID returns the container's unique ID -func (c *linuxContainer) ID() string { - return c.id -} - -// Config returns the container's configuration -func (c *linuxContainer) Config() configs.Config { - return *c.config -} - -func (c *linuxContainer) Status() (Status, error) { - c.m.Lock() - defer c.m.Unlock() - return c.currentStatus() -} - -func (c *linuxContainer) State() (*State, error) { - c.m.Lock() - defer c.m.Unlock() - return c.currentState() -} - -func (c *linuxContainer) OCIState() (*specs.State, error) { - c.m.Lock() - defer c.m.Unlock() - return c.currentOCIState() -} - -func (c *linuxContainer) Processes() ([]int, error) { - var pids []int - status, err := c.currentStatus() - if err != nil { - return pids, err - } - // for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited - if status == Stopped && !c.cgroupManager.Exists() { - return pids, nil - } - - pids, err = c.cgroupManager.GetAllPids() - if err != nil { - return nil, newSystemErrorWithCause(err, "getting all container pids from cgroups") - } - return pids, nil -} - -func (c *linuxContainer) Stats() (*Stats, error) { - var ( - err error - stats = &Stats{} - ) - if stats.CgroupStats, err = c.cgroupManager.GetStats(); err != nil { - return stats, newSystemErrorWithCause(err, "getting container stats from cgroups") - } - if c.intelRdtManager != nil { - if stats.IntelRdtStats, err = c.intelRdtManager.GetStats(); err != nil { - return stats, newSystemErrorWithCause(err, "getting container's Intel RDT stats") - } - } - for _, iface := range c.config.Networks { - switch iface.Type { - case "veth": - istats, err := getNetworkInterfaceStats(iface.HostInterfaceName) - if err != nil { - return stats, newSystemErrorWithCausef(err, "getting network stats for interface %q", iface.HostInterfaceName) - } - stats.Interfaces = append(stats.Interfaces, istats) - } - } - return stats, nil -} - -func (c *linuxContainer) Set(config configs.Config) error { - c.m.Lock() - defer c.m.Unlock() - status, err := c.currentStatus() - if err != nil { - return err - } - if status == Stopped { - return newGenericError(errors.New("container not running"), ContainerNotRunning) - } - if err := 
c.cgroupManager.Set(config.Cgroups.Resources); err != nil { - // Set configs back - if err2 := c.cgroupManager.Set(c.config.Cgroups.Resources); err2 != nil { - logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2) - } - return err - } - if c.intelRdtManager != nil { - if err := c.intelRdtManager.Set(&config); err != nil { - // Set configs back - if err2 := c.cgroupManager.Set(c.config.Cgroups.Resources); err2 != nil { - logrus.Warnf("Setting back cgroup configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2) - } - if err2 := c.intelRdtManager.Set(c.config); err2 != nil { - logrus.Warnf("Setting back intelrdt configs failed due to error: %v, your state.json and actual configs might be inconsistent.", err2) - } - return err - } - } - // After config setting succeed, update config and states - c.config = &config - _, err = c.updateState(nil) - return err -} - -func (c *linuxContainer) Start(process *Process) error { - c.m.Lock() - defer c.m.Unlock() - if c.config.Cgroups.Resources.SkipDevices { - return newGenericError(errors.New("can't start container with SkipDevices set"), ConfigInvalid) - } - if process.Init { - if err := c.createExecFifo(); err != nil { - return err - } - } - if err := c.start(process); err != nil { - if process.Init { - c.deleteExecFifo() - } - return err - } - return nil -} - -func (c *linuxContainer) Run(process *Process) error { - if err := c.Start(process); err != nil { - return err - } - if process.Init { - return c.exec() - } - return nil -} - -func (c *linuxContainer) Exec() error { - c.m.Lock() - defer c.m.Unlock() - return c.exec() -} - -func (c *linuxContainer) exec() error { - path := filepath.Join(c.root, execFifoFilename) - pid := c.initProcess.pid() - blockingFifoOpenCh := awaitFifoOpen(path) - for { - select { - case result := <-blockingFifoOpenCh: - return handleFifoResult(result) - - case <-time.After(time.Millisecond * 100): - stat, err := system.Stat(pid) - if err != nil || stat.State == system.Zombie { - // could be because process started, ran, and completed between our 100ms timeout and our system.Stat() check. - // see if the fifo exists and has data (with a non-blocking open, which will succeed if the writing process is complete). 
- if err := handleFifoResult(fifoOpen(path, false)); err != nil { - return errors.New("container process is already dead") - } - return nil - } - } - } -} - -func readFromExecFifo(execFifo io.Reader) error { - data, err := ioutil.ReadAll(execFifo) - if err != nil { - return err - } - if len(data) <= 0 { - return errors.New("cannot start an already running container") - } - return nil -} - -func awaitFifoOpen(path string) <-chan openResult { - fifoOpened := make(chan openResult) - go func() { - result := fifoOpen(path, true) - fifoOpened <- result - }() - return fifoOpened -} - -func fifoOpen(path string, block bool) openResult { - flags := os.O_RDONLY - if !block { - flags |= unix.O_NONBLOCK - } - f, err := os.OpenFile(path, flags, 0) - if err != nil { - return openResult{err: newSystemErrorWithCause(err, "open exec fifo for reading")} - } - return openResult{file: f} -} - -func handleFifoResult(result openResult) error { - if result.err != nil { - return result.err - } - f := result.file - defer f.Close() - if err := readFromExecFifo(f); err != nil { - return err - } - return os.Remove(f.Name()) -} - -type openResult struct { - file *os.File - err error -} - -func (c *linuxContainer) start(process *Process) (retErr error) { - parent, err := c.newParentProcess(process) - if err != nil { - return newSystemErrorWithCause(err, "creating new parent process") - } - - logsDone := parent.forwardChildLogs() - if logsDone != nil { - defer func() { - // Wait for log forwarder to finish. This depends on - // runc init closing the _LIBCONTAINER_LOGPIPE log fd. - err := <-logsDone - if err != nil && retErr == nil { - retErr = newSystemErrorWithCause(err, "forwarding init logs") - } - }() - } - - if err := parent.start(); err != nil { - return newSystemErrorWithCause(err, "starting container process") - } - - if process.Init { - c.fifo.Close() - if c.config.Hooks != nil { - s, err := c.currentOCIState() - if err != nil { - return err - } - - if err := c.config.Hooks[configs.Poststart].RunHooks(s); err != nil { - if err := ignoreTerminateErrors(parent.terminate()); err != nil { - logrus.Warn(errorsf.Wrapf(err, "Running Poststart hook")) - } - return err - } - } - } - return nil -} - -func (c *linuxContainer) Signal(s os.Signal, all bool) error { - c.m.Lock() - defer c.m.Unlock() - status, err := c.currentStatus() - if err != nil { - return err - } - if all { - // for systemd cgroup, the unit's cgroup path will be auto removed if container's all processes exited - if status == Stopped && !c.cgroupManager.Exists() { - return nil - } - return signalAllProcesses(c.cgroupManager, s) - } - // to avoid a PID reuse attack - if status == Running || status == Created || status == Paused { - if err := c.initProcess.signal(s); err != nil { - return newSystemErrorWithCause(err, "signaling init process") - } - return nil - } - return newGenericError(errors.New("container not running"), ContainerNotRunning) -} - -func (c *linuxContainer) createExecFifo() error { - rootuid, err := c.Config().HostRootUID() - if err != nil { - return err - } - rootgid, err := c.Config().HostRootGID() - if err != nil { - return err - } - - fifoName := filepath.Join(c.root, execFifoFilename) - if _, err := os.Stat(fifoName); err == nil { - return fmt.Errorf("exec fifo %s already exists", fifoName) - } - oldMask := unix.Umask(0o000) - if err := unix.Mkfifo(fifoName, 0o622); err != nil { - unix.Umask(oldMask) - return err - } - unix.Umask(oldMask) - return os.Chown(fifoName, rootuid, rootgid) -} - -func (c *linuxContainer) deleteExecFifo() { 
- fifoName := filepath.Join(c.root, execFifoFilename) - os.Remove(fifoName) -} - -// includeExecFifo opens the container's execfifo as a pathfd, so that the -// container cannot access the statedir (and the FIFO itself remains -// un-opened). It then adds the FifoFd to the given exec.Cmd as an inherited -// fd, with _LIBCONTAINER_FIFOFD set to its fd number. -func (c *linuxContainer) includeExecFifo(cmd *exec.Cmd) error { - fifoName := filepath.Join(c.root, execFifoFilename) - fifo, err := os.OpenFile(fifoName, unix.O_PATH|unix.O_CLOEXEC, 0) - if err != nil { - return err - } - c.fifo = fifo - - cmd.ExtraFiles = append(cmd.ExtraFiles, fifo) - cmd.Env = append(cmd.Env, - "_LIBCONTAINER_FIFOFD="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1)) - return nil -} - -func (c *linuxContainer) newParentProcess(p *Process) (parentProcess, error) { - parentInitPipe, childInitPipe, err := utils.NewSockPair("init") - if err != nil { - return nil, newSystemErrorWithCause(err, "creating new init pipe") - } - messageSockPair := filePair{parentInitPipe, childInitPipe} - - parentLogPipe, childLogPipe, err := os.Pipe() - if err != nil { - return nil, fmt.Errorf("Unable to create the log pipe: %s", err) - } - logFilePair := filePair{parentLogPipe, childLogPipe} - - cmd := c.commandTemplate(p, childInitPipe, childLogPipe) - if !p.Init { - return c.newSetnsProcess(p, cmd, messageSockPair, logFilePair) - } - - // We only set up fifoFd if we're not doing a `runc exec`. The historic - // reason for this is that previously we would pass a dirfd that allowed - // for container rootfs escape (and not doing it in `runc exec` avoided - // that problem), but we no longer do that. However, there's no need to do - // this for `runc exec` so we just keep it this way to be safe. - if err := c.includeExecFifo(cmd); err != nil { - return nil, newSystemErrorWithCause(err, "including execfifo in cmd.Exec setup") - } - return c.newInitProcess(p, cmd, messageSockPair, logFilePair) -} - -func (c *linuxContainer) commandTemplate(p *Process, childInitPipe *os.File, childLogPipe *os.File) *exec.Cmd { - cmd := exec.Command(c.initPath, c.initArgs[1:]...) - cmd.Args[0] = c.initArgs[0] - cmd.Stdin = p.Stdin - cmd.Stdout = p.Stdout - cmd.Stderr = p.Stderr - cmd.Dir = c.config.Rootfs - if cmd.SysProcAttr == nil { - cmd.SysProcAttr = &unix.SysProcAttr{} - } - cmd.Env = append(cmd.Env, "GOMAXPROCS="+os.Getenv("GOMAXPROCS")) - cmd.ExtraFiles = append(cmd.ExtraFiles, p.ExtraFiles...) - if p.ConsoleSocket != nil { - cmd.ExtraFiles = append(cmd.ExtraFiles, p.ConsoleSocket) - cmd.Env = append(cmd.Env, - "_LIBCONTAINER_CONSOLE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1), - ) - } - cmd.ExtraFiles = append(cmd.ExtraFiles, childInitPipe) - cmd.Env = append(cmd.Env, - "_LIBCONTAINER_INITPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1), - "_LIBCONTAINER_STATEDIR="+c.root, - ) - - cmd.ExtraFiles = append(cmd.ExtraFiles, childLogPipe) - cmd.Env = append(cmd.Env, - "_LIBCONTAINER_LOGPIPE="+strconv.Itoa(stdioFdCount+len(cmd.ExtraFiles)-1), - "_LIBCONTAINER_LOGLEVEL="+p.LogLevel, - ) - - // NOTE: when running a container with no PID namespace and the parent process spawning the container is - // PID1 the pdeathsig is being delivered to the container's init process by the kernel for some reason - // even with the parent still running. 
- if c.config.ParentDeathSignal > 0 { - cmd.SysProcAttr.Pdeathsig = unix.Signal(c.config.ParentDeathSignal) - } - return cmd -} - -func (c *linuxContainer) newInitProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*initProcess, error) { - cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initStandard)) - nsMaps := make(map[configs.NamespaceType]string) - for _, ns := range c.config.Namespaces { - if ns.Path != "" { - nsMaps[ns.Type] = ns.Path - } - } - _, sharePidns := nsMaps[configs.NEWPID] - data, err := c.bootstrapData(c.config.Namespaces.CloneFlags(), nsMaps) - if err != nil { - return nil, err - } - init := &initProcess{ - cmd: cmd, - messageSockPair: messageSockPair, - logFilePair: logFilePair, - manager: c.cgroupManager, - intelRdtManager: c.intelRdtManager, - config: c.newInitConfig(p), - container: c, - process: p, - bootstrapData: data, - sharePidns: sharePidns, - } - c.initProcess = init - return init, nil -} - -func (c *linuxContainer) newSetnsProcess(p *Process, cmd *exec.Cmd, messageSockPair, logFilePair filePair) (*setnsProcess, error) { - cmd.Env = append(cmd.Env, "_LIBCONTAINER_INITTYPE="+string(initSetns)) - state, err := c.currentState() - if err != nil { - return nil, newSystemErrorWithCause(err, "getting container's current state") - } - // for setns process, we don't have to set cloneflags as the process namespaces - // will only be set via setns syscall - data, err := c.bootstrapData(0, state.NamespacePaths) - if err != nil { - return nil, err - } - return &setnsProcess{ - cmd: cmd, - cgroupPaths: state.CgroupPaths, - rootlessCgroups: c.config.RootlessCgroups, - intelRdtPath: state.IntelRdtPath, - messageSockPair: messageSockPair, - logFilePair: logFilePair, - manager: c.cgroupManager, - config: c.newInitConfig(p), - process: p, - bootstrapData: data, - initProcessPid: state.InitProcessPid, - }, nil -} - -func (c *linuxContainer) newInitConfig(process *Process) *initConfig { - cfg := &initConfig{ - Config: c.config, - Args: process.Args, - Env: process.Env, - User: process.User, - AdditionalGroups: process.AdditionalGroups, - Cwd: process.Cwd, - Capabilities: process.Capabilities, - PassedFilesCount: len(process.ExtraFiles), - ContainerId: c.ID(), - NoNewPrivileges: c.config.NoNewPrivileges, - RootlessEUID: c.config.RootlessEUID, - RootlessCgroups: c.config.RootlessCgroups, - AppArmorProfile: c.config.AppArmorProfile, - ProcessLabel: c.config.ProcessLabel, - Rlimits: c.config.Rlimits, - CreateConsole: process.ConsoleSocket != nil, - ConsoleWidth: process.ConsoleWidth, - ConsoleHeight: process.ConsoleHeight, - } - if process.NoNewPrivileges != nil { - cfg.NoNewPrivileges = *process.NoNewPrivileges - } - if process.AppArmorProfile != "" { - cfg.AppArmorProfile = process.AppArmorProfile - } - if process.Label != "" { - cfg.ProcessLabel = process.Label - } - if len(process.Rlimits) > 0 { - cfg.Rlimits = process.Rlimits - } - if cgroups.IsCgroup2UnifiedMode() { - cfg.Cgroup2Path = c.cgroupManager.Path("") - } - - return cfg -} - -func (c *linuxContainer) Destroy() error { - c.m.Lock() - defer c.m.Unlock() - return c.state.destroy() -} - -func (c *linuxContainer) Pause() error { - c.m.Lock() - defer c.m.Unlock() - status, err := c.currentStatus() - if err != nil { - return err - } - switch status { - case Running, Created: - if err := c.cgroupManager.Freeze(configs.Frozen); err != nil { - return err - } - return c.state.transition(&pausedState{ - c: c, - }) - } - return newGenericError(fmt.Errorf("container not running or created: %s", 
status), ContainerNotRunning) -} - -func (c *linuxContainer) Resume() error { - c.m.Lock() - defer c.m.Unlock() - status, err := c.currentStatus() - if err != nil { - return err - } - if status != Paused { - return newGenericError(fmt.Errorf("container not paused"), ContainerNotPaused) - } - if err := c.cgroupManager.Freeze(configs.Thawed); err != nil { - return err - } - return c.state.transition(&runningState{ - c: c, - }) -} - -func (c *linuxContainer) NotifyOOM() (<-chan struct{}, error) { - // XXX(cyphar): This requires cgroups. - if c.config.RootlessCgroups { - logrus.Warn("getting OOM notifications may fail if you don't have the full access to cgroups") - } - path := c.cgroupManager.Path("memory") - if cgroups.IsCgroup2UnifiedMode() { - return notifyOnOOMV2(path) - } - return notifyOnOOM(path) -} - -func (c *linuxContainer) NotifyMemoryPressure(level PressureLevel) (<-chan struct{}, error) { - // XXX(cyphar): This requires cgroups. - if c.config.RootlessCgroups { - logrus.Warn("getting memory pressure notifications may fail if you don't have the full access to cgroups") - } - return notifyMemoryPressure(c.cgroupManager.Path("memory"), level) -} - -var criuFeatures *criurpc.CriuFeatures - -func (c *linuxContainer) checkCriuFeatures(criuOpts *CriuOpts, rpcOpts *criurpc.CriuOpts, criuFeat *criurpc.CriuFeatures) error { - t := criurpc.CriuReqType_FEATURE_CHECK - - // make sure the features we are looking for are really not from - // some previous check - criuFeatures = nil - - req := &criurpc.CriuReq{ - Type: &t, - // Theoretically this should not be necessary but CRIU - // segfaults if Opts is empty. - // Fixed in CRIU 2.12 - Opts: rpcOpts, - Features: criuFeat, - } - - err := c.criuSwrk(nil, req, criuOpts, nil) - if err != nil { - logrus.Debugf("%s", err) - return errors.New("CRIU feature check failed") - } - - missingFeatures := false - - // The outer if checks if the fields actually exist - if (criuFeat.MemTrack != nil) && - (criuFeatures.MemTrack != nil) { - // The inner if checks if they are set to true - if *criuFeat.MemTrack && !*criuFeatures.MemTrack { - missingFeatures = true - logrus.Debugf("CRIU does not support MemTrack") - } - } - - // This needs to be repeated for every new feature check. - // Is there a way to put this in a function. Reflection? - if (criuFeat.LazyPages != nil) && - (criuFeatures.LazyPages != nil) { - if *criuFeat.LazyPages && !*criuFeatures.LazyPages { - missingFeatures = true - logrus.Debugf("CRIU does not support LazyPages") - } - } - - if missingFeatures { - return errors.New("CRIU is missing features") - } - - return nil -} - -func compareCriuVersion(criuVersion int, minVersion int) error { - // simple function to perform the actual version compare - if criuVersion < minVersion { - return fmt.Errorf("CRIU version %d must be %d or higher", criuVersion, minVersion) - } - - return nil -} - -// checkCriuVersion checks Criu version greater than or equal to minVersion -func (c *linuxContainer) checkCriuVersion(minVersion int) error { - // If the version of criu has already been determined there is no need - // to ask criu for the version again. Use the value from c.criuVersion. 
- if c.criuVersion != 0 { - return compareCriuVersion(c.criuVersion, minVersion) - } - - criu := criu.MakeCriu() - criu.SetCriuPath(c.criuPath) - var err error - c.criuVersion, err = criu.GetCriuVersion() - if err != nil { - return fmt.Errorf("CRIU version check failed: %s", err) - } - - return compareCriuVersion(c.criuVersion, minVersion) -} - -const descriptorsFilename = "descriptors.json" - -func (c *linuxContainer) addCriuDumpMount(req *criurpc.CriuReq, m *configs.Mount) { - mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs) - extMnt := &criurpc.ExtMountMap{ - Key: proto.String(mountDest), - Val: proto.String(mountDest), - } - req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) -} - -func (c *linuxContainer) addMaskPaths(req *criurpc.CriuReq) error { - for _, path := range c.config.MaskPaths { - fi, err := os.Stat(fmt.Sprintf("/proc/%d/root/%s", c.initProcess.pid(), path)) - if err != nil { - if os.IsNotExist(err) { - continue - } - return err - } - if fi.IsDir() { - continue - } - - extMnt := &criurpc.ExtMountMap{ - Key: proto.String(path), - Val: proto.String("/dev/null"), - } - req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) - } - return nil -} - -func (c *linuxContainer) handleCriuConfigurationFile(rpcOpts *criurpc.CriuOpts) { - // CRIU will evaluate a configuration starting with release 3.11. - // Settings in the configuration file will overwrite RPC settings. - // Look for annotations. The annotation 'org.criu.config' - // specifies if CRIU should use a different, container specific - // configuration file. - _, annotations := utils.Annotations(c.config.Labels) - configFile, exists := annotations["org.criu.config"] - if exists { - // If the annotation 'org.criu.config' exists and is set - // to a non-empty string, tell CRIU to use that as a - // configuration file. If the file does not exist, CRIU - // will just ignore it. - if configFile != "" { - rpcOpts.ConfigFile = proto.String(configFile) - } - // If 'org.criu.config' exists and is set to an empty - // string, a runc specific CRIU configuration file will - // be not set at all. - } else { - // If the mentioned annotation has not been found, specify - // a default CRIU configuration file. - rpcOpts.ConfigFile = proto.String("/etc/criu/runc.conf") - } -} - -func (c *linuxContainer) criuSupportsExtNS(t configs.NamespaceType) bool { - var minVersion int - switch t { - case configs.NEWNET: - // CRIU supports different external namespace with different released CRIU versions. - // For network namespaces to work we need at least criu 3.11.0 => 31100. - minVersion = 31100 - case configs.NEWPID: - // For PID namespaces criu 31500 is needed. - minVersion = 31500 - default: - return false - } - return c.checkCriuVersion(minVersion) == nil -} - -func criuNsToKey(t configs.NamespaceType) string { - return "extRoot" + strings.Title(configs.NsName(t)) + "NS" -} - -func (c *linuxContainer) handleCheckpointingExternalNamespaces(rpcOpts *criurpc.CriuOpts, t configs.NamespaceType) error { - if !c.criuSupportsExtNS(t) { - return nil - } - - nsPath := c.config.Namespaces.PathOf(t) - if nsPath == "" { - return nil - } - // CRIU expects the information about an external namespace - // like this: --external []: - // This is always 'extRootNS'. 
- var ns unix.Stat_t - if err := unix.Stat(nsPath, &ns); err != nil { - return err - } - criuExternal := fmt.Sprintf("%s[%d]:%s", configs.NsName(t), ns.Ino, criuNsToKey(t)) - rpcOpts.External = append(rpcOpts.External, criuExternal) - - return nil -} - -func (c *linuxContainer) handleRestoringNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File) error { - for _, ns := range c.config.Namespaces { - switch ns.Type { - case configs.NEWNET, configs.NEWPID: - // If the container is running in a network or PID namespace and has - // a path to the network or PID namespace configured, we will dump - // that network or PID namespace as an external namespace and we - // will expect that the namespace exists during restore. - // This basically means that CRIU will ignore the namespace - // and expect it to be setup correctly. - if err := c.handleRestoringExternalNamespaces(rpcOpts, extraFiles, ns.Type); err != nil { - return err - } - default: - // For all other namespaces except NET and PID CRIU has - // a simpler way of joining the existing namespace if set - nsPath := c.config.Namespaces.PathOf(ns.Type) - if nsPath == "" { - continue - } - if ns.Type == configs.NEWCGROUP { - // CRIU has no code to handle NEWCGROUP - return fmt.Errorf("Do not know how to handle namespace %v", ns.Type) - } - // CRIU has code to handle NEWTIME, but it does not seem to be defined in runc - - // CRIU will issue a warning for NEWUSER: - // criu/namespaces.c: 'join-ns with user-namespace is not fully tested and dangerous' - rpcOpts.JoinNs = append(rpcOpts.JoinNs, &criurpc.JoinNamespace{ - Ns: proto.String(configs.NsName(ns.Type)), - NsFile: proto.String(nsPath), - }) - } - } - - return nil -} - -func (c *linuxContainer) handleRestoringExternalNamespaces(rpcOpts *criurpc.CriuOpts, extraFiles *[]*os.File, t configs.NamespaceType) error { - if !c.criuSupportsExtNS(t) { - return nil - } - - nsPath := c.config.Namespaces.PathOf(t) - if nsPath == "" { - return nil - } - // CRIU wants the information about an existing namespace - // like this: --inherit-fd fd[]: - // The needs to be the same as during checkpointing. - // We are always using 'extRootNS' as the key in this. - nsFd, err := os.Open(nsPath) - if err != nil { - logrus.Errorf("If a specific network namespace is defined it must exist: %s", err) - return fmt.Errorf("Requested network namespace %v does not exist", nsPath) - } - inheritFd := &criurpc.InheritFd{ - Key: proto.String(criuNsToKey(t)), - // The offset of four is necessary because 0, 1, 2 and 3 are - // already used by stdin, stdout, stderr, 'criu swrk' socket. - Fd: proto.Int32(int32(4 + len(*extraFiles))), - } - rpcOpts.InheritFd = append(rpcOpts.InheritFd, inheritFd) - // All open FDs need to be transferred to CRIU via extraFiles - *extraFiles = append(*extraFiles, nsFd) - - return nil -} - -func (c *linuxContainer) Checkpoint(criuOpts *CriuOpts) error { - c.m.Lock() - defer c.m.Unlock() - - // Checkpoint is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS(). - // (CLI prints a warning) - // TODO(avagin): Figure out how to make this work nicely. CRIU 2.0 has - // support for doing unprivileged dumps, but the setup of - // rootless containers might make this complicated. 
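// A minimal usage sketch for this Checkpoint API, assuming a Container obtained
// from a libcontainer Factory; the CriuOpts fields used below are the ones
// declared in criu_opts_linux.go (also removed by this patch), and the directory
// paths are hypothetical, illustrative only.
//
//	opts := &libcontainer.CriuOpts{
//		ImagesDirectory: "/run/kata-checkpoint/images",
//		WorkDirectory:   "/run/kata-checkpoint/work",
//		LeaveRunning:    true,
//	}
//	if err := container.Checkpoint(opts); err != nil {
//		logrus.WithError(err).Error("checkpoint failed")
//	}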
- - // We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0 - if err := c.checkCriuVersion(30000); err != nil { - return err - } - - if criuOpts.ImagesDirectory == "" { - return errors.New("invalid directory to save checkpoint") - } - - // Since a container can be C/R'ed multiple times, - // the checkpoint directory may already exist. - if err := os.Mkdir(criuOpts.ImagesDirectory, 0o700); err != nil && !os.IsExist(err) { - return err - } - - if criuOpts.WorkDirectory == "" { - criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") - } - - if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) { - return err - } - - workDir, err := os.Open(criuOpts.WorkDirectory) - if err != nil { - return err - } - defer workDir.Close() - - imageDir, err := os.Open(criuOpts.ImagesDirectory) - if err != nil { - return err - } - defer imageDir.Close() - - rpcOpts := criurpc.CriuOpts{ - ImagesDirFd: proto.Int32(int32(imageDir.Fd())), - WorkDirFd: proto.Int32(int32(workDir.Fd())), - LogLevel: proto.Int32(4), - LogFile: proto.String("dump.log"), - Root: proto.String(c.config.Rootfs), - ManageCgroups: proto.Bool(true), - NotifyScripts: proto.Bool(true), - Pid: proto.Int32(int32(c.initProcess.pid())), - ShellJob: proto.Bool(criuOpts.ShellJob), - LeaveRunning: proto.Bool(criuOpts.LeaveRunning), - TcpEstablished: proto.Bool(criuOpts.TcpEstablished), - ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), - FileLocks: proto.Bool(criuOpts.FileLocks), - EmptyNs: proto.Uint32(criuOpts.EmptyNs), - OrphanPtsMaster: proto.Bool(true), - AutoDedup: proto.Bool(criuOpts.AutoDedup), - LazyPages: proto.Bool(criuOpts.LazyPages), - } - - c.handleCriuConfigurationFile(&rpcOpts) - - // If the container is running in a network namespace and has - // a path to the network namespace configured, we will dump - // that network namespace as an external namespace and we - // will expect that the namespace exists during restore. - // This basically means that CRIU will ignore the namespace - // and expect to be setup correctly. - if err := c.handleCheckpointingExternalNamespaces(&rpcOpts, configs.NEWNET); err != nil { - return err - } - - // Same for possible external PID namespaces - if err := c.handleCheckpointingExternalNamespaces(&rpcOpts, configs.NEWPID); err != nil { - return err - } - - // CRIU can use cgroup freezer; when rpcOpts.FreezeCgroup - // is not set, CRIU uses ptrace() to pause the processes. - // Note cgroup v2 freezer is only supported since CRIU release 3.14. 
- if !cgroups.IsCgroup2UnifiedMode() || c.checkCriuVersion(31400) == nil { - if fcg := c.cgroupManager.Path("freezer"); fcg != "" { - rpcOpts.FreezeCgroup = proto.String(fcg) - } - } - - // append optional criu opts, e.g., page-server and port - if criuOpts.PageServer.Address != "" && criuOpts.PageServer.Port != 0 { - rpcOpts.Ps = &criurpc.CriuPageServerInfo{ - Address: proto.String(criuOpts.PageServer.Address), - Port: proto.Int32(criuOpts.PageServer.Port), - } - } - - // pre-dump may need parentImage param to complete iterative migration - if criuOpts.ParentImage != "" { - rpcOpts.ParentImg = proto.String(criuOpts.ParentImage) - rpcOpts.TrackMem = proto.Bool(true) - } - - // append optional manage cgroups mode - if criuOpts.ManageCgroupsMode != 0 { - mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode) - rpcOpts.ManageCgroupsMode = &mode - } - - var t criurpc.CriuReqType - if criuOpts.PreDump { - feat := criurpc.CriuFeatures{ - MemTrack: proto.Bool(true), - } - - if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil { - return err - } - - t = criurpc.CriuReqType_PRE_DUMP - } else { - t = criurpc.CriuReqType_DUMP - } - - if criuOpts.LazyPages { - // lazy migration requested; check if criu supports it - feat := criurpc.CriuFeatures{ - LazyPages: proto.Bool(true), - } - if err := c.checkCriuFeatures(criuOpts, &rpcOpts, &feat); err != nil { - return err - } - - if fd := criuOpts.StatusFd; fd != -1 { - // check that the FD is valid - flags, err := unix.FcntlInt(uintptr(fd), unix.F_GETFL, 0) - if err != nil { - return fmt.Errorf("invalid --status-fd argument %d: %w", fd, err) - } - // and writable - if flags&unix.O_WRONLY == 0 { - return fmt.Errorf("invalid --status-fd argument %d: not writable", fd) - } - - if c.checkCriuVersion(31500) != nil { - // For criu 3.15+, use notifications (see case "status-ready" - // in criuNotifications). Otherwise, rely on criu status fd. 
- rpcOpts.StatusFd = proto.Int32(int32(fd)) - } - } - } - - req := &criurpc.CriuReq{ - Type: &t, - Opts: &rpcOpts, - } - - // no need to dump all this in pre-dump - if !criuOpts.PreDump { - hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP) - for _, m := range c.config.Mounts { - switch m.Device { - case "bind": - c.addCriuDumpMount(req, m) - case "cgroup": - if cgroups.IsCgroup2UnifiedMode() || hasCgroupns { - // real mount(s) - continue - } - // a set of "external" bind mounts - binds, err := getCgroupMounts(m) - if err != nil { - return err - } - for _, b := range binds { - c.addCriuDumpMount(req, b) - } - } - } - - if err := c.addMaskPaths(req); err != nil { - return err - } - - for _, node := range c.config.Devices { - m := &configs.Mount{Destination: node.Path, Source: node.Path} - c.addCriuDumpMount(req, m) - } - - // Write the FD info to a file in the image directory - fdsJSON, err := json.Marshal(c.initProcess.externalDescriptors()) - if err != nil { - return err - } - - err = ioutil.WriteFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename), fdsJSON, 0o600) - if err != nil { - return err - } - } - - err = c.criuSwrk(nil, req, criuOpts, nil) - if err != nil { - return err - } - return nil -} - -func (c *linuxContainer) addCriuRestoreMount(req *criurpc.CriuReq, m *configs.Mount) { - mountDest := strings.TrimPrefix(m.Destination, c.config.Rootfs) - extMnt := &criurpc.ExtMountMap{ - Key: proto.String(mountDest), - Val: proto.String(m.Source), - } - req.Opts.ExtMnt = append(req.Opts.ExtMnt, extMnt) -} - -func (c *linuxContainer) restoreNetwork(req *criurpc.CriuReq, criuOpts *CriuOpts) { - for _, iface := range c.config.Networks { - switch iface.Type { - case "veth": - veth := new(criurpc.CriuVethPair) - veth.IfOut = proto.String(iface.HostInterfaceName) - veth.IfIn = proto.String(iface.Name) - req.Opts.Veths = append(req.Opts.Veths, veth) - case "loopback": - // Do nothing - } - } - for _, i := range criuOpts.VethPairs { - veth := new(criurpc.CriuVethPair) - veth.IfOut = proto.String(i.HostInterfaceName) - veth.IfIn = proto.String(i.ContainerInterfaceName) - req.Opts.Veths = append(req.Opts.Veths, veth) - } -} - -// makeCriuRestoreMountpoints makes the actual mountpoints for the -// restore using CRIU. This function is inspired from the code in -// rootfs_linux.go -func (c *linuxContainer) makeCriuRestoreMountpoints(m *configs.Mount) error { - switch m.Device { - case "cgroup": - // No mount point(s) need to be created: - // - // * for v1, mount points are saved by CRIU because - // /sys/fs/cgroup is a tmpfs mount - // - // * for v2, /sys/fs/cgroup is a real mount, but - // the mountpoint appears as soon as /sys is mounted - return nil - case "bind": - // The prepareBindMount() function checks if source - // exists. So it cannot be used for other filesystem types. 
- if err := prepareBindMount(m, c.config.Rootfs); err != nil { - return err - } - default: - // for all other filesystems just create the mountpoints - dest, err := securejoin.SecureJoin(c.config.Rootfs, m.Destination) - if err != nil { - return err - } - if err := checkProcMount(c.config.Rootfs, dest, ""); err != nil { - return err - } - if err := os.MkdirAll(dest, 0o755); err != nil { - return err - } - } - return nil -} - -// isPathInPrefixList is a small function for CRIU restore to make sure -// mountpoints, which are on a tmpfs, are not created in the roofs -func isPathInPrefixList(path string, prefix []string) bool { - for _, p := range prefix { - if strings.HasPrefix(path, p+"/") { - return true - } - } - return false -} - -// prepareCriuRestoreMounts tries to set up the rootfs of the -// container to be restored in the same way runc does it for -// initial container creation. Even for a read-only rootfs container -// runc modifies the rootfs to add mountpoints which do not exist. -// This function also creates missing mountpoints as long as they -// are not on top of a tmpfs, as CRIU will restore tmpfs content anyway. -func (c *linuxContainer) prepareCriuRestoreMounts(mounts []*configs.Mount) error { - // First get a list of a all tmpfs mounts - tmpfs := []string{} - for _, m := range mounts { - switch m.Device { - case "tmpfs": - tmpfs = append(tmpfs, m.Destination) - } - } - // Now go through all mounts and create the mountpoints - // if the mountpoints are not on a tmpfs, as CRIU will - // restore the complete tmpfs content from its checkpoint. - umounts := []string{} - defer func() { - for _, u := range umounts { - _ = utils.WithProcfd(c.config.Rootfs, u, func(procfd string) error { - if e := unix.Unmount(procfd, unix.MNT_DETACH); e != nil { - if e != unix.EINVAL { - // Ignore EINVAL as it means 'target is not a mount point.' - // It probably has already been unmounted. - logrus.Warnf("Error during cleanup unmounting of %s (%s): %v", procfd, u, e) - } - } - return nil - }) - } - }() - for _, m := range mounts { - if !isPathInPrefixList(m.Destination, tmpfs) { - if err := c.makeCriuRestoreMountpoints(m); err != nil { - return err - } - // If the mount point is a bind mount, we need to mount - // it now so that runc can create the necessary mount - // points for mounts in bind mounts. - // This also happens during initial container creation. - // Without this CRIU restore will fail - // See: https://github.com/opencontainers/runc/issues/2748 - // It is also not necessary to order the mount points - // because during initial container creation mounts are - // set up in the order they are configured. - if m.Device == "bind" { - if err := utils.WithProcfd(c.config.Rootfs, m.Destination, func(procfd string) error { - if err := unix.Mount(m.Source, procfd, "", unix.MS_BIND|unix.MS_REC, ""); err != nil { - return errorsf.Wrapf(err, "unable to bind mount %q to %q (through %q)", m.Source, m.Destination, procfd) - } - return nil - }); err != nil { - return err - } - umounts = append(umounts, m.Destination) - } - } - } - return nil -} - -func (c *linuxContainer) Restore(process *Process, criuOpts *CriuOpts) error { - c.m.Lock() - defer c.m.Unlock() - - var extraFiles []*os.File - - // Restore is unlikely to work if os.Geteuid() != 0 || system.RunningInUserNS(). - // (CLI prints a warning) - // TODO(avagin): Figure out how to make this work nicely. CRIU doesn't have - // support for unprivileged restore at the moment. 
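// A minimal usage sketch for the matching restore path, assuming the checkpoint
// images written by the sketch above and a freshly constructed Process whose
// Init field marks it as the container's init process; all paths and field
// values are hypothetical, illustrative only.
//
//	process := &libcontainer.Process{
//		Args: []string{"/bin/sh"},
//		Env:  []string{"PATH=/usr/bin:/bin"},
//		Init: true,
//	}
//	opts := &libcontainer.CriuOpts{
//		ImagesDirectory: "/run/kata-checkpoint/images",
//		WorkDirectory:   "/run/kata-checkpoint/work",
//	}
//	if err := container.Restore(process, opts); err != nil {
//		logrus.WithError(err).Error("restore failed")
//	}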
- - // We are relying on the CRIU version RPC which was introduced with CRIU 3.0.0 - if err := c.checkCriuVersion(30000); err != nil { - return err - } - if criuOpts.WorkDirectory == "" { - criuOpts.WorkDirectory = filepath.Join(c.root, "criu.work") - } - // Since a container can be C/R'ed multiple times, - // the work directory may already exist. - if err := os.Mkdir(criuOpts.WorkDirectory, 0o700); err != nil && !os.IsExist(err) { - return err - } - workDir, err := os.Open(criuOpts.WorkDirectory) - if err != nil { - return err - } - defer workDir.Close() - if criuOpts.ImagesDirectory == "" { - return errors.New("invalid directory to restore checkpoint") - } - imageDir, err := os.Open(criuOpts.ImagesDirectory) - if err != nil { - return err - } - defer imageDir.Close() - // CRIU has a few requirements for a root directory: - // * it must be a mount point - // * its parent must not be overmounted - // c.config.Rootfs is bind-mounted to a temporary directory - // to satisfy these requirements. - root := filepath.Join(c.root, "criu-root") - if err := os.Mkdir(root, 0o755); err != nil { - return err - } - defer os.Remove(root) - root, err = filepath.EvalSymlinks(root) - if err != nil { - return err - } - err = unix.Mount(c.config.Rootfs, root, "", unix.MS_BIND|unix.MS_REC, "") - if err != nil { - return err - } - defer unix.Unmount(root, unix.MNT_DETACH) //nolint: errcheck - t := criurpc.CriuReqType_RESTORE - req := &criurpc.CriuReq{ - Type: &t, - Opts: &criurpc.CriuOpts{ - ImagesDirFd: proto.Int32(int32(imageDir.Fd())), - WorkDirFd: proto.Int32(int32(workDir.Fd())), - EvasiveDevices: proto.Bool(true), - LogLevel: proto.Int32(4), - LogFile: proto.String("restore.log"), - RstSibling: proto.Bool(true), - Root: proto.String(root), - ManageCgroups: proto.Bool(true), - NotifyScripts: proto.Bool(true), - ShellJob: proto.Bool(criuOpts.ShellJob), - ExtUnixSk: proto.Bool(criuOpts.ExternalUnixConnections), - TcpEstablished: proto.Bool(criuOpts.TcpEstablished), - FileLocks: proto.Bool(criuOpts.FileLocks), - EmptyNs: proto.Uint32(criuOpts.EmptyNs), - OrphanPtsMaster: proto.Bool(true), - AutoDedup: proto.Bool(criuOpts.AutoDedup), - LazyPages: proto.Bool(criuOpts.LazyPages), - }, - } - - if criuOpts.LsmProfile != "" { - // CRIU older than 3.16 has a bug which breaks the possibility - // to set a different LSM profile. - if err := c.checkCriuVersion(31600); err != nil { - return errors.New("--lsm-profile requires at least CRIU 3.16") - } - req.Opts.LsmProfile = proto.String(criuOpts.LsmProfile) - } - - c.handleCriuConfigurationFile(req.Opts) - - if err := c.handleRestoringNamespaces(req.Opts, &extraFiles); err != nil { - return err - } - - // This will modify the rootfs of the container in the same way runc - // modifies the container during initial creation. 
- if err := c.prepareCriuRestoreMounts(c.config.Mounts); err != nil { - return err - } - - hasCgroupns := c.config.Namespaces.Contains(configs.NEWCGROUP) - for _, m := range c.config.Mounts { - switch m.Device { - case "bind": - c.addCriuRestoreMount(req, m) - case "cgroup": - if cgroups.IsCgroup2UnifiedMode() || hasCgroupns { - continue - } - // cgroup v1 is a set of bind mounts, unless cgroupns is used - binds, err := getCgroupMounts(m) - if err != nil { - return err - } - for _, b := range binds { - c.addCriuRestoreMount(req, b) - } - } - } - - if len(c.config.MaskPaths) > 0 { - m := &configs.Mount{Destination: "/dev/null", Source: "/dev/null"} - c.addCriuRestoreMount(req, m) - } - - for _, node := range c.config.Devices { - m := &configs.Mount{Destination: node.Path, Source: node.Path} - c.addCriuRestoreMount(req, m) - } - - if criuOpts.EmptyNs&unix.CLONE_NEWNET == 0 { - c.restoreNetwork(req, criuOpts) - } - - // append optional manage cgroups mode - if criuOpts.ManageCgroupsMode != 0 { - mode := criurpc.CriuCgMode(criuOpts.ManageCgroupsMode) - req.Opts.ManageCgroupsMode = &mode - } - - var ( - fds []string - fdJSON []byte - ) - if fdJSON, err = ioutil.ReadFile(filepath.Join(criuOpts.ImagesDirectory, descriptorsFilename)); err != nil { - return err - } - - if err := json.Unmarshal(fdJSON, &fds); err != nil { - return err - } - for i := range fds { - if s := fds[i]; strings.Contains(s, "pipe:") { - inheritFd := new(criurpc.InheritFd) - inheritFd.Key = proto.String(s) - inheritFd.Fd = proto.Int32(int32(i)) - req.Opts.InheritFd = append(req.Opts.InheritFd, inheritFd) - } - } - err = c.criuSwrk(process, req, criuOpts, extraFiles) - - // Now that CRIU is done let's close all opened FDs CRIU needed. - for _, fd := range extraFiles { - fd.Close() - } - - return err -} - -func (c *linuxContainer) criuApplyCgroups(pid int, req *criurpc.CriuReq) error { - // need to apply cgroups only on restore - if req.GetType() != criurpc.CriuReqType_RESTORE { - return nil - } - - // XXX: Do we need to deal with this case? AFAIK criu still requires root. - if err := c.cgroupManager.Apply(pid); err != nil { - return err - } - - if err := c.cgroupManager.Set(c.config.Cgroups.Resources); err != nil { - return newSystemError(err) - } - - if cgroups.IsCgroup2UnifiedMode() { - return nil - } - // the stuff below is cgroupv1-specific - - path := fmt.Sprintf("/proc/%d/cgroup", pid) - cgroupsPaths, err := cgroups.ParseCgroupFile(path) - if err != nil { - return err - } - - for c, p := range cgroupsPaths { - cgroupRoot := &criurpc.CgroupRoot{ - Ctrl: proto.String(c), - Path: proto.String(p), - } - req.Opts.CgRoot = append(req.Opts.CgRoot, cgroupRoot) - } - - return nil -} - -func (c *linuxContainer) criuSwrk(process *Process, req *criurpc.CriuReq, opts *CriuOpts, extraFiles []*os.File) error { - fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_SEQPACKET|unix.SOCK_CLOEXEC, 0) - if err != nil { - return err - } - - var logPath string - if opts != nil { - logPath = filepath.Join(opts.WorkDirectory, req.GetOpts().GetLogFile()) - } else { - // For the VERSION RPC 'opts' is set to 'nil' and therefore - // opts.WorkDirectory does not exist. Set logPath to "". 
- logPath = "" - } - criuClient := os.NewFile(uintptr(fds[0]), "criu-transport-client") - criuClientFileCon, err := net.FileConn(criuClient) - criuClient.Close() - if err != nil { - return err - } - - criuClientCon := criuClientFileCon.(*net.UnixConn) - defer criuClientCon.Close() - - criuServer := os.NewFile(uintptr(fds[1]), "criu-transport-server") - defer criuServer.Close() - - args := []string{"swrk", "3"} - if c.criuVersion != 0 { - // If the CRIU Version is still '0' then this is probably - // the initial CRIU run to detect the version. Skip it. - logrus.Debugf("Using CRIU %d at: %s", c.criuVersion, c.criuPath) - } - cmd := exec.Command(c.criuPath, args...) - if process != nil { - cmd.Stdin = process.Stdin - cmd.Stdout = process.Stdout - cmd.Stderr = process.Stderr - } - cmd.ExtraFiles = append(cmd.ExtraFiles, criuServer) - if extraFiles != nil { - cmd.ExtraFiles = append(cmd.ExtraFiles, extraFiles...) - } - - if err := cmd.Start(); err != nil { - return err - } - // we close criuServer so that even if CRIU crashes or unexpectedly exits, runc will not hang. - criuServer.Close() - // cmd.Process will be replaced by a restored init. - criuProcess := cmd.Process - - var criuProcessState *os.ProcessState - defer func() { - if criuProcessState == nil { - criuClientCon.Close() - _, err := criuProcess.Wait() - if err != nil { - logrus.Warnf("wait on criuProcess returned %v", err) - } - } - }() - - if err := c.criuApplyCgroups(criuProcess.Pid, req); err != nil { - return err - } - - var extFds []string - if process != nil { - extFds, err = getPipeFds(criuProcess.Pid) - if err != nil { - return err - } - } - - logrus.Debugf("Using CRIU in %s mode", req.GetType().String()) - // In the case of criurpc.CriuReqType_FEATURE_CHECK req.GetOpts() - // should be empty. For older CRIU versions it still will be - // available but empty. criurpc.CriuReqType_VERSION actually - // has no req.GetOpts(). - if logrus.GetLevel() >= logrus.DebugLevel && - !(req.GetType() == criurpc.CriuReqType_FEATURE_CHECK || - req.GetType() == criurpc.CriuReqType_VERSION) { - - val := reflect.ValueOf(req.GetOpts()) - v := reflect.Indirect(val) - for i := 0; i < v.NumField(); i++ { - st := v.Type() - name := st.Field(i).Name - if 'A' <= name[0] && name[0] <= 'Z' { - value := val.MethodByName("Get" + name).Call([]reflect.Value{}) - logrus.Debugf("CRIU option %s with value %v", name, value[0]) - } - } - } - data, err := proto.Marshal(req) - if err != nil { - return err - } - _, err = criuClientCon.Write(data) - if err != nil { - return err - } - - buf := make([]byte, 10*4096) - oob := make([]byte, 4096) - for { - n, oobn, _, _, err := criuClientCon.ReadMsgUnix(buf, oob) - if req.Opts != nil && req.Opts.StatusFd != nil { - // Close status_fd as soon as we got something back from criu, - // assuming it has consumed (reopened) it by this time. - // Otherwise it will might be left open forever and whoever - // is waiting on it will wait forever. 
- fd := int(*req.Opts.StatusFd) - _ = unix.Close(fd) - req.Opts.StatusFd = nil - } - if err != nil { - return err - } - if n == 0 { - return errors.New("unexpected EOF") - } - if n == len(buf) { - return errors.New("buffer is too small") - } - - resp := new(criurpc.CriuResp) - err = proto.Unmarshal(buf[:n], resp) - if err != nil { - return err - } - if !resp.GetSuccess() { - typeString := req.GetType().String() - return fmt.Errorf("criu failed: type %s errno %d\nlog file: %s", typeString, resp.GetCrErrno(), logPath) - } - - t := resp.GetType() - switch { - case t == criurpc.CriuReqType_FEATURE_CHECK: - logrus.Debugf("Feature check says: %s", resp) - criuFeatures = resp.GetFeatures() - case t == criurpc.CriuReqType_NOTIFY: - if err := c.criuNotifications(resp, process, cmd, opts, extFds, oob[:oobn]); err != nil { - return err - } - t = criurpc.CriuReqType_NOTIFY - req = &criurpc.CriuReq{ - Type: &t, - NotifySuccess: proto.Bool(true), - } - data, err = proto.Marshal(req) - if err != nil { - return err - } - _, err = criuClientCon.Write(data) - if err != nil { - return err - } - continue - case t == criurpc.CriuReqType_RESTORE: - case t == criurpc.CriuReqType_DUMP: - case t == criurpc.CriuReqType_PRE_DUMP: - default: - return fmt.Errorf("unable to parse the response %s", resp.String()) - } - - break - } - - _ = criuClientCon.CloseWrite() - // cmd.Wait() waits cmd.goroutines which are used for proxying file descriptors. - // Here we want to wait only the CRIU process. - criuProcessState, err = criuProcess.Wait() - if err != nil { - return err - } - - // In pre-dump mode CRIU is in a loop and waits for - // the final DUMP command. - // The current runc pre-dump approach, however, is - // start criu in PRE_DUMP once for a single pre-dump - // and not the whole series of pre-dump, pre-dump, ...m, dump - // If we got the message CriuReqType_PRE_DUMP it means - // CRIU was successful and we need to forcefully stop CRIU - if !criuProcessState.Success() && *req.Type != criurpc.CriuReqType_PRE_DUMP { - return fmt.Errorf("criu failed: %s\nlog file: %s", criuProcessState.String(), logPath) - } - return nil -} - -// block any external network activity -func lockNetwork(config *configs.Config) error { - for _, config := range config.Networks { - strategy, err := getStrategy(config.Type) - if err != nil { - return err - } - - if err := strategy.detach(config); err != nil { - return err - } - } - return nil -} - -func unlockNetwork(config *configs.Config) error { - for _, config := range config.Networks { - strategy, err := getStrategy(config.Type) - if err != nil { - return err - } - if err = strategy.attach(config); err != nil { - return err - } - } - return nil -} - -func (c *linuxContainer) criuNotifications(resp *criurpc.CriuResp, process *Process, cmd *exec.Cmd, opts *CriuOpts, fds []string, oob []byte) error { - notify := resp.GetNotify() - if notify == nil { - return fmt.Errorf("invalid response: %s", resp.String()) - } - script := notify.GetScript() - logrus.Debugf("notify: %s\n", script) - switch script { - case "post-dump": - f, err := os.Create(filepath.Join(c.root, "checkpoint")) - if err != nil { - return err - } - f.Close() - case "network-unlock": - if err := unlockNetwork(c.config); err != nil { - return err - } - case "network-lock": - if err := lockNetwork(c.config); err != nil { - return err - } - case "setup-namespaces": - if c.config.Hooks != nil { - s, err := c.currentOCIState() - if err != nil { - return nil - } - s.Pid = int(notify.GetPid()) - - if err := 
c.config.Hooks[configs.Prestart].RunHooks(s); err != nil { - return err - } - if err := c.config.Hooks[configs.CreateRuntime].RunHooks(s); err != nil { - return err - } - } - case "post-restore": - pid := notify.GetPid() - - p, err := os.FindProcess(int(pid)) - if err != nil { - return err - } - cmd.Process = p - - r, err := newRestoredProcess(cmd, fds) - if err != nil { - return err - } - process.ops = r - if err := c.state.transition(&restoredState{ - imageDir: opts.ImagesDirectory, - c: c, - }); err != nil { - return err - } - // create a timestamp indicating when the restored checkpoint was started - c.created = time.Now().UTC() - if _, err := c.updateState(r); err != nil { - return err - } - if err := os.Remove(filepath.Join(c.root, "checkpoint")); err != nil { - if !os.IsNotExist(err) { - logrus.Error(err) - } - } - case "orphan-pts-master": - scm, err := unix.ParseSocketControlMessage(oob) - if err != nil { - return err - } - fds, err := unix.ParseUnixRights(&scm[0]) - if err != nil { - return err - } - - master := os.NewFile(uintptr(fds[0]), "orphan-pts-master") - defer master.Close() - - // While we can access console.master, using the API is a good idea. - if err := utils.SendFd(process.ConsoleSocket, master.Name(), master.Fd()); err != nil { - return err - } - case "status-ready": - if opts.StatusFd != -1 { - // write \0 to status fd to notify that lazy page server is ready - _, err := unix.Write(opts.StatusFd, []byte{0}) - if err != nil { - logrus.Warnf("can't write \\0 to status fd: %v", err) - } - _ = unix.Close(opts.StatusFd) - opts.StatusFd = -1 - } - } - return nil -} - -func (c *linuxContainer) updateState(process parentProcess) (*State, error) { - if process != nil { - c.initProcess = process - } - state, err := c.currentState() - if err != nil { - return nil, err - } - err = c.saveState(state) - if err != nil { - return nil, err - } - return state, nil -} - -func (c *linuxContainer) saveState(s *State) (retErr error) { - tmpFile, err := ioutil.TempFile(c.root, "state-") - if err != nil { - return err - } - - defer func() { - if retErr != nil { - tmpFile.Close() - os.Remove(tmpFile.Name()) - } - }() - - err = utils.WriteJSON(tmpFile, s) - if err != nil { - return err - } - err = tmpFile.Close() - if err != nil { - return err - } - - stateFilePath := filepath.Join(c.root, stateFilename) - return os.Rename(tmpFile.Name(), stateFilePath) -} - -func (c *linuxContainer) currentStatus() (Status, error) { - if err := c.refreshState(); err != nil { - return -1, err - } - return c.state.status(), nil -} - -// refreshState needs to be called to verify that the current state on the -// container is what is true. Because consumers of libcontainer can use it -// out of process we need to verify the container's status based on runtime -// information and not rely on our in process info. 
-func (c *linuxContainer) refreshState() error { - paused, err := c.isPaused() - if err != nil { - return err - } - if paused { - return c.state.transition(&pausedState{c: c}) - } - t := c.runType() - switch t { - case Created: - return c.state.transition(&createdState{c: c}) - case Running: - return c.state.transition(&runningState{c: c}) - } - return c.state.transition(&stoppedState{c: c}) -} - -func (c *linuxContainer) runType() Status { - if c.initProcess == nil { - return Stopped - } - pid := c.initProcess.pid() - stat, err := system.Stat(pid) - if err != nil { - return Stopped - } - if stat.StartTime != c.initProcessStartTime || stat.State == system.Zombie || stat.State == system.Dead { - return Stopped - } - // We'll create exec fifo and blocking on it after container is created, - // and delete it after start container. - if _, err := os.Stat(filepath.Join(c.root, execFifoFilename)); err == nil { - return Created - } - return Running -} - -func (c *linuxContainer) isPaused() (bool, error) { - state, err := c.cgroupManager.GetFreezerState() - if err != nil { - return false, err - } - return state == configs.Frozen, nil -} - -func (c *linuxContainer) currentState() (*State, error) { - var ( - startTime uint64 - externalDescriptors []string - pid = -1 - ) - if c.initProcess != nil { - pid = c.initProcess.pid() - startTime, _ = c.initProcess.startTime() - externalDescriptors = c.initProcess.externalDescriptors() - } - intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID()) - if err != nil { - intelRdtPath = "" - } - state := &State{ - BaseState: BaseState{ - ID: c.ID(), - Config: *c.config, - InitProcessPid: pid, - InitProcessStartTime: startTime, - Created: c.created, - }, - Rootless: c.config.RootlessEUID && c.config.RootlessCgroups, - CgroupPaths: c.cgroupManager.GetPaths(), - IntelRdtPath: intelRdtPath, - NamespacePaths: make(map[configs.NamespaceType]string), - ExternalDescriptors: externalDescriptors, - } - if pid > 0 { - for _, ns := range c.config.Namespaces { - state.NamespacePaths[ns.Type] = ns.GetPath(pid) - } - for _, nsType := range configs.NamespaceTypes() { - if !configs.IsNamespaceSupported(nsType) { - continue - } - if _, ok := state.NamespacePaths[nsType]; !ok { - ns := configs.Namespace{Type: nsType} - state.NamespacePaths[ns.Type] = ns.GetPath(pid) - } - } - } - return state, nil -} - -func (c *linuxContainer) currentOCIState() (*specs.State, error) { - bundle, annotations := utils.Annotations(c.config.Labels) - state := &specs.State{ - Version: specs.Version, - ID: c.ID(), - Bundle: bundle, - Annotations: annotations, - } - status, err := c.currentStatus() - if err != nil { - return nil, err - } - state.Status = specs.ContainerState(status.String()) - if status != Stopped { - if c.initProcess != nil { - state.Pid = c.initProcess.pid() - } - } - return state, nil -} - -// orderNamespacePaths sorts namespace paths into a list of paths that we -// can setns in order. -func (c *linuxContainer) orderNamespacePaths(namespaces map[configs.NamespaceType]string) ([]string, error) { - paths := []string{} - for _, ns := range configs.NamespaceTypes() { - - // Remove namespaces that we don't need to join. 
- if !c.config.Namespaces.Contains(ns) { - continue - } - - if p, ok := namespaces[ns]; ok && p != "" { - // check if the requested namespace is supported - if !configs.IsNamespaceSupported(ns) { - return nil, newSystemError(fmt.Errorf("namespace %s is not supported", ns)) - } - // only set to join this namespace if it exists - if _, err := os.Lstat(p); err != nil { - return nil, newSystemErrorWithCausef(err, "running lstat on namespace path %q", p) - } - // do not allow namespace path with comma as we use it to separate - // the namespace paths - if strings.ContainsRune(p, ',') { - return nil, newSystemError(fmt.Errorf("invalid path %s", p)) - } - paths = append(paths, fmt.Sprintf("%s:%s", configs.NsName(ns), p)) - } - - } - - return paths, nil -} - -func encodeIDMapping(idMap []configs.IDMap) ([]byte, error) { - data := bytes.NewBuffer(nil) - for _, im := range idMap { - line := fmt.Sprintf("%d %d %d\n", im.ContainerID, im.HostID, im.Size) - if _, err := data.WriteString(line); err != nil { - return nil, err - } - } - return data.Bytes(), nil -} - -// bootstrapData encodes the necessary data in netlink binary format -// as a io.Reader. -// Consumer can write the data to a bootstrap program -// such as one that uses nsenter package to bootstrap the container's -// init process correctly, i.e. with correct namespaces, uid/gid -// mapping etc. -func (c *linuxContainer) bootstrapData(cloneFlags uintptr, nsMaps map[configs.NamespaceType]string) (io.Reader, error) { - // create the netlink message - r := nl.NewNetlinkRequest(int(InitMsg), 0) - - // write cloneFlags - r.AddData(&Int32msg{ - Type: CloneFlagsAttr, - Value: uint32(cloneFlags), - }) - - // write custom namespace paths - if len(nsMaps) > 0 { - nsPaths, err := c.orderNamespacePaths(nsMaps) - if err != nil { - return nil, err - } - r.AddData(&Bytemsg{ - Type: NsPathsAttr, - Value: []byte(strings.Join(nsPaths, ",")), - }) - } - - // write namespace paths only when we are not joining an existing user ns - _, joinExistingUser := nsMaps[configs.NEWUSER] - if !joinExistingUser { - // write uid mappings - if len(c.config.UidMappings) > 0 { - if c.config.RootlessEUID && c.newuidmapPath != "" { - r.AddData(&Bytemsg{ - Type: UidmapPathAttr, - Value: []byte(c.newuidmapPath), - }) - } - b, err := encodeIDMapping(c.config.UidMappings) - if err != nil { - return nil, err - } - r.AddData(&Bytemsg{ - Type: UidmapAttr, - Value: b, - }) - } - - // write gid mappings - if len(c.config.GidMappings) > 0 { - b, err := encodeIDMapping(c.config.GidMappings) - if err != nil { - return nil, err - } - r.AddData(&Bytemsg{ - Type: GidmapAttr, - Value: b, - }) - if c.config.RootlessEUID && c.newgidmapPath != "" { - r.AddData(&Bytemsg{ - Type: GidmapPathAttr, - Value: []byte(c.newgidmapPath), - }) - } - if requiresRootOrMappingTool(c.config) { - r.AddData(&Boolmsg{ - Type: SetgroupAttr, - Value: true, - }) - } - } - } - - if c.config.OomScoreAdj != nil { - // write oom_score_adj - r.AddData(&Bytemsg{ - Type: OomScoreAdjAttr, - Value: []byte(strconv.Itoa(*c.config.OomScoreAdj)), - }) - } - - // write rootless - r.AddData(&Boolmsg{ - Type: RootlessEUIDAttr, - Value: c.config.RootlessEUID, - }) - - return bytes.NewReader(r.Serialize()), nil -} - -// ignoreTerminateErrors returns nil if the given err matches an error known -// to indicate that the terminate occurred successfully or err was nil, otherwise -// err is returned unaltered. 
-func ignoreTerminateErrors(err error) error { - if err == nil { - return nil - } - // terminate() might return an error from ether Kill or Wait. - // The (*Cmd).Wait documentation says: "If the command fails to run - // or doesn't complete successfully, the error is of type *ExitError". - // Filter out such errors (like "exit status 1" or "signal: killed"). - var exitErr *exec.ExitError - if errors.As(err, &exitErr) { - return nil - } - // TODO: use errors.Is(err, os.ErrProcessDone) here and - // remove "process already finished" string comparison below - // once go 1.16 is minimally supported version. - - s := err.Error() - if strings.Contains(s, "process already finished") || - strings.Contains(s, "Wait was already called") { - return nil - } - return err -} - -func requiresRootOrMappingTool(c *configs.Config) bool { - gidMap := []configs.IDMap{ - {ContainerID: 0, HostID: os.Getegid(), Size: 1}, - } - return !reflect.DeepEqual(c.GidMappings, gidMap) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go deleted file mode 100644 index 0db43e74e8..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/criu_opts_linux.go +++ /dev/null @@ -1,33 +0,0 @@ -package libcontainer - -import criu "github.com/checkpoint-restore/go-criu/v5/rpc" - -type CriuPageServerInfo struct { - Address string // IP address of CRIU page server - Port int32 // port number of CRIU page server -} - -type VethPairName struct { - ContainerInterfaceName string - HostInterfaceName string -} - -type CriuOpts struct { - ImagesDirectory string // directory for storing image files - WorkDirectory string // directory to cd and write logs/pidfiles/stats to - ParentImage string // directory for storing parent image files in pre-dump and dump - LeaveRunning bool // leave container in running state after checkpoint - TcpEstablished bool // checkpoint/restore established TCP connections - ExternalUnixConnections bool // allow external unix connections - ShellJob bool // allow to dump and restore shell jobs - FileLocks bool // handle file locks, for safety - PreDump bool // call criu predump to perform iterative checkpoint - PageServer CriuPageServerInfo // allow to dump to criu page server - VethPairs []VethPairName // pass the veth to criu when restore - ManageCgroupsMode criu.CriuCgMode // dump or restore cgroup mode - EmptyNs uint32 // don't c/r properties for namespace from this mask - AutoDedup bool // auto deduplication for incremental dumps - LazyPages bool // restore memory pages lazily using userfaultfd - StatusFd int // fd for feedback when lazy server is ready - LsmProfile string // LSM profile used to restore the container -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/error.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/error.go deleted file mode 100644 index 21a3789ba1..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/error.go +++ /dev/null @@ -1,70 +0,0 @@ -package libcontainer - -import "io" - -// ErrorCode is the API error code type. -type ErrorCode int - -// API error codes. 
-const ( - // Factory errors - IdInUse ErrorCode = iota - InvalidIdFormat - - // Container errors - ContainerNotExists - ContainerPaused - ContainerNotStopped - ContainerNotRunning - ContainerNotPaused - - // Process errors - NoProcessOps - - // Common errors - ConfigInvalid - ConsoleExists - SystemError -) - -func (c ErrorCode) String() string { - switch c { - case IdInUse: - return "Id already in use" - case InvalidIdFormat: - return "Invalid format" - case ContainerPaused: - return "Container paused" - case ConfigInvalid: - return "Invalid configuration" - case SystemError: - return "System error" - case ContainerNotExists: - return "Container does not exist" - case ContainerNotStopped: - return "Container is not stopped" - case ContainerNotRunning: - return "Container is not running" - case ConsoleExists: - return "Console exists for process" - case ContainerNotPaused: - return "Container is not paused" - case NoProcessOps: - return "No process operations" - default: - return "Unknown error" - } -} - -// Error is the API error type. -type Error interface { - error - - // Returns an error if it failed to write the detail of the Error to w. - // The detail of the Error may include the error message and a - // representation of the stack trace. - Detail(w io.Writer) error - - // Returns the error code for this error. - Code() ErrorCode -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory.go deleted file mode 100644 index 0986cd77e3..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory.go +++ /dev/null @@ -1,44 +0,0 @@ -package libcontainer - -import ( - "github.com/opencontainers/runc/libcontainer/configs" -) - -type Factory interface { - // Creates a new container with the given id and starts the initial process inside it. - // id must be a string containing only letters, digits and underscores and must contain - // between 1 and 1024 characters, inclusive. - // - // The id must not already be in use by an existing container. Containers created using - // a factory with the same path (and filesystem) must have distinct ids. - // - // Returns the new container with a running process. - // - // errors: - // IdInUse - id is already in use by a container - // InvalidIdFormat - id has incorrect format - // ConfigInvalid - config is invalid - // Systemerror - System error - // - // On error, any partially created container parts are cleaned up (the operation is atomic). - Create(id string, config *configs.Config) (Container, error) - - // Load takes an ID for an existing container and returns the container information - // from the state. This presents a read only view of the container. - // - // errors: - // Path does not exist - // System error - Load(id string) (Container, error) - - // StartInitialization is an internal API to libcontainer used during the reexec of the - // container. - // - // Errors: - // Pipe connection error - // System error - StartInitialization() error - - // Type returns info string about factory type (e.g. lxc, libcontainer...) 
- Type() string -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go deleted file mode 100644 index dbd410b88f..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/factory_linux.go +++ /dev/null @@ -1,453 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "encoding/json" - "fmt" - "os" - "path/filepath" - "regexp" - "runtime/debug" - "strconv" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/moby/sys/mountinfo" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/cgroups/systemd" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/configs/validate" - "github.com/opencontainers/runc/libcontainer/intelrdt" - "github.com/opencontainers/runc/libcontainer/utils" - "github.com/pkg/errors" - - "golang.org/x/sys/unix" -) - -const ( - stateFilename = "state.json" - execFifoFilename = "exec.fifo" -) - -var idRegex = regexp.MustCompile(`^[\w+-\.]+$`) - -// InitArgs returns an options func to configure a LinuxFactory with the -// provided init binary path and arguments. -func InitArgs(args ...string) func(*LinuxFactory) error { - return func(l *LinuxFactory) (err error) { - if len(args) > 0 { - // Resolve relative paths to ensure that its available - // after directory changes. - if args[0], err = filepath.Abs(args[0]); err != nil { - return newGenericError(err, ConfigInvalid) - } - } - - l.InitArgs = args - return nil - } -} - -func getUnifiedPath(paths map[string]string) string { - path := "" - for k, v := range paths { - if path == "" { - path = v - } else if v != path { - panic(errors.Errorf("expected %q path to be unified path %q, got %q", k, path, v)) - } - } - // can be empty - if path != "" { - if filepath.Clean(path) != path || !filepath.IsAbs(path) { - panic(errors.Errorf("invalid dir path %q", path)) - } - } - - return path -} - -func systemdCgroupV2(l *LinuxFactory, rootless bool) error { - l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - return systemd.NewUnifiedManager(config, getUnifiedPath(paths), rootless) - } - return nil -} - -// SystemdCgroups is an options func to configure a LinuxFactory to return -// containers that use systemd to create and manage cgroups. -func SystemdCgroups(l *LinuxFactory) error { - if !systemd.IsRunningSystemd() { - return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager") - } - - if cgroups.IsCgroup2UnifiedMode() { - return systemdCgroupV2(l, false) - } - - l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - return systemd.NewLegacyManager(config, paths) - } - - return nil -} - -// RootlessSystemdCgroups is rootless version of SystemdCgroups. 
-func RootlessSystemdCgroups(l *LinuxFactory) error { - if !systemd.IsRunningSystemd() { - return fmt.Errorf("systemd not running on this host, can't use systemd as cgroups manager") - } - - if !cgroups.IsCgroup2UnifiedMode() { - return fmt.Errorf("cgroup v2 not enabled on this host, can't use systemd (rootless) as cgroups manager") - } - return systemdCgroupV2(l, true) -} - -func cgroupfs2(l *LinuxFactory, rootless bool) error { - l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - m, err := fs2.NewManager(config, getUnifiedPath(paths), rootless) - if err != nil { - panic(err) - } - return m - } - return nil -} - -func cgroupfs(l *LinuxFactory, rootless bool) error { - if cgroups.IsCgroup2UnifiedMode() { - return cgroupfs2(l, rootless) - } - l.NewCgroupsManager = func(config *configs.Cgroup, paths map[string]string) cgroups.Manager { - return fs.NewManager(config, paths, rootless) - } - return nil -} - -// Cgroupfs is an options func to configure a LinuxFactory to return containers -// that use the native cgroups filesystem implementation to create and manage -// cgroups. -func Cgroupfs(l *LinuxFactory) error { - return cgroupfs(l, false) -} - -// RootlessCgroupfs is an options func to configure a LinuxFactory to return -// containers that use the native cgroups filesystem implementation to create -// and manage cgroups. The difference between RootlessCgroupfs and Cgroupfs is -// that RootlessCgroupfs can transparently handle permission errors that occur -// during rootless container (including euid=0 in userns) setup (while still allowing cgroup usage if -// they've been set up properly). -func RootlessCgroupfs(l *LinuxFactory) error { - return cgroupfs(l, true) -} - -// IntelRdtfs is an options func to configure a LinuxFactory to return -// containers that use the Intel RDT "resource control" filesystem to -// create and manage Intel RDT resources (e.g., L3 cache, memory bandwidth). -func IntelRdtFs(l *LinuxFactory) error { - if !intelrdt.IsCATEnabled() && !intelrdt.IsMBAEnabled() { - l.NewIntelRdtManager = nil - } else { - l.NewIntelRdtManager = func(config *configs.Config, id string, path string) intelrdt.Manager { - return intelrdt.NewManager(config, id, path) - } - } - return nil -} - -// TmpfsRoot is an option func to mount LinuxFactory.Root to tmpfs. -func TmpfsRoot(l *LinuxFactory) error { - mounted, err := mountinfo.Mounted(l.Root) - if err != nil { - return err - } - if !mounted { - if err := unix.Mount("tmpfs", l.Root, "tmpfs", 0, ""); err != nil { - return err - } - } - return nil -} - -// CriuPath returns an option func to configure a LinuxFactory with the -// provided criupath -func CriuPath(criupath string) func(*LinuxFactory) error { - return func(l *LinuxFactory) error { - l.CriuPath = criupath - return nil - } -} - -// New returns a linux based container factory based in the root directory and -// configures the factory with the provided option funcs. 
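// Hedged usage sketch (not part of the vendored file; names other than the
// libcontainer API are illustrative): a runtime typically builds one factory
// per state root and selects the cgroup backend through these option funcs:
//
//	factory, err := libcontainer.New("/run/example-runtime",
//		libcontainer.Cgroupfs, // or libcontainer.SystemdCgroups on systemd hosts
//		libcontainer.CriuPath("criu"))
//	if err != nil {
//		// handle error
//	}
//	container, err := factory.Load("example-container")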
-func New(root string, options ...func(*LinuxFactory) error) (Factory, error) { - if root != "" { - if err := os.MkdirAll(root, 0o700); err != nil { - return nil, newGenericError(err, SystemError) - } - } - l := &LinuxFactory{ - Root: root, - InitPath: "/proc/self/exe", - InitArgs: []string{os.Args[0], "init"}, - Validator: validate.New(), - CriuPath: "criu", - } - - if err := Cgroupfs(l); err != nil { - return nil, err - } - - for _, opt := range options { - if opt == nil { - continue - } - if err := opt(l); err != nil { - return nil, err - } - } - return l, nil -} - -// LinuxFactory implements the default factory interface for linux based systems. -type LinuxFactory struct { - // Root directory for the factory to store state. - Root string - - // InitPath is the path for calling the init responsibilities for spawning - // a container. - InitPath string - - // InitArgs are arguments for calling the init responsibilities for spawning - // a container. - InitArgs []string - - // CriuPath is the path to the criu binary used for checkpoint and restore of - // containers. - CriuPath string - - // New{u,g}idmapPath is the path to the binaries used for mapping with - // rootless containers. - NewuidmapPath string - NewgidmapPath string - - // Validator provides validation to container configurations. - Validator validate.Validator - - // NewCgroupsManager returns an initialized cgroups manager for a single container. - NewCgroupsManager func(config *configs.Cgroup, paths map[string]string) cgroups.Manager - - // NewIntelRdtManager returns an initialized Intel RDT manager for a single container. - NewIntelRdtManager func(config *configs.Config, id string, path string) intelrdt.Manager -} - -func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, error) { - if l.Root == "" { - return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) - } - if err := l.validateID(id); err != nil { - return nil, err - } - if err := l.Validator.Validate(config); err != nil { - return nil, newGenericError(err, ConfigInvalid) - } - containerRoot, err := securejoin.SecureJoin(l.Root, id) - if err != nil { - return nil, err - } - if _, err := os.Stat(containerRoot); err == nil { - return nil, newGenericError(fmt.Errorf("container with id exists: %v", id), IdInUse) - } else if !os.IsNotExist(err) { - return nil, newGenericError(err, SystemError) - } - if err := os.MkdirAll(containerRoot, 0o711); err != nil { - return nil, newGenericError(err, SystemError) - } - if err := os.Chown(containerRoot, unix.Geteuid(), unix.Getegid()); err != nil { - return nil, newGenericError(err, SystemError) - } - c := &linuxContainer{ - id: id, - root: containerRoot, - config: config, - initPath: l.InitPath, - initArgs: l.InitArgs, - criuPath: l.CriuPath, - newuidmapPath: l.NewuidmapPath, - newgidmapPath: l.NewgidmapPath, - cgroupManager: l.NewCgroupsManager(config.Cgroups, nil), - } - if l.NewIntelRdtManager != nil { - c.intelRdtManager = l.NewIntelRdtManager(config, id, "") - } - c.state = &stoppedState{c: c} - return c, nil -} - -func (l *LinuxFactory) Load(id string) (Container, error) { - if l.Root == "" { - return nil, newGenericError(fmt.Errorf("invalid root"), ConfigInvalid) - } - // when load, we need to check id is valid or not. 
- if err := l.validateID(id); err != nil { - return nil, err - } - containerRoot, err := securejoin.SecureJoin(l.Root, id) - if err != nil { - return nil, err - } - state, err := l.loadState(containerRoot, id) - if err != nil { - return nil, err - } - r := &nonChildProcess{ - processPid: state.InitProcessPid, - processStartTime: state.InitProcessStartTime, - fds: state.ExternalDescriptors, - } - c := &linuxContainer{ - initProcess: r, - initProcessStartTime: state.InitProcessStartTime, - id: id, - config: &state.Config, - initPath: l.InitPath, - initArgs: l.InitArgs, - criuPath: l.CriuPath, - newuidmapPath: l.NewuidmapPath, - newgidmapPath: l.NewgidmapPath, - cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths), - root: containerRoot, - created: state.Created, - } - if l.NewIntelRdtManager != nil { - c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath) - } - c.state = &loadedState{c: c} - if err := c.refreshState(); err != nil { - return nil, err - } - return c, nil -} - -func (l *LinuxFactory) Type() string { - return "libcontainer" -} - -// StartInitialization loads a container by opening the pipe fd from the parent to read the configuration and state -// This is a low level implementation detail of the reexec and should not be consumed externally -func (l *LinuxFactory) StartInitialization() (err error) { - // Get the INITPIPE. - envInitPipe := os.Getenv("_LIBCONTAINER_INITPIPE") - pipefd, err := strconv.Atoi(envInitPipe) - if err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_INITPIPE=%s to int: %s", envInitPipe, err) - } - pipe := os.NewFile(uintptr(pipefd), "pipe") - defer pipe.Close() - - // Only init processes have FIFOFD. - fifofd := -1 - envInitType := os.Getenv("_LIBCONTAINER_INITTYPE") - it := initType(envInitType) - if it == initStandard { - envFifoFd := os.Getenv("_LIBCONTAINER_FIFOFD") - if fifofd, err = strconv.Atoi(envFifoFd); err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_FIFOFD=%s to int: %s", envFifoFd, err) - } - } - - var consoleSocket *os.File - if envConsole := os.Getenv("_LIBCONTAINER_CONSOLE"); envConsole != "" { - console, err := strconv.Atoi(envConsole) - if err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_CONSOLE=%s to int: %s", envConsole, err) - } - consoleSocket = os.NewFile(uintptr(console), "console-socket") - defer consoleSocket.Close() - } - - logPipeFdStr := os.Getenv("_LIBCONTAINER_LOGPIPE") - logPipeFd, err := strconv.Atoi(logPipeFdStr) - if err != nil { - return fmt.Errorf("unable to convert _LIBCONTAINER_LOGPIPE=%s to int: %s", logPipeFdStr, err) - } - - // clear the current process's environment to clean any libcontainer - // specific env vars. - os.Clearenv() - - defer func() { - // We have an error during the initialization of the container's init, - // send it back to the parent process in the form of an initError. - if werr := utils.WriteJSON(pipe, syncT{procError}); werr != nil { - fmt.Fprintln(os.Stderr, err) - return - } - if werr := utils.WriteJSON(pipe, newSystemError(err)); werr != nil { - fmt.Fprintln(os.Stderr, err) - return - } - }() - defer func() { - if e := recover(); e != nil { - err = fmt.Errorf("panic from initialization: %v, %v", e, string(debug.Stack())) - } - }() - - i, err := newContainerInit(it, pipe, consoleSocket, fifofd, logPipeFd) - if err != nil { - return err - } - - // If Init succeeds, syscall.Exec will not return, hence none of the defers will be called. 
- return i.Init() -} - -func (l *LinuxFactory) loadState(root, id string) (*State, error) { - stateFilePath, err := securejoin.SecureJoin(root, stateFilename) - if err != nil { - return nil, err - } - f, err := os.Open(stateFilePath) - if err != nil { - if os.IsNotExist(err) { - return nil, newGenericError(fmt.Errorf("container %q does not exist", id), ContainerNotExists) - } - return nil, newGenericError(err, SystemError) - } - defer f.Close() - var state *State - if err := json.NewDecoder(f).Decode(&state); err != nil { - return nil, newGenericError(err, SystemError) - } - return state, nil -} - -func (l *LinuxFactory) validateID(id string) error { - if !idRegex.MatchString(id) || string(os.PathSeparator)+id != utils.CleanPath(string(os.PathSeparator)+id) { - return newGenericError(fmt.Errorf("invalid id format: %v", id), InvalidIdFormat) - } - - return nil -} - -// NewuidmapPath returns an option func to configure a LinuxFactory with the -// provided .. -func NewuidmapPath(newuidmapPath string) func(*LinuxFactory) error { - return func(l *LinuxFactory) error { - l.NewuidmapPath = newuidmapPath - return nil - } -} - -// NewgidmapPath returns an option func to configure a LinuxFactory with the -// provided .. -func NewgidmapPath(newgidmapPath string) func(*LinuxFactory) error { - return func(l *LinuxFactory) error { - l.NewgidmapPath = newgidmapPath - return nil - } -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go deleted file mode 100644 index d185ebd898..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/generic_error.go +++ /dev/null @@ -1,92 +0,0 @@ -package libcontainer - -import ( - "fmt" - "io" - "text/template" - "time" - - "github.com/opencontainers/runc/libcontainer/stacktrace" -) - -var errorTemplate = template.Must(template.New("error").Parse(`Timestamp: {{.Timestamp}} -Code: {{.ECode}} -{{if .Message }} -Message: {{.Message}} -{{end}} -Frames:{{range $i, $frame := .Stack.Frames}} ---- -{{$i}}: {{$frame.Function}} -Package: {{$frame.Package}} -File: {{$frame.File}}@{{$frame.Line}}{{end}} -`)) - -func newGenericError(err error, c ErrorCode) Error { - if le, ok := err.(Error); ok { - return le - } - gerr := &genericError{ - Timestamp: time.Now(), - Err: err, - ECode: c, - Stack: stacktrace.Capture(1), - } - if err != nil { - gerr.Message = err.Error() - } - return gerr -} - -func newSystemError(err error) Error { - return createSystemError(err, "") -} - -func newSystemErrorWithCausef(err error, cause string, v ...interface{}) Error { - return createSystemError(err, fmt.Sprintf(cause, v...)) -} - -func newSystemErrorWithCause(err error, cause string) Error { - return createSystemError(err, cause) -} - -// createSystemError creates the specified error with the correct number of -// stack frames skipped. This is only to be called by the other functions for -// formatting the error. 
-func createSystemError(err error, cause string) Error { - gerr := &genericError{ - Timestamp: time.Now(), - Err: err, - ECode: SystemError, - Cause: cause, - Stack: stacktrace.Capture(2), - } - if err != nil { - gerr.Message = err.Error() - } - return gerr -} - -type genericError struct { - Timestamp time.Time - ECode ErrorCode - Err error `json:"-"` - Cause string - Message string - Stack stacktrace.Stacktrace -} - -func (e *genericError) Error() string { - if e.Cause == "" { - return e.Message - } - frame := e.Stack.Frames[0] - return fmt.Sprintf("%s:%d: %s caused: %s", frame.File, frame.Line, e.Cause, e.Message) -} - -func (e *genericError) Code() ErrorCode { - return e.ECode -} - -func (e *genericError) Detail(w io.Writer) error { - return errorTemplate.Execute(w, e) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go deleted file mode 100644 index c456cbe7a1..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/init_linux.go +++ /dev/null @@ -1,575 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "bytes" - "encoding/json" - "fmt" - "io" - "io/ioutil" - "net" - "os" - "strings" - "unsafe" - - "github.com/containerd/console" - "github.com/opencontainers/runc/libcontainer/capabilities" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/runc/libcontainer/user" - "github.com/opencontainers/runc/libcontainer/utils" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "github.com/vishvananda/netlink" - "golang.org/x/sys/unix" -) - -type initType string - -const ( - initSetns initType = "setns" - initStandard initType = "standard" -) - -type pid struct { - Pid int `json:"stage2_pid"` - PidFirstChild int `json:"stage1_pid"` -} - -// network is an internal struct used to setup container networks. -type network struct { - configs.Network - - // TempVethPeerName is a unique temporary veth peer name that was placed into - // the container's namespace. 
- TempVethPeerName string `json:"temp_veth_peer_name"` -} - -// initConfig is used for transferring parameters from Exec() to Init() -type initConfig struct { - Args []string `json:"args"` - Env []string `json:"env"` - Cwd string `json:"cwd"` - Capabilities *configs.Capabilities `json:"capabilities"` - ProcessLabel string `json:"process_label"` - AppArmorProfile string `json:"apparmor_profile"` - NoNewPrivileges bool `json:"no_new_privileges"` - User string `json:"user"` - AdditionalGroups []string `json:"additional_groups"` - Config *configs.Config `json:"config"` - Networks []*network `json:"network"` - PassedFilesCount int `json:"passed_files_count"` - ContainerId string `json:"containerid"` - Rlimits []configs.Rlimit `json:"rlimits"` - CreateConsole bool `json:"create_console"` - ConsoleWidth uint16 `json:"console_width"` - ConsoleHeight uint16 `json:"console_height"` - RootlessEUID bool `json:"rootless_euid,omitempty"` - RootlessCgroups bool `json:"rootless_cgroups,omitempty"` - SpecState *specs.State `json:"spec_state,omitempty"` - Cgroup2Path string `json:"cgroup2_path,omitempty"` -} - -type initer interface { - Init() error -} - -func newContainerInit(t initType, pipe *os.File, consoleSocket *os.File, fifoFd, logFd int) (initer, error) { - var config *initConfig - if err := json.NewDecoder(pipe).Decode(&config); err != nil { - return nil, err - } - if err := populateProcessEnvironment(config.Env); err != nil { - return nil, err - } - switch t { - case initSetns: - return &linuxSetnsInit{ - pipe: pipe, - consoleSocket: consoleSocket, - config: config, - logFd: logFd, - }, nil - case initStandard: - return &linuxStandardInit{ - pipe: pipe, - consoleSocket: consoleSocket, - parentPid: unix.Getppid(), - config: config, - fifoFd: fifoFd, - logFd: logFd, - }, nil - } - return nil, fmt.Errorf("unknown init type %q", t) -} - -// populateProcessEnvironment loads the provided environment variables into the -// current processes's environment. -func populateProcessEnvironment(env []string) error { - for _, pair := range env { - p := strings.SplitN(pair, "=", 2) - if len(p) < 2 { - return fmt.Errorf("invalid environment variable: %q", pair) - } - name, val := p[0], p[1] - if name == "" { - return fmt.Errorf("environment variable name can't be empty: %q", pair) - } - if strings.IndexByte(name, 0) >= 0 { - return fmt.Errorf("environment variable name can't contain null(\\x00): %q", pair) - } - if strings.IndexByte(val, 0) >= 0 { - return fmt.Errorf("environment variable value can't contain null(\\x00): %q", pair) - } - if err := os.Setenv(name, val); err != nil { - return err - } - } - return nil -} - -// finalizeNamespace drops the caps, sets the correct user -// and working dir, and closes any leaked file descriptors -// before executing the command inside the namespace -func finalizeNamespace(config *initConfig) error { - // Ensure that all unwanted fds we may have accidentally - // inherited are marked close-on-exec so they stay out of the - // container - if err := utils.CloseExecFrom(config.PassedFilesCount + 3); err != nil { - return errors.Wrap(err, "close exec fds") - } - - // we only do chdir if it's specified - doChdir := config.Cwd != "" - if doChdir { - // First, attempt the chdir before setting up the user. - // This could allow us to access a directory that the user running runc can access - // but the container user cannot. 
- err := unix.Chdir(config.Cwd) - switch { - case err == nil: - doChdir = false - case os.IsPermission(err): - // If we hit an EPERM, we should attempt again after setting up user. - // This will allow us to successfully chdir if the container user has access - // to the directory, but the user running runc does not. - // This is useful in cases where the cwd is also a volume that's been chowned to the container user. - default: - return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err) - } - } - - caps := &configs.Capabilities{} - if config.Capabilities != nil { - caps = config.Capabilities - } else if config.Config.Capabilities != nil { - caps = config.Config.Capabilities - } - w, err := capabilities.New(caps) - if err != nil { - return err - } - // drop capabilities in bounding set before changing user - if err := w.ApplyBoundingSet(); err != nil { - return errors.Wrap(err, "apply bounding set") - } - // preserve existing capabilities while we change users - if err := system.SetKeepCaps(); err != nil { - return errors.Wrap(err, "set keep caps") - } - if err := setupUser(config); err != nil { - return errors.Wrap(err, "setup user") - } - // Change working directory AFTER the user has been set up, if we haven't done it yet. - if doChdir { - if err := unix.Chdir(config.Cwd); err != nil { - return fmt.Errorf("chdir to cwd (%q) set in config.json failed: %v", config.Cwd, err) - } - } - if err := system.ClearKeepCaps(); err != nil { - return errors.Wrap(err, "clear keep caps") - } - if err := w.ApplyCaps(); err != nil { - return errors.Wrap(err, "apply caps") - } - return nil -} - -// setupConsole sets up the console from inside the container, and sends the -// master pty fd to the config.Pipe (using cmsg). This is done to ensure that -// consoles are scoped to a container properly (see runc#814 and the many -// issues related to that). This has to be run *after* we've pivoted to the new -// rootfs (and the users' configuration is entirely set up). -func setupConsole(socket *os.File, config *initConfig, mount bool) error { - defer socket.Close() - // At this point, /dev/ptmx points to something that we would expect. We - // used to change the owner of the slave path, but since the /dev/pts mount - // can have gid=X set (at the users' option). So touching the owner of the - // slave PTY is not necessary, as the kernel will handle that for us. Note - // however, that setupUser (specifically fixStdioPermissions) *will* change - // the UID owner of the console to be the user the process will run as (so - // they can actually control their console). - - pty, slavePath, err := console.NewPty() - if err != nil { - return err - } - - // After we return from here, we don't need the console anymore. - defer pty.Close() - - if config.ConsoleHeight != 0 && config.ConsoleWidth != 0 { - err = pty.Resize(console.WinSize{ - Height: config.ConsoleHeight, - Width: config.ConsoleWidth, - }) - - if err != nil { - return err - } - } - - // Mount the console inside our rootfs. - if mount { - if err := mountConsole(slavePath); err != nil { - return err - } - } - // While we can access console.master, using the API is a good idea. - if err := utils.SendFd(socket, pty.Name(), pty.Fd()); err != nil { - return err - } - // Now, dup over all the things. - return dupStdio(slavePath) -} - -// syncParentReady sends to the given pipe a JSON payload which indicates that -// the init is ready to Exec the child process. It then waits for the parent to -// indicate that it is cleared to Exec. 
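// For context (not part of the vendored file): the parent end of the pipe
// performs the mirror image of this exchange, waiting for procReady and
// answering with procRun once it is safe for the child to exec the user
// process.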
-func syncParentReady(pipe io.ReadWriter) error { - // Tell parent. - if err := writeSync(pipe, procReady); err != nil { - return err - } - - // Wait for parent to give the all-clear. - return readSync(pipe, procRun) -} - -// syncParentHooks sends to the given pipe a JSON payload which indicates that -// the parent should execute pre-start hooks. It then waits for the parent to -// indicate that it is cleared to resume. -func syncParentHooks(pipe io.ReadWriter) error { - // Tell parent. - if err := writeSync(pipe, procHooks); err != nil { - return err - } - - // Wait for parent to give the all-clear. - return readSync(pipe, procResume) -} - -// setupUser changes the groups, gid, and uid for the user inside the container -func setupUser(config *initConfig) error { - // Set up defaults. - defaultExecUser := user.ExecUser{ - Uid: 0, - Gid: 0, - Home: "/", - } - - passwdPath, err := user.GetPasswdPath() - if err != nil { - return err - } - - groupPath, err := user.GetGroupPath() - if err != nil { - return err - } - - execUser, err := user.GetExecUserPath(config.User, &defaultExecUser, passwdPath, groupPath) - if err != nil { - return err - } - - var addGroups []int - if len(config.AdditionalGroups) > 0 { - addGroups, err = user.GetAdditionalGroupsPath(config.AdditionalGroups, groupPath) - if err != nil { - return err - } - } - - // Rather than just erroring out later in setuid(2) and setgid(2), check - // that the user is mapped here. - if _, err := config.Config.HostUID(execUser.Uid); err != nil { - return errors.New("cannot set uid to unmapped user in user namespace") - } - if _, err := config.Config.HostGID(execUser.Gid); err != nil { - return errors.New("cannot set gid to unmapped user in user namespace") - } - - if config.RootlessEUID { - // We cannot set any additional groups in a rootless container and thus - // we bail if the user asked us to do so. TODO: We currently can't do - // this check earlier, but if libcontainer.Process.User was typesafe - // this might work. - if len(addGroups) > 0 { - return errors.New("cannot set any additional groups in a rootless container") - } - } - - // Before we change to the container's user make sure that the processes - // STDIO is correctly owned by the user that we are switching to. - if err := fixStdioPermissions(config, execUser); err != nil { - return err - } - - setgroups, err := ioutil.ReadFile("/proc/self/setgroups") - if err != nil && !os.IsNotExist(err) { - return err - } - - // This isn't allowed in an unprivileged user namespace since Linux 3.19. - // There's nothing we can do about /etc/group entries, so we silently - // ignore setting groups here (since the user didn't explicitly ask us to - // set the group). - allowSupGroups := !config.RootlessEUID && string(bytes.TrimSpace(setgroups)) != "deny" - - if allowSupGroups { - suppGroups := append(execUser.Sgids, addGroups...) - if err := unix.Setgroups(suppGroups); err != nil { - return err - } - } - - if err := system.Setgid(execUser.Gid); err != nil { - return err - } - if err := system.Setuid(execUser.Uid); err != nil { - return err - } - - // if we didn't get HOME already, set it based on the user's HOME - if envHome := os.Getenv("HOME"); envHome == "" { - if err := os.Setenv("HOME", execUser.Home); err != nil { - return err - } - } - return nil -} - -// fixStdioPermissions fixes the permissions of PID 1's STDIO within the container to the specified user. -// The ownership needs to match because it is created outside of the container and needs to be -// localized. 
-func fixStdioPermissions(config *initConfig, u *user.ExecUser) error { - var null unix.Stat_t - if err := unix.Stat("/dev/null", &null); err != nil { - return err - } - for _, fd := range []uintptr{ - os.Stdin.Fd(), - os.Stderr.Fd(), - os.Stdout.Fd(), - } { - var s unix.Stat_t - if err := unix.Fstat(int(fd), &s); err != nil { - return err - } - - // Skip chown of /dev/null if it was used as one of the STDIO fds. - if s.Rdev == null.Rdev { - continue - } - - // We only change the uid owner (as it is possible for the mount to - // prefer a different gid, and there's no reason for us to change it). - // The reason why we don't just leave the default uid=X mount setup is - // that users expect to be able to actually use their console. Without - // this code, you couldn't effectively run as a non-root user inside a - // container and also have a console set up. - if err := unix.Fchown(int(fd), u.Uid, int(s.Gid)); err != nil { - // If we've hit an EINVAL then s.Gid isn't mapped in the user - // namespace. If we've hit an EPERM then the inode's current owner - // is not mapped in our user namespace (in particular, - // privileged_wrt_inode_uidgid() has failed). In either case, we - // are in a configuration where it's better for us to just not - // touch the stdio rather than bail at this point. - if err == unix.EINVAL || err == unix.EPERM { - continue - } - return err - } - } - return nil -} - -// setupNetwork sets up and initializes any network interface inside the container. -func setupNetwork(config *initConfig) error { - for _, config := range config.Networks { - strategy, err := getStrategy(config.Type) - if err != nil { - return err - } - if err := strategy.initialize(config); err != nil { - return err - } - } - return nil -} - -func setupRoute(config *configs.Config) error { - for _, config := range config.Routes { - _, dst, err := net.ParseCIDR(config.Destination) - if err != nil { - return err - } - src := net.ParseIP(config.Source) - if src == nil { - return fmt.Errorf("Invalid source for route: %s", config.Source) - } - gw := net.ParseIP(config.Gateway) - if gw == nil { - return fmt.Errorf("Invalid gateway for route: %s", config.Gateway) - } - l, err := netlink.LinkByName(config.InterfaceName) - if err != nil { - return err - } - route := &netlink.Route{ - Scope: netlink.SCOPE_UNIVERSE, - Dst: dst, - Src: src, - Gw: gw, - LinkIndex: l.Attrs().Index, - } - if err := netlink.RouteAdd(route); err != nil { - return err - } - } - return nil -} - -func setupRlimits(limits []configs.Rlimit, pid int) error { - for _, rlimit := range limits { - if err := system.Prlimit(pid, rlimit.Type, unix.Rlimit{Max: rlimit.Hard, Cur: rlimit.Soft}); err != nil { - return fmt.Errorf("error setting rlimit type %v: %v", rlimit.Type, err) - } - } - return nil -} - -const _P_PID = 1 - -//nolint:structcheck,unused -type siginfo struct { - si_signo int32 - si_errno int32 - si_code int32 - // below here is a union; si_pid is the only field we use - si_pid int32 - // Pad to 128 bytes as detailed in blockUntilWaitable - pad [96]byte -} - -// isWaitable returns true if the process has exited false otherwise. 
-// Its based off blockUntilWaitable in src/os/wait_waitid.go -func isWaitable(pid int) (bool, error) { - si := &siginfo{} - _, _, e := unix.Syscall6(unix.SYS_WAITID, _P_PID, uintptr(pid), uintptr(unsafe.Pointer(si)), unix.WEXITED|unix.WNOWAIT|unix.WNOHANG, 0, 0) - if e != 0 { - return false, os.NewSyscallError("waitid", e) - } - - return si.si_pid != 0, nil -} - -// isNoChildren returns true if err represents a unix.ECHILD (formerly syscall.ECHILD) false otherwise -func isNoChildren(err error) bool { - switch err := err.(type) { - case unix.Errno: - if err == unix.ECHILD { - return true - } - case *os.SyscallError: - if err.Err == unix.ECHILD { - return true - } - } - return false -} - -// signalAllProcesses freezes then iterates over all the processes inside the -// manager's cgroups sending the signal s to them. -// If s is SIGKILL then it will wait for each process to exit. -// For all other signals it will check if the process is ready to report its -// exit status and only if it is will a wait be performed. -func signalAllProcesses(m cgroups.Manager, s os.Signal) error { - var procs []*os.Process - if err := m.Freeze(configs.Frozen); err != nil { - logrus.Warn(err) - } - pids, err := m.GetAllPids() - if err != nil { - if err := m.Freeze(configs.Thawed); err != nil { - logrus.Warn(err) - } - return err - } - for _, pid := range pids { - p, err := os.FindProcess(pid) - if err != nil { - logrus.Warn(err) - continue - } - procs = append(procs, p) - if err := p.Signal(s); err != nil { - logrus.Warn(err) - } - } - if err := m.Freeze(configs.Thawed); err != nil { - logrus.Warn(err) - } - - subreaper, err := system.GetSubreaper() - if err != nil { - // The error here means that PR_GET_CHILD_SUBREAPER is not - // supported because this code might run on a kernel older - // than 3.4. We don't want to throw an error in that case, - // and we simplify things, considering there is no subreaper - // set. - subreaper = 0 - } - - for _, p := range procs { - if s != unix.SIGKILL { - if ok, err := isWaitable(p.Pid); err != nil { - if !isNoChildren(err) { - logrus.Warn("signalAllProcesses: ", p.Pid, err) - } - continue - } else if !ok { - // Not ready to report so don't wait - continue - } - } - - // In case a subreaper has been setup, this code must not - // wait for the process. Otherwise, we cannot be sure the - // current process will be reaped by the subreaper, while - // the subreaper might be waiting for this process in order - // to retrieve its exit code. - if subreaper == 0 { - if _, err := p.Wait(); err != nil { - if !isNoChildren(err) { - logrus.Warn("wait: ", err) - } - } - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go deleted file mode 100644 index 6480a13069..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/cmt.go +++ /dev/null @@ -1,23 +0,0 @@ -package intelrdt - -var cmtEnabled bool - -// Check if Intel RDT/CMT is enabled. 
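// Background (not part of the vendored file): CMT (Cache Monitoring
// Technology) reports last-level-cache occupancy per resctrl monitoring
// group; the kernel exposes it under the group directory as
// mon_data/mon_L3_<id>/llc_occupancy, in bytes, which is what
// getCMTNumaNodeStats below reads.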
-func IsCMTEnabled() bool { - featuresInit() - return cmtEnabled -} - -func getCMTNumaNodeStats(numaPath string) (*CMTNumaNodeStats, error) { - stats := &CMTNumaNodeStats{} - - if enabledMonFeatures.llcOccupancy { - llcOccupancy, err := getIntelRdtParamUint(numaPath, "llc_occupancy") - if err != nil { - return nil, err - } - stats.LLCOccupancy = llcOccupancy - } - - return stats, nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go deleted file mode 100644 index 3fa11b8002..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/intelrdt.go +++ /dev/null @@ -1,816 +0,0 @@ -// +build linux - -package intelrdt - -import ( - "bufio" - "bytes" - "fmt" - "io" - "io/ioutil" - "os" - "path/filepath" - "strconv" - "strings" - "sync" - - "github.com/moby/sys/mountinfo" - "github.com/opencontainers/runc/libcontainer/configs" -) - -/* - * About Intel RDT features: - * Intel platforms with new Xeon CPU support Resource Director Technology (RDT). - * Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are - * two sub-features of RDT. - * - * Cache Allocation Technology (CAT) provides a way for the software to restrict - * cache allocation to a defined 'subset' of L3 cache which may be overlapping - * with other 'subsets'. The different subsets are identified by class of - * service (CLOS) and each CLOS has a capacity bitmask (CBM). - * - * Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle - * over memory bandwidth for the software. A user controls the resource by - * indicating the percentage of maximum memory bandwidth or memory bandwidth - * limit in MBps unit if MBA Software Controller is enabled. - * - * More details about Intel RDT CAT and MBA can be found in the section 17.18 - * of Intel Software Developer Manual: - * https://software.intel.com/en-us/articles/intel-sdm - * - * About Intel RDT kernel interface: - * In Linux 4.10 kernel or newer, the interface is defined and exposed via - * "resource control" filesystem, which is a "cgroup-like" interface. - * - * Comparing with cgroups, it has similar process management lifecycle and - * interfaces in a container. But unlike cgroups' hierarchy, it has single level - * filesystem layout. - * - * CAT and MBA features are introduced in Linux 4.10 and 4.12 kernel via - * "resource control" filesystem. - * - * Intel RDT "resource control" filesystem hierarchy: - * mount -t resctrl resctrl /sys/fs/resctrl - * tree /sys/fs/resctrl - * /sys/fs/resctrl/ - * |-- info - * | |-- L3 - * | | |-- cbm_mask - * | | |-- min_cbm_bits - * | | |-- num_closids - * | |-- L3_MON - * | | |-- max_threshold_occupancy - * | | |-- mon_features - * | | |-- num_rmids - * | |-- MB - * | |-- bandwidth_gran - * | |-- delay_linear - * | |-- min_bandwidth - * | |-- num_closids - * |-- ... - * |-- schemata - * |-- tasks - * |-- - * |-- ... - * |-- schemata - * |-- tasks - * - * For runc, we can make use of `tasks` and `schemata` configuration for L3 - * cache and memory bandwidth resources constraints. - * - * The file `tasks` has a list of tasks that belongs to this group (e.g., - * " group). Tasks can be added to a group by writing the task ID - * to the "tasks" file (which will automatically remove them from the previous - * group to which they belonged). New tasks created by fork(2) and clone(2) are - * added to the same group as their parent. 
- * - * The file `schemata` has a list of all the resources available to this group. - * Each resource (L3 cache, memory bandwidth) has its own line and format. - * - * L3 cache schema: - * It has allocation bitmasks/values for L3 cache on each socket, which - * contains L3 cache id and capacity bitmask (CBM). - * Format: "L3:=;=;..." - * For example, on a two-socket machine, the schema line could be "L3:0=ff;1=c0" - * which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM is 0xc0. - * - * The valid L3 cache CBM is a *contiguous bits set* and number of bits that can - * be set is less than the max bit. The max bits in the CBM is varied among - * supported Intel CPU models. Kernel will check if it is valid when writing. - * e.g., default value 0xfffff in root indicates the max bits of CBM is 20 - * bits, which mapping to entire L3 cache capacity. Some valid CBM values to - * set in a group: 0xf, 0xf0, 0x3ff, 0x1f00 and etc. - * - * Memory bandwidth schema: - * It has allocation values for memory bandwidth on each socket, which contains - * L3 cache id and memory bandwidth. - * Format: "MB:=bandwidth0;=bandwidth1;..." - * For example, on a two-socket machine, the schema line could be "MB:0=20;1=70" - * - * The minimum bandwidth percentage value for each CPU model is predefined and - * can be looked up through "info/MB/min_bandwidth". The bandwidth granularity - * that is allocated is also dependent on the CPU model and can be looked up at - * "info/MB/bandwidth_gran". The available bandwidth control steps are: - * min_bw + N * bw_gran. Intermediate values are rounded to the next control - * step available on the hardware. - * - * If MBA Software Controller is enabled through mount option "-o mba_MBps": - * mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl - * We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit - * instead of "percentages". The kernel underneath would use a software feedback - * mechanism or a "Software Controller" which reads the actual bandwidth using - * MBM counters and adjust the memory bandwidth percentages to ensure: - * "actual memory bandwidth < user specified memory bandwidth". - * - * For example, on a two-socket machine, the schema line could be - * "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0 - * and 7000 MBps memory bandwidth limit on socket 1. - * - * For more information about Intel RDT kernel interface: - * https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt - * - * An example for runc: - * Consider a two-socket machine with two L3 caches where the default CBM is - * 0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10% - * with a memory bandwidth granularity of 10%. - * - * Tasks inside the container only have access to the "upper" 7/11 of L3 cache - * on socket 0 and the "lower" 5/11 L3 cache on socket 1, and may use a - * maximum memory bandwidth of 20% on socket 0 and 70% on socket 1. - * - * "linux": { - * "intelRdt": { - * "l3CacheSchema": "L3:0=7f0;1=1f", - * "memBwSchema": "MB:0=20;1=70" - * } - * } - */ - -type Manager interface { - // Applies Intel RDT configuration to the process with the specified pid - Apply(pid int) error - - // Returns statistics for Intel RDT - GetStats() (*Stats, error) - - // Destroys the Intel RDT 'container_id' group - Destroy() error - - // Returns Intel RDT path to save in a state file and to be able to - // restore the object later - GetPath() string - - // Set Intel RDT "resource control" filesystem as configured. 
- Set(container *configs.Config) error -} - -// This implements interface Manager -type intelRdtManager struct { - mu sync.Mutex - config *configs.Config - id string - path string -} - -func NewManager(config *configs.Config, id string, path string) Manager { - return &intelRdtManager{ - config: config, - id: id, - path: path, - } -} - -const ( - IntelRdtTasks = "tasks" -) - -var ( - // The absolute root path of the Intel RDT "resource control" filesystem - intelRdtRoot string - intelRdtRootLock sync.Mutex - - // The flag to indicate if Intel RDT/CAT is enabled - catEnabled bool - // The flag to indicate if Intel RDT/MBA is enabled - mbaEnabled bool - // The flag to indicate if Intel RDT/MBA Software Controller is enabled - mbaScEnabled bool - - // For Intel RDT initialization - initOnce sync.Once -) - -type intelRdtData struct { - root string - config *configs.Config - pid int -} - -// Check if Intel RDT sub-features are enabled in featuresInit() -func featuresInit() { - initOnce.Do(func() { - // 1. Check if hardware and kernel support Intel RDT sub-features - flagsSet, err := parseCpuInfoFile("/proc/cpuinfo") - if err != nil { - return - } - - // 2. Check if Intel RDT "resource control" filesystem is mounted - // The user guarantees to mount the filesystem - if !isIntelRdtMounted() { - return - } - - // 3. Double check if Intel RDT sub-features are available in - // "resource control" filesystem. Intel RDT sub-features can be - // selectively disabled or enabled by kernel command line - // (e.g., rdt=!l3cat,mba) in 4.14 and newer kernel - if flagsSet.CAT { - if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3")); err == nil { - catEnabled = true - } - } - if mbaScEnabled { - // We confirm MBA Software Controller is enabled in step 2, - // MBA should be enabled because MBA Software Controller - // depends on MBA - mbaEnabled = true - } else if flagsSet.MBA { - if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "MB")); err == nil { - mbaEnabled = true - } - } - if flagsSet.MBMTotal || flagsSet.MBMLocal || flagsSet.CMT { - if _, err := os.Stat(filepath.Join(intelRdtRoot, "info", "L3_MON")); err != nil { - return - } - enabledMonFeatures, err = getMonFeatures(intelRdtRoot) - if err != nil { - return - } - if enabledMonFeatures.mbmTotalBytes || enabledMonFeatures.mbmLocalBytes { - mbmEnabled = true - } - if enabledMonFeatures.llcOccupancy { - cmtEnabled = true - } - } - }) -} - -// Return the mount point path of Intel RDT "resource control" filesysem -func findIntelRdtMountpointDir(f io.Reader) (string, error) { - mi, err := mountinfo.GetMountsFromReader(f, func(m *mountinfo.Info) (bool, bool) { - // similar to mountinfo.FSTypeFilter but stops after the first match - if m.FSType == "resctrl" { - return false, true // don't skip, stop - } - return true, false // skip, keep going - }) - if err != nil { - return "", err - } - if len(mi) < 1 { - return "", NewNotFoundError("Intel RDT") - } - - // Check if MBA Software Controller is enabled through mount option "-o mba_MBps" - if strings.Contains(","+mi[0].VFSOptions+",", ",mba_MBps,") { - mbaScEnabled = true - } - - return mi[0].Mountpoint, nil -} - -// Gets the root path of Intel RDT "resource control" filesystem -func getIntelRdtRoot() (string, error) { - intelRdtRootLock.Lock() - defer intelRdtRootLock.Unlock() - - if intelRdtRoot != "" { - return intelRdtRoot, nil - } - - f, err := os.Open("/proc/self/mountinfo") - if err != nil { - return "", err - } - root, err := findIntelRdtMountpointDir(f) - f.Close() - if err != nil { - 
return "", err - } - - if _, err := os.Stat(root); err != nil { - return "", err - } - - intelRdtRoot = root - return intelRdtRoot, nil -} - -func isIntelRdtMounted() bool { - _, err := getIntelRdtRoot() - return err == nil -} - -type cpuInfoFlags struct { - CAT bool // Cache Allocation Technology - MBA bool // Memory Bandwidth Allocation - - // Memory Bandwidth Monitoring related. - MBMTotal bool - MBMLocal bool - - CMT bool // Cache Monitoring Technology -} - -func parseCpuInfoFile(path string) (cpuInfoFlags, error) { - infoFlags := cpuInfoFlags{} - - f, err := os.Open(path) - if err != nil { - return infoFlags, err - } - defer f.Close() - - s := bufio.NewScanner(f) - for s.Scan() { - line := s.Text() - - // Search "cat_l3" and "mba" flags in first "flags" line - if strings.HasPrefix(line, "flags") { - flags := strings.Split(line, " ") - // "cat_l3" flag for CAT and "mba" flag for MBA - for _, flag := range flags { - switch flag { - case "cat_l3": - infoFlags.CAT = true - case "mba": - infoFlags.MBA = true - case "cqm_mbm_total": - infoFlags.MBMTotal = true - case "cqm_mbm_local": - infoFlags.MBMLocal = true - case "cqm_occup_llc": - infoFlags.CMT = true - } - } - return infoFlags, nil - } - } - if err := s.Err(); err != nil { - return infoFlags, err - } - - return infoFlags, nil -} - -func parseUint(s string, base, bitSize int) (uint64, error) { - value, err := strconv.ParseUint(s, base, bitSize) - if err != nil { - intValue, intErr := strconv.ParseInt(s, base, bitSize) - // 1. Handle negative values greater than MinInt64 (and) - // 2. Handle negative values lesser than MinInt64 - if intErr == nil && intValue < 0 { - return 0, nil - } else if intErr != nil && intErr.(*strconv.NumError).Err == strconv.ErrRange && intValue < 0 { - return 0, nil - } - - return value, err - } - - return value, nil -} - -// Gets a single uint64 value from the specified file. 
-func getIntelRdtParamUint(path, file string) (uint64, error) { - fileName := filepath.Join(path, file) - contents, err := ioutil.ReadFile(fileName) - if err != nil { - return 0, err - } - - res, err := parseUint(string(bytes.TrimSpace(contents)), 10, 64) - if err != nil { - return res, fmt.Errorf("unable to parse %q as a uint from file %q", string(contents), fileName) - } - return res, nil -} - -// Gets a string value from the specified file -func getIntelRdtParamString(path, file string) (string, error) { - contents, err := ioutil.ReadFile(filepath.Join(path, file)) - if err != nil { - return "", err - } - - return string(bytes.TrimSpace(contents)), nil -} - -func writeFile(dir, file, data string) error { - if dir == "" { - return fmt.Errorf("no such directory for %s", file) - } - if err := ioutil.WriteFile(filepath.Join(dir, file), []byte(data+"\n"), 0o600); err != nil { - return fmt.Errorf("failed to write %v to %v: %v", data, file, err) - } - return nil -} - -func getIntelRdtData(c *configs.Config, pid int) (*intelRdtData, error) { - rootPath, err := getIntelRdtRoot() - if err != nil { - return nil, err - } - return &intelRdtData{ - root: rootPath, - config: c, - pid: pid, - }, nil -} - -// Get the read-only L3 cache information -func getL3CacheInfo() (*L3CacheInfo, error) { - l3CacheInfo := &L3CacheInfo{} - - rootPath, err := getIntelRdtRoot() - if err != nil { - return l3CacheInfo, err - } - - path := filepath.Join(rootPath, "info", "L3") - cbmMask, err := getIntelRdtParamString(path, "cbm_mask") - if err != nil { - return l3CacheInfo, err - } - minCbmBits, err := getIntelRdtParamUint(path, "min_cbm_bits") - if err != nil { - return l3CacheInfo, err - } - numClosids, err := getIntelRdtParamUint(path, "num_closids") - if err != nil { - return l3CacheInfo, err - } - - l3CacheInfo.CbmMask = cbmMask - l3CacheInfo.MinCbmBits = minCbmBits - l3CacheInfo.NumClosids = numClosids - - return l3CacheInfo, nil -} - -// Get the read-only memory bandwidth information -func getMemBwInfo() (*MemBwInfo, error) { - memBwInfo := &MemBwInfo{} - - rootPath, err := getIntelRdtRoot() - if err != nil { - return memBwInfo, err - } - - path := filepath.Join(rootPath, "info", "MB") - bandwidthGran, err := getIntelRdtParamUint(path, "bandwidth_gran") - if err != nil { - return memBwInfo, err - } - delayLinear, err := getIntelRdtParamUint(path, "delay_linear") - if err != nil { - return memBwInfo, err - } - minBandwidth, err := getIntelRdtParamUint(path, "min_bandwidth") - if err != nil { - return memBwInfo, err - } - numClosids, err := getIntelRdtParamUint(path, "num_closids") - if err != nil { - return memBwInfo, err - } - - memBwInfo.BandwidthGran = bandwidthGran - memBwInfo.DelayLinear = delayLinear - memBwInfo.MinBandwidth = minBandwidth - memBwInfo.NumClosids = numClosids - - return memBwInfo, nil -} - -// Get diagnostics for last filesystem operation error from file info/last_cmd_status -func getLastCmdStatus() (string, error) { - rootPath, err := getIntelRdtRoot() - if err != nil { - return "", err - } - - path := filepath.Join(rootPath, "info") - lastCmdStatus, err := getIntelRdtParamString(path, "last_cmd_status") - if err != nil { - return "", err - } - - return lastCmdStatus, nil -} - -// WriteIntelRdtTasks writes the specified pid into the "tasks" file -func WriteIntelRdtTasks(dir string, pid int) error { - if dir == "" { - return fmt.Errorf("no such directory for %s", IntelRdtTasks) - } - - // Don't attach any pid if -1 is specified as a pid - if pid != -1 { - if err := 
ioutil.WriteFile(filepath.Join(dir, IntelRdtTasks), []byte(strconv.Itoa(pid)), 0o600); err != nil { - return fmt.Errorf("failed to write %v to %v: %v", pid, IntelRdtTasks, err) - } - } - return nil -} - -// Check if Intel RDT/CAT is enabled -func IsCATEnabled() bool { - featuresInit() - return catEnabled -} - -// Check if Intel RDT/MBA is enabled -func IsMBAEnabled() bool { - featuresInit() - return mbaEnabled -} - -// Check if Intel RDT/MBA Software Controller is enabled -func IsMBAScEnabled() bool { - featuresInit() - return mbaScEnabled -} - -// Get the 'container_id' path in Intel RDT "resource control" filesystem -func GetIntelRdtPath(id string) (string, error) { - rootPath, err := getIntelRdtRoot() - if err != nil { - return "", err - } - - path := filepath.Join(rootPath, id) - return path, nil -} - -// Applies Intel RDT configuration to the process with the specified pid -func (m *intelRdtManager) Apply(pid int) (err error) { - // If intelRdt is not specified in config, we do nothing - if m.config.IntelRdt == nil { - return nil - } - d, err := getIntelRdtData(m.config, pid) - if err != nil && !IsNotFound(err) { - return err - } - - m.mu.Lock() - defer m.mu.Unlock() - path, err := d.join(m.id) - if err != nil { - return err - } - - m.path = path - return nil -} - -// Destroys the Intel RDT 'container_id' group -func (m *intelRdtManager) Destroy() error { - m.mu.Lock() - defer m.mu.Unlock() - if err := os.RemoveAll(m.GetPath()); err != nil { - return err - } - m.path = "" - return nil -} - -// Returns Intel RDT path to save in a state file and to be able to -// restore the object later -func (m *intelRdtManager) GetPath() string { - if m.path == "" { - m.path, _ = GetIntelRdtPath(m.id) - } - return m.path -} - -// Returns statistics for Intel RDT -func (m *intelRdtManager) GetStats() (*Stats, error) { - // If intelRdt is not specified in config - if m.config.IntelRdt == nil { - return nil, nil - } - - m.mu.Lock() - defer m.mu.Unlock() - stats := NewStats() - - rootPath, err := getIntelRdtRoot() - if err != nil { - return nil, err - } - // The read-only L3 cache and memory bandwidth schemata in root - tmpRootStrings, err := getIntelRdtParamString(rootPath, "schemata") - if err != nil { - return nil, err - } - schemaRootStrings := strings.Split(tmpRootStrings, "\n") - - // The L3 cache and memory bandwidth schemata in 'container_id' group - containerPath := m.GetPath() - tmpStrings, err := getIntelRdtParamString(containerPath, "schemata") - if err != nil { - return nil, err - } - schemaStrings := strings.Split(tmpStrings, "\n") - - if IsCATEnabled() { - // The read-only L3 cache information - l3CacheInfo, err := getL3CacheInfo() - if err != nil { - return nil, err - } - stats.L3CacheInfo = l3CacheInfo - - // The read-only L3 cache schema in root - for _, schemaRoot := range schemaRootStrings { - if strings.Contains(schemaRoot, "L3") { - stats.L3CacheSchemaRoot = strings.TrimSpace(schemaRoot) - } - } - - // The L3 cache schema in 'container_id' group - for _, schema := range schemaStrings { - if strings.Contains(schema, "L3") { - stats.L3CacheSchema = strings.TrimSpace(schema) - } - } - } - - if IsMBAEnabled() { - // The read-only memory bandwidth information - memBwInfo, err := getMemBwInfo() - if err != nil { - return nil, err - } - stats.MemBwInfo = memBwInfo - - // The read-only memory bandwidth information - for _, schemaRoot := range schemaRootStrings { - if strings.Contains(schemaRoot, "MB") { - stats.MemBwSchemaRoot = strings.TrimSpace(schemaRoot) - } - } - - // The memory 
bandwidth schema in 'container_id' group - for _, schema := range schemaStrings { - if strings.Contains(schema, "MB") { - stats.MemBwSchema = strings.TrimSpace(schema) - } - } - } - - if IsMBMEnabled() || IsCMTEnabled() { - err = getMonitoringStats(containerPath, stats) - if err != nil { - return nil, err - } - } - - return stats, nil -} - -// Set Intel RDT "resource control" filesystem as configured. -func (m *intelRdtManager) Set(container *configs.Config) error { - // About L3 cache schema: - // It has allocation bitmasks/values for L3 cache on each socket, - // which contains L3 cache id and capacity bitmask (CBM). - // Format: "L3:=;=;..." - // For example, on a two-socket machine, the schema line could be: - // L3:0=ff;1=c0 - // which means L3 cache id 0's CBM is 0xff, and L3 cache id 1's CBM - // is 0xc0. - // - // The valid L3 cache CBM is a *contiguous bits set* and number of - // bits that can be set is less than the max bit. The max bits in the - // CBM is varied among supported Intel CPU models. Kernel will check - // if it is valid when writing. e.g., default value 0xfffff in root - // indicates the max bits of CBM is 20 bits, which mapping to entire - // L3 cache capacity. Some valid CBM values to set in a group: - // 0xf, 0xf0, 0x3ff, 0x1f00 and etc. - // - // - // About memory bandwidth schema: - // It has allocation values for memory bandwidth on each socket, which - // contains L3 cache id and memory bandwidth. - // Format: "MB:=bandwidth0;=bandwidth1;..." - // For example, on a two-socket machine, the schema line could be: - // "MB:0=20;1=70" - // - // The minimum bandwidth percentage value for each CPU model is - // predefined and can be looked up through "info/MB/min_bandwidth". - // The bandwidth granularity that is allocated is also dependent on - // the CPU model and can be looked up at "info/MB/bandwidth_gran". - // The available bandwidth control steps are: min_bw + N * bw_gran. - // Intermediate values are rounded to the next control step available - // on the hardware. - // - // If MBA Software Controller is enabled through mount option - // "-o mba_MBps": mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl - // We could specify memory bandwidth in "MBps" (Mega Bytes per second) - // unit instead of "percentages". The kernel underneath would use a - // software feedback mechanism or a "Software Controller" which reads - // the actual bandwidth using MBM counters and adjust the memory - // bandwidth percentages to ensure: - // "actual memory bandwidth < user specified memory bandwidth". - // - // For example, on a two-socket machine, the schema line could be - // "MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on - // socket 0 and 7000 MBps memory bandwidth limit on socket 1. 
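The schemata lines discussed above follow the kernel resctrl text format, "L3:<cache_id>=<cbm>;..." and "MB:<cache_id>=<bandwidth>;..." (plain MBps values rather than percentages when the filesystem is mounted with -o mba_MBps), and applying them amounts to writing that text into the group's schemata file. A minimal standalone sketch of such a write, assuming a hypothetical group directory under /sys/fs/resctrl and reusing the example values from the comment above:

package main

import (
	"fmt"
	"os"
	"path/filepath"
)

// writeSchemata joins the non-empty schema lines and writes them to the
// group's "schemata" file; the group path passed from main below is made up
// purely for illustration.
func writeSchemata(groupPath string, lines ...string) error {
	data := ""
	for _, l := range lines {
		if l != "" {
			data += l + "\n"
		}
	}
	return os.WriteFile(filepath.Join(groupPath, "schemata"), []byte(data), 0o600)
}

func main() {
	// Two-socket example: CBM 0xff on cache id 0, 0xc0 on cache id 1,
	// plus 20% / 70% memory bandwidth limits.
	if err := writeSchemata("/sys/fs/resctrl/example-group", "L3:0=ff;1=c0", "MB:0=20;1=70"); err != nil {
		fmt.Println("write schemata:", err)
	}
}

The Set implementation below does the same through writeFile, emitting either a joint L3 plus MB line or a single line depending on which schemas are configured.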
- if container.IntelRdt != nil { - path := m.GetPath() - l3CacheSchema := container.IntelRdt.L3CacheSchema - memBwSchema := container.IntelRdt.MemBwSchema - - // Write a single joint schema string to schemata file - if l3CacheSchema != "" && memBwSchema != "" { - if err := writeFile(path, "schemata", l3CacheSchema+"\n"+memBwSchema); err != nil { - return NewLastCmdError(err) - } - } - - // Write only L3 cache schema string to schemata file - if l3CacheSchema != "" && memBwSchema == "" { - if err := writeFile(path, "schemata", l3CacheSchema); err != nil { - return NewLastCmdError(err) - } - } - - // Write only memory bandwidth schema string to schemata file - if l3CacheSchema == "" && memBwSchema != "" { - if err := writeFile(path, "schemata", memBwSchema); err != nil { - return NewLastCmdError(err) - } - } - } - - return nil -} - -func (raw *intelRdtData) join(id string) (string, error) { - path := filepath.Join(raw.root, id) - if err := os.MkdirAll(path, 0o755); err != nil { - return "", NewLastCmdError(err) - } - - if err := WriteIntelRdtTasks(path, raw.pid); err != nil { - return "", NewLastCmdError(err) - } - return path, nil -} - -type NotFoundError struct { - ResourceControl string -} - -func (e *NotFoundError) Error() string { - return fmt.Sprintf("mountpoint for %s not found", e.ResourceControl) -} - -func NewNotFoundError(res string) error { - return &NotFoundError{ - ResourceControl: res, - } -} - -func IsNotFound(err error) bool { - if err == nil { - return false - } - _, ok := err.(*NotFoundError) - return ok -} - -type LastCmdError struct { - LastCmdStatus string - Err error -} - -func (e *LastCmdError) Error() string { - return e.Err.Error() + ", last_cmd_status: " + e.LastCmdStatus -} - -func NewLastCmdError(err error) error { - lastCmdStatus, err1 := getLastCmdStatus() - if err1 == nil { - return &LastCmdError{ - LastCmdStatus: lastCmdStatus, - Err: err, - } - } - return err -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go deleted file mode 100644 index 0fbecdeb22..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/mbm.go +++ /dev/null @@ -1,33 +0,0 @@ -// +build linux - -package intelrdt - -// The flag to indicate if Intel RDT/MBM is enabled -var mbmEnabled bool - -// Check if Intel RDT/MBM is enabled. 
-func IsMBMEnabled() bool { - featuresInit() - return mbmEnabled -} - -func getMBMNumaNodeStats(numaPath string) (*MBMNumaNodeStats, error) { - stats := &MBMNumaNodeStats{} - if enabledMonFeatures.mbmTotalBytes { - mbmTotalBytes, err := getIntelRdtParamUint(numaPath, "mbm_total_bytes") - if err != nil { - return nil, err - } - stats.MBMTotalBytes = mbmTotalBytes - } - - if enabledMonFeatures.mbmLocalBytes { - mbmLocalBytes, err := getIntelRdtParamUint(numaPath, "mbm_local_bytes") - if err != nil { - return nil, err - } - stats.MBMLocalBytes = mbmLocalBytes - } - - return stats, nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go deleted file mode 100644 index 547c15470f..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/monitoring.go +++ /dev/null @@ -1,84 +0,0 @@ -package intelrdt - -import ( - "bufio" - "io" - "io/ioutil" - "os" - "path/filepath" - - "github.com/sirupsen/logrus" -) - -var enabledMonFeatures monFeatures - -type monFeatures struct { - mbmTotalBytes bool - mbmLocalBytes bool - llcOccupancy bool -} - -func getMonFeatures(intelRdtRoot string) (monFeatures, error) { - file, err := os.Open(filepath.Join(intelRdtRoot, "info", "L3_MON", "mon_features")) - if err != nil { - return monFeatures{}, err - } - defer file.Close() - return parseMonFeatures(file) -} - -func parseMonFeatures(reader io.Reader) (monFeatures, error) { - scanner := bufio.NewScanner(reader) - - monFeatures := monFeatures{} - - for scanner.Scan() { - switch feature := scanner.Text(); feature { - case "mbm_total_bytes": - monFeatures.mbmTotalBytes = true - case "mbm_local_bytes": - monFeatures.mbmLocalBytes = true - case "llc_occupancy": - monFeatures.llcOccupancy = true - default: - logrus.Warnf("Unsupported Intel RDT monitoring feature: %s", feature) - } - } - - return monFeatures, scanner.Err() -} - -func getMonitoringStats(containerPath string, stats *Stats) error { - numaFiles, err := ioutil.ReadDir(filepath.Join(containerPath, "mon_data")) - if err != nil { - return err - } - - var mbmStats []MBMNumaNodeStats - var cmtStats []CMTNumaNodeStats - - for _, file := range numaFiles { - if file.IsDir() { - numaPath := filepath.Join(containerPath, "mon_data", file.Name()) - if IsMBMEnabled() { - numaMBMStats, err := getMBMNumaNodeStats(numaPath) - if err != nil { - return err - } - mbmStats = append(mbmStats, *numaMBMStats) - } - if IsCMTEnabled() { - numaCMTStats, err := getCMTNumaNodeStats(numaPath) - if err != nil { - return err - } - cmtStats = append(cmtStats, *numaCMTStats) - } - } - } - - stats.MBMStats = &mbmStats - stats.CMTStats = &cmtStats - - return err -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go deleted file mode 100644 index 70df0d14e6..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/intelrdt/stats.go +++ /dev/null @@ -1,59 +0,0 @@ -// +build linux - -package intelrdt - -type L3CacheInfo struct { - CbmMask string `json:"cbm_mask,omitempty"` - MinCbmBits uint64 `json:"min_cbm_bits,omitempty"` - NumClosids uint64 `json:"num_closids,omitempty"` -} - -type MemBwInfo struct { - BandwidthGran uint64 `json:"bandwidth_gran,omitempty"` - DelayLinear uint64 `json:"delay_linear,omitempty"` - MinBandwidth uint64 `json:"min_bandwidth,omitempty"` - NumClosids uint64 
`json:"num_closids,omitempty"` -} - -type MBMNumaNodeStats struct { - // The 'mbm_total_bytes' in 'container_id' group. - MBMTotalBytes uint64 `json:"mbm_total_bytes"` - - // The 'mbm_local_bytes' in 'container_id' group. - MBMLocalBytes uint64 `json:"mbm_local_bytes"` -} - -type CMTNumaNodeStats struct { - // The 'llc_occupancy' in 'container_id' group. - LLCOccupancy uint64 `json:"llc_occupancy"` -} - -type Stats struct { - // The read-only L3 cache information - L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"` - - // The read-only L3 cache schema in root - L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"` - - // The L3 cache schema in 'container_id' group - L3CacheSchema string `json:"l3_cache_schema,omitempty"` - - // The read-only memory bandwidth information - MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"` - - // The read-only memory bandwidth schema in root - MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"` - - // The memory bandwidth schema in 'container_id' group - MemBwSchema string `json:"mem_bw_schema,omitempty"` - - // The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group - MBMStats *[]MBMNumaNodeStats `json:"mbm_stats,omitempty"` - - // The cache monitoring technology statistics from NUMA nodes in 'container_id' group - CMTStats *[]CMTNumaNodeStats `json:"cmt_stats,omitempty"` -} - -func NewStats() *Stats { - return &Stats{} -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go deleted file mode 100644 index 4a60c34b84..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/keys/keyctl.go +++ /dev/null @@ -1,47 +0,0 @@ -// +build linux - -package keys - -import ( - "strconv" - "strings" - - "github.com/pkg/errors" - - "golang.org/x/sys/unix" -) - -type KeySerial uint32 - -func JoinSessionKeyring(name string) (KeySerial, error) { - sessKeyID, err := unix.KeyctlJoinSessionKeyring(name) - if err != nil { - return 0, errors.Wrap(err, "create session key") - } - return KeySerial(sessKeyID), nil -} - -// ModKeyringPerm modifies permissions on a keyring by reading the current permissions, -// anding the bits with the given mask (clearing permissions) and setting -// additional permission bits -func ModKeyringPerm(ringID KeySerial, mask, setbits uint32) error { - dest, err := unix.KeyctlString(unix.KEYCTL_DESCRIBE, int(ringID)) - if err != nil { - return err - } - - res := strings.Split(dest, ";") - if len(res) < 5 { - return errors.New("Destination buffer for key description is too small") - } - - // parse permissions - perm64, err := strconv.ParseUint(res[3], 16, 32) - if err != nil { - return err - } - - perm := (uint32(perm64) & mask) | setbits - - return unix.KeyctlSetperm(int(ringID), perm) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go deleted file mode 100644 index 6610a1aaee..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/logs/logs.go +++ /dev/null @@ -1,106 +0,0 @@ -package logs - -import ( - "bufio" - "encoding/json" - "fmt" - "io" - "os" - "sync" - - "github.com/pkg/errors" - "github.com/sirupsen/logrus" -) - -var ( - configureMutex sync.Mutex - // loggingConfigured will be set once logging has been configured via invoking `ConfigureLogging`. 
- // Subsequent invocations of `ConfigureLogging` would be no-op - loggingConfigured = false -) - -type Config struct { - LogLevel logrus.Level - LogFormat string - LogFilePath string - LogPipeFd int - LogCaller bool -} - -func ForwardLogs(logPipe io.ReadCloser) chan error { - done := make(chan error, 1) - s := bufio.NewScanner(logPipe) - - go func() { - for s.Scan() { - processEntry(s.Bytes()) - } - if err := logPipe.Close(); err != nil { - logrus.Errorf("error closing log source: %v", err) - } - // The only error we want to return is when reading from - // logPipe has failed. - done <- s.Err() - close(done) - }() - - return done -} - -func processEntry(text []byte) { - if len(text) == 0 { - return - } - - var jl struct { - Level string `json:"level"` - Msg string `json:"msg"` - } - if err := json.Unmarshal(text, &jl); err != nil { - logrus.Errorf("failed to decode %q to json: %v", text, err) - return - } - - lvl, err := logrus.ParseLevel(jl.Level) - if err != nil { - logrus.Errorf("failed to parse log level %q: %v", jl.Level, err) - return - } - logrus.StandardLogger().Logf(lvl, jl.Msg) -} - -func ConfigureLogging(config Config) error { - configureMutex.Lock() - defer configureMutex.Unlock() - - if loggingConfigured { - return errors.New("logging has already been configured") - } - - logrus.SetLevel(config.LogLevel) - logrus.SetReportCaller(config.LogCaller) - - // XXX: while 0 is a valid fd (usually stdin), here we assume - // that we never deliberately set LogPipeFd to 0. - if config.LogPipeFd > 0 { - logrus.SetOutput(os.NewFile(uintptr(config.LogPipeFd), "logpipe")) - } else if config.LogFilePath != "" { - f, err := os.OpenFile(config.LogFilePath, os.O_CREATE|os.O_WRONLY|os.O_APPEND|os.O_SYNC, 0o644) - if err != nil { - return err - } - logrus.SetOutput(f) - } - - switch config.LogFormat { - case "text": - // retain logrus's default. - case "json": - logrus.SetFormatter(new(logrus.JSONFormatter)) - default: - return fmt.Errorf("unknown log-format %q", config.LogFormat) - } - - loggingConfigured = true - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go deleted file mode 100644 index 1d4f5033aa..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/message_linux.go +++ /dev/null @@ -1,89 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "github.com/vishvananda/netlink/nl" - "golang.org/x/sys/unix" -) - -// list of known message types we want to send to bootstrap program -// The number is randomly chosen to not conflict with known netlink types -const ( - InitMsg uint16 = 62000 - CloneFlagsAttr uint16 = 27281 - NsPathsAttr uint16 = 27282 - UidmapAttr uint16 = 27283 - GidmapAttr uint16 = 27284 - SetgroupAttr uint16 = 27285 - OomScoreAdjAttr uint16 = 27286 - RootlessEUIDAttr uint16 = 27287 - UidmapPathAttr uint16 = 27288 - GidmapPathAttr uint16 = 27289 -) - -type Int32msg struct { - Type uint16 - Value uint32 -} - -// Serialize serializes the message. 
-// Int32msg has the following representation -// | nlattr len | nlattr type | -// | uint32 value | -func (msg *Int32msg) Serialize() []byte { - buf := make([]byte, msg.Len()) - native := nl.NativeEndian() - native.PutUint16(buf[0:2], uint16(msg.Len())) - native.PutUint16(buf[2:4], msg.Type) - native.PutUint32(buf[4:8], msg.Value) - return buf -} - -func (msg *Int32msg) Len() int { - return unix.NLA_HDRLEN + 4 -} - -// Bytemsg has the following representation -// | nlattr len | nlattr type | -// | value | pad | -type Bytemsg struct { - Type uint16 - Value []byte -} - -func (msg *Bytemsg) Serialize() []byte { - l := msg.Len() - buf := make([]byte, (l+unix.NLA_ALIGNTO-1) & ^(unix.NLA_ALIGNTO-1)) - native := nl.NativeEndian() - native.PutUint16(buf[0:2], uint16(l)) - native.PutUint16(buf[2:4], msg.Type) - copy(buf[4:], msg.Value) - return buf -} - -func (msg *Bytemsg) Len() int { - return unix.NLA_HDRLEN + len(msg.Value) + 1 // null-terminated -} - -type Boolmsg struct { - Type uint16 - Value bool -} - -func (msg *Boolmsg) Serialize() []byte { - buf := make([]byte, msg.Len()) - native := nl.NativeEndian() - native.PutUint16(buf[0:2], uint16(msg.Len())) - native.PutUint16(buf[2:4], msg.Type) - if msg.Value { - native.PutUint32(buf[4:8], uint32(1)) - } else { - native.PutUint32(buf[4:8], uint32(0)) - } - return buf -} - -func (msg *Boolmsg) Len() int { - return unix.NLA_HDRLEN + 4 // alignment -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go deleted file mode 100644 index 12e5800fc0..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/network_linux.go +++ /dev/null @@ -1,102 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "bytes" - "fmt" - "io/ioutil" - "path/filepath" - "strconv" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/types" - "github.com/vishvananda/netlink" -) - -var strategies = map[string]networkStrategy{ - "loopback": &loopback{}, -} - -// networkStrategy represents a specific network configuration for -// a container's networking stack -type networkStrategy interface { - create(*network, int) error - initialize(*network) error - detach(*configs.Network) error - attach(*configs.Network) error -} - -// getStrategy returns the specific network strategy for the -// provided type. -func getStrategy(tpe string) (networkStrategy, error) { - s, exists := strategies[tpe] - if !exists { - return nil, fmt.Errorf("unknown strategy type %q", tpe) - } - return s, nil -} - -// Returns the network statistics for the network interfaces represented by the NetworkRuntimeInfo. -func getNetworkInterfaceStats(interfaceName string) (*types.NetworkInterface, error) { - out := &types.NetworkInterface{Name: interfaceName} - // This can happen if the network runtime information is missing - possible if the - // container was created by an old version of libcontainer. - if interfaceName == "" { - return out, nil - } - type netStatsPair struct { - // Where to write the output. - Out *uint64 - // The network stats file to read. - File string - } - // Ingress for host veth is from the container. Hence tx_bytes stat on the host veth is actually number of bytes received by the container. 
- netStats := []netStatsPair{ - {Out: &out.RxBytes, File: "tx_bytes"}, - {Out: &out.RxPackets, File: "tx_packets"}, - {Out: &out.RxErrors, File: "tx_errors"}, - {Out: &out.RxDropped, File: "tx_dropped"}, - - {Out: &out.TxBytes, File: "rx_bytes"}, - {Out: &out.TxPackets, File: "rx_packets"}, - {Out: &out.TxErrors, File: "rx_errors"}, - {Out: &out.TxDropped, File: "rx_dropped"}, - } - for _, netStat := range netStats { - data, err := readSysfsNetworkStats(interfaceName, netStat.File) - if err != nil { - return nil, err - } - *(netStat.Out) = data - } - return out, nil -} - -// Reads the specified statistics available under /sys/class/net//statistics -func readSysfsNetworkStats(ethInterface, statsFile string) (uint64, error) { - data, err := ioutil.ReadFile(filepath.Join("/sys/class/net", ethInterface, "statistics", statsFile)) - if err != nil { - return 0, err - } - return strconv.ParseUint(string(bytes.TrimSpace(data)), 10, 64) -} - -// loopback is a network strategy that provides a basic loopback device -type loopback struct{} - -func (l *loopback) create(n *network, nspid int) error { - return nil -} - -func (l *loopback) initialize(config *network) error { - return netlink.LinkSetUp(&netlink.Device{LinkAttrs: netlink.LinkAttrs{Name: "lo"}}) -} - -func (l *loopback) attach(n *configs.Network) (err error) { - return nil -} - -func (l *loopback) detach(n *configs.Network) (err error) { - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go deleted file mode 100644 index 73a6f59465..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux.go +++ /dev/null @@ -1,87 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "errors" - "fmt" - "io/ioutil" - "os" - "path/filepath" - - "golang.org/x/sys/unix" -) - -type PressureLevel uint - -const ( - LowPressure PressureLevel = iota - MediumPressure - CriticalPressure -) - -func registerMemoryEvent(cgDir string, evName string, arg string) (<-chan struct{}, error) { - evFile, err := os.Open(filepath.Join(cgDir, evName)) - if err != nil { - return nil, err - } - fd, err := unix.Eventfd(0, unix.EFD_CLOEXEC) - if err != nil { - evFile.Close() - return nil, err - } - - eventfd := os.NewFile(uintptr(fd), "eventfd") - - eventControlPath := filepath.Join(cgDir, "cgroup.event_control") - data := fmt.Sprintf("%d %d %s", eventfd.Fd(), evFile.Fd(), arg) - if err := ioutil.WriteFile(eventControlPath, []byte(data), 0o700); err != nil { - eventfd.Close() - evFile.Close() - return nil, err - } - ch := make(chan struct{}) - go func() { - defer func() { - eventfd.Close() - evFile.Close() - close(ch) - }() - buf := make([]byte, 8) - for { - if _, err := eventfd.Read(buf); err != nil { - return - } - // When a cgroup is destroyed, an event is sent to eventfd. - // So if the control path is gone, return instead of notifying. - if _, err := os.Lstat(eventControlPath); os.IsNotExist(err) { - return - } - ch <- struct{}{} - } - }() - return ch, nil -} - -// notifyOnOOM returns channel on which you can expect event about OOM, -// if process died without OOM this channel will be closed. 
-func notifyOnOOM(dir string) (<-chan struct{}, error) { - if dir == "" { - return nil, errors.New("memory controller missing") - } - - return registerMemoryEvent(dir, "memory.oom_control", "") -} - -func notifyMemoryPressure(dir string, level PressureLevel) (<-chan struct{}, error) { - if dir == "" { - return nil, errors.New("memory controller missing") - } - - if level > CriticalPressure { - return nil, fmt.Errorf("invalid pressure level %d", level) - } - - levelStr := []string{"low", "medium", "critical"}[level] - return registerMemoryEvent(dir, "memory.pressure_level", levelStr) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go deleted file mode 100644 index dd0ec290ec..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/notify_linux_v2.go +++ /dev/null @@ -1,82 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "path/filepath" - "unsafe" - - "github.com/opencontainers/runc/libcontainer/cgroups/fscommon" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -func registerMemoryEventV2(cgDir, evName, cgEvName string) (<-chan struct{}, error) { - fd, err := unix.InotifyInit() - if err != nil { - return nil, errors.Wrap(err, "unable to init inotify") - } - // watching oom kill - evFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, evName), unix.IN_MODIFY) - if err != nil { - unix.Close(fd) - return nil, errors.Wrap(err, "unable to add inotify watch") - } - // Because no `unix.IN_DELETE|unix.IN_DELETE_SELF` event for cgroup file system, so watching all process exited - cgFd, err := unix.InotifyAddWatch(fd, filepath.Join(cgDir, cgEvName), unix.IN_MODIFY) - if err != nil { - unix.Close(fd) - return nil, errors.Wrap(err, "unable to add inotify watch") - } - ch := make(chan struct{}) - go func() { - var ( - buffer [unix.SizeofInotifyEvent + unix.PathMax + 1]byte - offset uint32 - ) - defer func() { - unix.Close(fd) - close(ch) - }() - - for { - n, err := unix.Read(fd, buffer[:]) - if err != nil { - logrus.Warnf("unable to read event data from inotify, got error: %v", err) - return - } - if n < unix.SizeofInotifyEvent { - logrus.Warnf("we should read at least %d bytes from inotify, but got %d bytes.", unix.SizeofInotifyEvent, n) - return - } - offset = 0 - for offset <= uint32(n-unix.SizeofInotifyEvent) { - rawEvent := (*unix.InotifyEvent)(unsafe.Pointer(&buffer[offset])) - offset += unix.SizeofInotifyEvent + uint32(rawEvent.Len) - if rawEvent.Mask&unix.IN_MODIFY != unix.IN_MODIFY { - continue - } - switch int(rawEvent.Wd) { - case evFd: - oom, err := fscommon.GetValueByKey(cgDir, evName, "oom_kill") - if err != nil || oom > 0 { - ch <- struct{}{} - } - case cgFd: - pids, err := fscommon.GetValueByKey(cgDir, cgEvName, "populated") - if err != nil || pids == 0 { - return - } - } - } - } - }() - return ch, nil -} - -// notifyOnOOMV2 returns channel on which you can expect event about OOM, -// if process died without OOM this channel will be closed. 
-func notifyOnOOMV2(path string) (<-chan struct{}, error) { - return registerMemoryEventV2(path, "memory.events", "cgroup.events") -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process.go deleted file mode 100644 index d3e472a4fd..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process.go +++ /dev/null @@ -1,115 +0,0 @@ -package libcontainer - -import ( - "fmt" - "io" - "math" - "os" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -type processOperations interface { - wait() (*os.ProcessState, error) - signal(sig os.Signal) error - pid() int -} - -// Process specifies the configuration and IO for a process inside -// a container. -type Process struct { - // The command to be run followed by any arguments. - Args []string - - // Env specifies the environment variables for the process. - Env []string - - // User will set the uid and gid of the executing process running inside the container - // local to the container's user and group configuration. - User string - - // AdditionalGroups specifies the gids that should be added to supplementary groups - // in addition to those that the user belongs to. - AdditionalGroups []string - - // Cwd will change the processes current working directory inside the container's rootfs. - Cwd string - - // Stdin is a pointer to a reader which provides the standard input stream. - Stdin io.Reader - - // Stdout is a pointer to a writer which receives the standard output stream. - Stdout io.Writer - - // Stderr is a pointer to a writer which receives the standard error stream. - Stderr io.Writer - - // ExtraFiles specifies additional open files to be inherited by the container - ExtraFiles []*os.File - - // Initial sizings for the console - ConsoleWidth uint16 - ConsoleHeight uint16 - - // Capabilities specify the capabilities to keep when executing the process inside the container - // All capabilities not specified will be dropped from the processes capability mask - Capabilities *configs.Capabilities - - // AppArmorProfile specifies the profile to apply to the process and is - // changed at the time the process is execed - AppArmorProfile string - - // Label specifies the label to apply to the process. It is commonly used by selinux - Label string - - // NoNewPrivileges controls whether processes can gain additional privileges. - NoNewPrivileges *bool - - // Rlimits specifies the resource limits, such as max open files, to set in the container - // If Rlimits are not set, the container will inherit rlimits from the parent process - Rlimits []configs.Rlimit - - // ConsoleSocket provides the masterfd console. - ConsoleSocket *os.File - - // Init specifies whether the process is the first process in the container. - Init bool - - ops processOperations - - LogLevel string -} - -// Wait waits for the process to exit. -// Wait releases any resources associated with the Process -func (p Process) Wait() (*os.ProcessState, error) { - if p.ops == nil { - return nil, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) - } - return p.ops.wait() -} - -// Pid returns the process ID -func (p Process) Pid() (int, error) { - // math.MinInt32 is returned here, because it's invalid value - // for the kill() system call. - if p.ops == nil { - return math.MinInt32, newGenericError(fmt.Errorf("invalid process"), NoProcessOps) - } - return p.ops.pid(), nil -} - -// Signal sends a signal to the Process. 
-func (p Process) Signal(sig os.Signal) error { - if p.ops == nil { - return newGenericError(fmt.Errorf("invalid process"), NoProcessOps) - } - return p.ops.signal(sig) -} - -// IO holds the process's STDIO -type IO struct { - Stdin io.WriteCloser - Stdout io.ReadCloser - Stderr io.ReadCloser -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go deleted file mode 100644 index 80f25e334f..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/process_linux.go +++ /dev/null @@ -1,726 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "encoding/json" - "errors" - "fmt" - "io" - "os" - "os/exec" - "path/filepath" - "strconv" - "time" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/intelrdt" - "github.com/opencontainers/runc/libcontainer/logs" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/runc/libcontainer/utils" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -// Synchronisation value for cgroup namespace setup. -// The same constant is defined in nsexec.c as "CREATECGROUPNS". -const createCgroupns = 0x80 - -type parentProcess interface { - // pid returns the pid for the running process. - pid() int - - // start starts the process execution. - start() error - - // send a SIGKILL to the process and wait for the exit. - terminate() error - - // wait waits on the process returning the process state. - wait() (*os.ProcessState, error) - - // startTime returns the process start time. - startTime() (uint64, error) - - signal(os.Signal) error - - externalDescriptors() []string - - setExternalDescriptors(fds []string) - - forwardChildLogs() chan error -} - -type filePair struct { - parent *os.File - child *os.File -} - -type setnsProcess struct { - cmd *exec.Cmd - messageSockPair filePair - logFilePair filePair - cgroupPaths map[string]string - rootlessCgroups bool - manager cgroups.Manager - intelRdtPath string - config *initConfig - fds []string - process *Process - bootstrapData io.Reader - initProcessPid int -} - -func (p *setnsProcess) startTime() (uint64, error) { - stat, err := system.Stat(p.pid()) - return stat.StartTime, err -} - -func (p *setnsProcess) signal(sig os.Signal) error { - s, ok := sig.(unix.Signal) - if !ok { - return errors.New("os: unsupported signal type") - } - return unix.Kill(p.pid(), s) -} - -func (p *setnsProcess) start() (retErr error) { - defer p.messageSockPair.parent.Close() - // get the "before" value of oom kill count - oom, _ := p.manager.OOMKillCount() - err := p.cmd.Start() - // close the write-side of the pipes (controlled by child) - p.messageSockPair.child.Close() - p.logFilePair.child.Close() - if err != nil { - return newSystemErrorWithCause(err, "starting setns process") - } - - waitInit := initWaiter(p.messageSockPair.parent) - defer func() { - if retErr != nil { - if newOom, err := p.manager.OOMKillCount(); err == nil && newOom != oom { - // Someone in this cgroup was killed, this _might_ be us. 
- retErr = newSystemErrorWithCause(retErr, "possibly OOM-killed") - } - werr := <-waitInit - if werr != nil { - logrus.WithError(werr).Warn() - } - err := ignoreTerminateErrors(p.terminate()) - if err != nil { - logrus.WithError(err).Warn("unable to terminate setnsProcess") - } - } - }() - - if p.bootstrapData != nil { - if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil { - return newSystemErrorWithCause(err, "copying bootstrap data to pipe") - } - } - err = <-waitInit - if err != nil { - return err - } - if err := p.execSetns(); err != nil { - return newSystemErrorWithCause(err, "executing setns process") - } - if len(p.cgroupPaths) > 0 { - if err := cgroups.EnterPid(p.cgroupPaths, p.pid()); err != nil && !p.rootlessCgroups { - // On cgroup v2 + nesting + domain controllers, EnterPid may fail with EBUSY. - // https://github.com/opencontainers/runc/issues/2356#issuecomment-621277643 - // Try to join the cgroup of InitProcessPid. - if cgroups.IsCgroup2UnifiedMode() { - initProcCgroupFile := fmt.Sprintf("/proc/%d/cgroup", p.initProcessPid) - initCg, initCgErr := cgroups.ParseCgroupFile(initProcCgroupFile) - if initCgErr == nil { - if initCgPath, ok := initCg[""]; ok { - initCgDirpath := filepath.Join(fs2.UnifiedMountpoint, initCgPath) - logrus.Debugf("adding pid %d to cgroups %v failed (%v), attempting to join %q (obtained from %s)", - p.pid(), p.cgroupPaths, err, initCg, initCgDirpath) - // NOTE: initCgDirPath is not guaranteed to exist because we didn't pause the container. - err = cgroups.WriteCgroupProc(initCgDirpath, p.pid()) - } - } - } - if err != nil { - return newSystemErrorWithCausef(err, "adding pid %d to cgroups", p.pid()) - } - } - } - if p.intelRdtPath != "" { - // if Intel RDT "resource control" filesystem path exists - _, err := os.Stat(p.intelRdtPath) - if err == nil { - if err := intelrdt.WriteIntelRdtTasks(p.intelRdtPath, p.pid()); err != nil { - return newSystemErrorWithCausef(err, "adding pid %d to Intel RDT resource control filesystem", p.pid()) - } - } - } - // set rlimits, this has to be done here because we lose permissions - // to raise the limits once we enter a user-namespace - if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { - return newSystemErrorWithCause(err, "setting rlimits for process") - } - if err := utils.WriteJSON(p.messageSockPair.parent, p.config); err != nil { - return newSystemErrorWithCause(err, "writing config to pipe") - } - - ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error { - switch sync.Type { - case procReady: - // This shouldn't happen. - panic("unexpected procReady in setns") - case procHooks: - // This shouldn't happen. - panic("unexpected procHooks in setns") - default: - return newSystemError(errors.New("invalid JSON payload from child")) - } - }) - - if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil { - return newSystemErrorWithCause(err, "calling shutdown on init pipe") - } - // Must be done after Shutdown so the child will exit and we can wait for it. - if ierr != nil { - _, _ = p.wait() - return ierr - } - return nil -} - -// execSetns runs the process that executes C code to perform the setns calls -// because setns support requires the C process to fork off a child and perform the setns -// before the go runtime boots, we wait on the process to die and receive the child's pid -// over the provided pipe. 
-func (p *setnsProcess) execSetns() error { - status, err := p.cmd.Process.Wait() - if err != nil { - _ = p.cmd.Wait() - return newSystemErrorWithCause(err, "waiting on setns process to finish") - } - if !status.Success() { - _ = p.cmd.Wait() - return newSystemError(&exec.ExitError{ProcessState: status}) - } - var pid *pid - if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil { - _ = p.cmd.Wait() - return newSystemErrorWithCause(err, "reading pid from init pipe") - } - - // Clean up the zombie parent process - // On Unix systems FindProcess always succeeds. - firstChildProcess, _ := os.FindProcess(pid.PidFirstChild) - - // Ignore the error in case the child has already been reaped for any reason - _, _ = firstChildProcess.Wait() - - process, err := os.FindProcess(pid.Pid) - if err != nil { - return err - } - p.cmd.Process = process - p.process.ops = p - return nil -} - -// terminate sends a SIGKILL to the forked process for the setns routine then waits to -// avoid the process becoming a zombie. -func (p *setnsProcess) terminate() error { - if p.cmd.Process == nil { - return nil - } - err := p.cmd.Process.Kill() - if _, werr := p.wait(); err == nil { - err = werr - } - return err -} - -func (p *setnsProcess) wait() (*os.ProcessState, error) { - err := p.cmd.Wait() - - // Return actual ProcessState even on Wait error - return p.cmd.ProcessState, err -} - -func (p *setnsProcess) pid() int { - return p.cmd.Process.Pid -} - -func (p *setnsProcess) externalDescriptors() []string { - return p.fds -} - -func (p *setnsProcess) setExternalDescriptors(newFds []string) { - p.fds = newFds -} - -func (p *setnsProcess) forwardChildLogs() chan error { - return logs.ForwardLogs(p.logFilePair.parent) -} - -type initProcess struct { - cmd *exec.Cmd - messageSockPair filePair - logFilePair filePair - config *initConfig - manager cgroups.Manager - intelRdtManager intelrdt.Manager - container *linuxContainer - fds []string - process *Process - bootstrapData io.Reader - sharePidns bool -} - -func (p *initProcess) pid() int { - return p.cmd.Process.Pid -} - -func (p *initProcess) externalDescriptors() []string { - return p.fds -} - -// getChildPid receives the final child's pid over the provided pipe. -func (p *initProcess) getChildPid() (int, error) { - var pid pid - if err := json.NewDecoder(p.messageSockPair.parent).Decode(&pid); err != nil { - _ = p.cmd.Wait() - return -1, err - } - - // Clean up the zombie parent process - // On Unix systems FindProcess always succeeds. 
- firstChildProcess, _ := os.FindProcess(pid.PidFirstChild) - - // Ignore the error in case the child has already been reaped for any reason - _, _ = firstChildProcess.Wait() - - return pid.Pid, nil -} - -func (p *initProcess) waitForChildExit(childPid int) error { - status, err := p.cmd.Process.Wait() - if err != nil { - _ = p.cmd.Wait() - return err - } - if !status.Success() { - _ = p.cmd.Wait() - return &exec.ExitError{ProcessState: status} - } - - process, err := os.FindProcess(childPid) - if err != nil { - return err - } - p.cmd.Process = process - p.process.ops = p - return nil -} - -func (p *initProcess) start() (retErr error) { - defer p.messageSockPair.parent.Close() //nolint: errcheck - err := p.cmd.Start() - p.process.ops = p - // close the write-side of the pipes (controlled by child) - _ = p.messageSockPair.child.Close() - _ = p.logFilePair.child.Close() - if err != nil { - p.process.ops = nil - return newSystemErrorWithCause(err, "starting init process command") - } - - waitInit := initWaiter(p.messageSockPair.parent) - defer func() { - if retErr != nil { - // Find out if init is killed by the kernel's OOM killer. - // Get the count before killing init as otherwise cgroup - // might be removed by systemd. - oom, err := p.manager.OOMKillCount() - if err != nil { - logrus.WithError(err).Warn("unable to get oom kill count") - } else if oom > 0 { - // Does not matter what the particular error was, - // its cause is most probably OOM, so report that. - const oomError = "container init was OOM-killed (memory limit too low?)" - - if logrus.GetLevel() >= logrus.DebugLevel { - // Only show the original error if debug is set, - // as it is not generally very useful. - retErr = newSystemErrorWithCause(retErr, oomError) - } else { - retErr = newSystemError(errors.New(oomError)) - } - } - - werr := <-waitInit - if werr != nil { - logrus.WithError(werr).Warn() - } - - // Terminate the process to ensure we can remove cgroups. - if err := ignoreTerminateErrors(p.terminate()); err != nil { - logrus.WithError(err).Warn("unable to terminate initProcess") - } - - _ = p.manager.Destroy() - if p.intelRdtManager != nil { - _ = p.intelRdtManager.Destroy() - } - } - }() - - // Do this before syncing with child so that no children can escape the - // cgroup. We don't need to worry about not doing this and not being root - // because we'd be using the rootless cgroup manager in that case. - if err := p.manager.Apply(p.pid()); err != nil { - return newSystemErrorWithCause(err, "applying cgroup configuration for process") - } - if p.intelRdtManager != nil { - if err := p.intelRdtManager.Apply(p.pid()); err != nil { - return newSystemErrorWithCause(err, "applying Intel RDT configuration for process") - } - } - if _, err := io.Copy(p.messageSockPair.parent, p.bootstrapData); err != nil { - return newSystemErrorWithCause(err, "copying bootstrap data to pipe") - } - err = <-waitInit - if err != nil { - return err - } - - childPid, err := p.getChildPid() - if err != nil { - return newSystemErrorWithCause(err, "getting the final child's pid from pipe") - } - - // Save the standard descriptor names before the container process - // can potentially move them (e.g., via dup2()). If we don't do this now, - // we won't know at checkpoint time which file descriptor to look up. 
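Saving those descriptors boils down to resolving the /proc/<pid>/fd/0, 1 and 2 symlinks while they still point at the original stdio files; the deleted getPipeFds helper further down does exactly that, additionally tolerating permission errors from non-dumpable (for example rootless) processes. A short standalone sketch of the same lookup, using the current process's pid purely so the example runs on its own:

package main

import (
	"fmt"
	"os"
	"path/filepath"
	"strconv"
)

func main() {
	// In the runtime this would be the container init's pid; our own pid is
	// used here only for illustration.
	pid := os.Getpid()
	for i := 0; i < 3; i++ {
		link := filepath.Join("/proc", strconv.Itoa(pid), "fd", strconv.Itoa(i))
		target, err := os.Readlink(link)
		if err != nil {
			fmt.Printf("fd %d: %v\n", i, err)
			continue
		}
		fmt.Printf("fd %d -> %s\n", i, target)
	}
}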
- fds, err := getPipeFds(childPid) - if err != nil { - return newSystemErrorWithCausef(err, "getting pipe fds for pid %d", childPid) - } - p.setExternalDescriptors(fds) - - // Now it's time to setup cgroup namesapce - if p.config.Config.Namespaces.Contains(configs.NEWCGROUP) && p.config.Config.Namespaces.PathOf(configs.NEWCGROUP) == "" { - if _, err := p.messageSockPair.parent.Write([]byte{createCgroupns}); err != nil { - return newSystemErrorWithCause(err, "sending synchronization value to init process") - } - } - - // Wait for our first child to exit - if err := p.waitForChildExit(childPid); err != nil { - return newSystemErrorWithCause(err, "waiting for our first child to exit") - } - - if err := p.createNetworkInterfaces(); err != nil { - return newSystemErrorWithCause(err, "creating network interfaces") - } - if err := p.updateSpecState(); err != nil { - return newSystemErrorWithCause(err, "updating the spec state") - } - if err := p.sendConfig(); err != nil { - return newSystemErrorWithCause(err, "sending config to init process") - } - var ( - sentRun bool - sentResume bool - ) - - ierr := parseSync(p.messageSockPair.parent, func(sync *syncT) error { - switch sync.Type { - case procReady: - // set rlimits, this has to be done here because we lose permissions - // to raise the limits once we enter a user-namespace - if err := setupRlimits(p.config.Rlimits, p.pid()); err != nil { - return newSystemErrorWithCause(err, "setting rlimits for ready process") - } - // call prestart and CreateRuntime hooks - if !p.config.Config.Namespaces.Contains(configs.NEWNS) { - // Setup cgroup before the hook, so that the prestart and CreateRuntime hook could apply cgroup permissions. - if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil { - return newSystemErrorWithCause(err, "setting cgroup config for ready process") - } - if p.intelRdtManager != nil { - if err := p.intelRdtManager.Set(p.config.Config); err != nil { - return newSystemErrorWithCause(err, "setting Intel RDT config for ready process") - } - } - - if p.config.Config.Hooks != nil { - s, err := p.container.currentOCIState() - if err != nil { - return err - } - // initProcessStartTime hasn't been set yet. - s.Pid = p.cmd.Process.Pid - s.Status = specs.StateCreating - hooks := p.config.Config.Hooks - - if err := hooks[configs.Prestart].RunHooks(s); err != nil { - return err - } - if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil { - return err - } - } - } - - // generate a timestamp indicating when the container was started - p.container.created = time.Now().UTC() - p.container.state = &createdState{ - c: p.container, - } - - // NOTE: If the procRun state has been synced and the - // runc-create process has been killed for some reason, - // the runc-init[2:stage] process will be leaky. And - // the runc command also fails to parse root directory - // because the container doesn't have state.json. - // - // In order to cleanup the runc-init[2:stage] by - // runc-delete/stop, we should store the status before - // procRun sync. - state, uerr := p.container.updateState(p) - if uerr != nil { - return newSystemErrorWithCause(err, "store init state") - } - p.container.initProcessStartTime = state.InitProcessStartTime - - // Sync with child. - if err := writeSync(p.messageSockPair.parent, procRun); err != nil { - return newSystemErrorWithCause(err, "writing syncT 'run'") - } - sentRun = true - case procHooks: - // Setup cgroup before prestart hook, so that the prestart hook could apply cgroup permissions. 
- if err := p.manager.Set(p.config.Config.Cgroups.Resources); err != nil { - return newSystemErrorWithCause(err, "setting cgroup config for procHooks process") - } - if p.intelRdtManager != nil { - if err := p.intelRdtManager.Set(p.config.Config); err != nil { - return newSystemErrorWithCause(err, "setting Intel RDT config for procHooks process") - } - } - if p.config.Config.Hooks != nil { - s, err := p.container.currentOCIState() - if err != nil { - return err - } - // initProcessStartTime hasn't been set yet. - s.Pid = p.cmd.Process.Pid - s.Status = specs.StateCreating - hooks := p.config.Config.Hooks - - if err := hooks[configs.Prestart].RunHooks(s); err != nil { - return err - } - if err := hooks[configs.CreateRuntime].RunHooks(s); err != nil { - return err - } - } - // Sync with child. - if err := writeSync(p.messageSockPair.parent, procResume); err != nil { - return newSystemErrorWithCause(err, "writing syncT 'resume'") - } - sentResume = true - default: - return newSystemError(errors.New("invalid JSON payload from child")) - } - - return nil - }) - - if !sentRun { - return newSystemErrorWithCause(ierr, "container init") - } - if p.config.Config.Namespaces.Contains(configs.NEWNS) && !sentResume { - return newSystemError(errors.New("could not synchronise after executing prestart and CreateRuntime hooks with container process")) - } - if err := unix.Shutdown(int(p.messageSockPair.parent.Fd()), unix.SHUT_WR); err != nil { - return newSystemErrorWithCause(err, "shutting down init pipe") - } - - // Must be done after Shutdown so the child will exit and we can wait for it. - if ierr != nil { - _, _ = p.wait() - return ierr - } - return nil -} - -func (p *initProcess) wait() (*os.ProcessState, error) { - err := p.cmd.Wait() - // we should kill all processes in cgroup when init is died if we use host PID namespace - if p.sharePidns { - _ = signalAllProcesses(p.manager, unix.SIGKILL) - } - return p.cmd.ProcessState, err -} - -func (p *initProcess) terminate() error { - if p.cmd.Process == nil { - return nil - } - err := p.cmd.Process.Kill() - if _, werr := p.wait(); err == nil { - err = werr - } - return err -} - -func (p *initProcess) startTime() (uint64, error) { - stat, err := system.Stat(p.pid()) - return stat.StartTime, err -} - -func (p *initProcess) updateSpecState() error { - s, err := p.container.currentOCIState() - if err != nil { - return err - } - - p.config.SpecState = s - return nil -} - -func (p *initProcess) sendConfig() error { - // send the config to the container's init process, we don't use JSON Encode - // here because there might be a problem in JSON decoder in some cases, see: - // https://github.com/docker/docker/issues/14203#issuecomment-174177790 - return utils.WriteJSON(p.messageSockPair.parent, p.config) -} - -func (p *initProcess) createNetworkInterfaces() error { - for _, config := range p.config.Config.Networks { - strategy, err := getStrategy(config.Type) - if err != nil { - return err - } - n := &network{ - Network: *config, - } - if err := strategy.create(n, p.pid()); err != nil { - return err - } - p.config.Networks = append(p.config.Networks, n) - } - return nil -} - -func (p *initProcess) signal(sig os.Signal) error { - s, ok := sig.(unix.Signal) - if !ok { - return errors.New("os: unsupported signal type") - } - return unix.Kill(p.pid(), s) -} - -func (p *initProcess) setExternalDescriptors(newFds []string) { - p.fds = newFds -} - -func (p *initProcess) forwardChildLogs() chan error { - return logs.ForwardLogs(p.logFilePair.parent) -} - -func 
getPipeFds(pid int) ([]string, error) { - fds := make([]string, 3) - - dirPath := filepath.Join("/proc", strconv.Itoa(pid), "/fd") - for i := 0; i < 3; i++ { - // XXX: This breaks if the path is not a valid symlink (which can - // happen in certain particularly unlucky mount namespace setups). - f := filepath.Join(dirPath, strconv.Itoa(i)) - target, err := os.Readlink(f) - if err != nil { - // Ignore permission errors, for rootless containers and other - // non-dumpable processes. if we can't get the fd for a particular - // file, there's not much we can do. - if os.IsPermission(err) { - continue - } - return fds, err - } - fds[i] = target - } - return fds, nil -} - -// InitializeIO creates pipes for use with the process's stdio and returns the -// opposite side for each. Do not use this if you want to have a pseudoterminal -// set up for you by libcontainer (TODO: fix that too). -// TODO: This is mostly unnecessary, and should be handled by clients. -func (p *Process) InitializeIO(rootuid, rootgid int) (i *IO, err error) { - var fds []uintptr - i = &IO{} - // cleanup in case of an error - defer func() { - if err != nil { - for _, fd := range fds { - _ = unix.Close(int(fd)) - } - } - }() - // STDIN - r, w, err := os.Pipe() - if err != nil { - return nil, err - } - fds = append(fds, r.Fd(), w.Fd()) - p.Stdin, i.Stdin = r, w - // STDOUT - if r, w, err = os.Pipe(); err != nil { - return nil, err - } - fds = append(fds, r.Fd(), w.Fd()) - p.Stdout, i.Stdout = w, r - // STDERR - if r, w, err = os.Pipe(); err != nil { - return nil, err - } - fds = append(fds, r.Fd(), w.Fd()) - p.Stderr, i.Stderr = w, r - // change ownership of the pipes in case we are in a user namespace - for _, fd := range fds { - if err := unix.Fchown(int(fd), rootuid, rootgid); err != nil { - return nil, err - } - } - return i, nil -} - -// initWaiter returns a channel to wait on for making sure -// runc init has finished the initial setup. 
-func initWaiter(r io.Reader) chan error { - ch := make(chan error, 1) - go func() { - defer close(ch) - - inited := make([]byte, 1) - n, err := r.Read(inited) - if err == nil { - if n < 1 { - err = errors.New("short read") - } else if inited[0] != 0 { - err = fmt.Errorf("unexpected %d != 0", inited[0]) - } else { - ch <- nil - return - } - } - ch <- newSystemErrorWithCause(err, "waiting for init preliminary setup") - }() - - return ch -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go deleted file mode 100644 index 34270e64ed..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/restored_process.go +++ /dev/null @@ -1,129 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "fmt" - "os" - "os/exec" - - "github.com/opencontainers/runc/libcontainer/system" -) - -func newRestoredProcess(cmd *exec.Cmd, fds []string) (*restoredProcess, error) { - var err error - pid := cmd.Process.Pid - stat, err := system.Stat(pid) - if err != nil { - return nil, err - } - return &restoredProcess{ - cmd: cmd, - processStartTime: stat.StartTime, - fds: fds, - }, nil -} - -type restoredProcess struct { - cmd *exec.Cmd - processStartTime uint64 - fds []string -} - -func (p *restoredProcess) start() error { - return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) -} - -func (p *restoredProcess) pid() int { - return p.cmd.Process.Pid -} - -func (p *restoredProcess) terminate() error { - err := p.cmd.Process.Kill() - if _, werr := p.wait(); err == nil { - err = werr - } - return err -} - -func (p *restoredProcess) wait() (*os.ProcessState, error) { - // TODO: how do we wait on the actual process? - // maybe use --exec-cmd in criu - err := p.cmd.Wait() - if err != nil { - if _, ok := err.(*exec.ExitError); !ok { - return nil, err - } - } - st := p.cmd.ProcessState - return st, nil -} - -func (p *restoredProcess) startTime() (uint64, error) { - return p.processStartTime, nil -} - -func (p *restoredProcess) signal(s os.Signal) error { - return p.cmd.Process.Signal(s) -} - -func (p *restoredProcess) externalDescriptors() []string { - return p.fds -} - -func (p *restoredProcess) setExternalDescriptors(newFds []string) { - p.fds = newFds -} - -func (p *restoredProcess) forwardChildLogs() chan error { - return nil -} - -// nonChildProcess represents a process where the calling process is not -// the parent process. This process is created when a factory loads a container from -// a persisted state. 
-type nonChildProcess struct { - processPid int - processStartTime uint64 - fds []string -} - -func (p *nonChildProcess) start() error { - return newGenericError(fmt.Errorf("restored process cannot be started"), SystemError) -} - -func (p *nonChildProcess) pid() int { - return p.processPid -} - -func (p *nonChildProcess) terminate() error { - return newGenericError(fmt.Errorf("restored process cannot be terminated"), SystemError) -} - -func (p *nonChildProcess) wait() (*os.ProcessState, error) { - return nil, newGenericError(fmt.Errorf("restored process cannot be waited on"), SystemError) -} - -func (p *nonChildProcess) startTime() (uint64, error) { - return p.processStartTime, nil -} - -func (p *nonChildProcess) signal(s os.Signal) error { - proc, err := os.FindProcess(p.processPid) - if err != nil { - return err - } - return proc.Signal(s) -} - -func (p *nonChildProcess) externalDescriptors() []string { - return p.fds -} - -func (p *nonChildProcess) setExternalDescriptors(newFds []string) { - p.fds = newFds -} - -func (p *nonChildProcess) forwardChildLogs() chan error { - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go deleted file mode 100644 index 430f490dec..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/rootfs_linux.go +++ /dev/null @@ -1,1080 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "fmt" - "io" - "io/ioutil" - "os" - "os/exec" - "path" - "path/filepath" - "strings" - "time" - - securejoin "github.com/cyphar/filepath-securejoin" - "github.com/moby/sys/mountinfo" - "github.com/mrunalp/fileutils" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/cgroups/fs2" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runc/libcontainer/userns" - "github.com/opencontainers/runc/libcontainer/utils" - libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux/label" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -const defaultMountFlags = unix.MS_NOEXEC | unix.MS_NOSUID | unix.MS_NODEV - -type mountConfig struct { - root string - label string - cgroup2Path string - rootlessCgroups bool - cgroupns bool -} - -// needsSetupDev returns true if /dev needs to be set up. -func needsSetupDev(config *configs.Config) bool { - for _, m := range config.Mounts { - if m.Device == "bind" && libcontainerUtils.CleanPath(m.Destination) == "/dev" { - return false - } - } - return true -} - -// prepareRootfs sets up the devices, mount points, and filesystems for use -// inside a new mount namespace. It doesn't set anything as ro. You must call -// finalizeRootfs after this function to finish setting up the rootfs. 
-func prepareRootfs(pipe io.ReadWriter, iConfig *initConfig) (err error) { - config := iConfig.Config - if err := prepareRoot(config); err != nil { - return newSystemErrorWithCause(err, "preparing rootfs") - } - - mountConfig := &mountConfig{ - root: config.Rootfs, - label: config.MountLabel, - cgroup2Path: iConfig.Cgroup2Path, - rootlessCgroups: iConfig.RootlessCgroups, - cgroupns: config.Namespaces.Contains(configs.NEWCGROUP), - } - setupDev := needsSetupDev(config) - for _, m := range config.Mounts { - for _, precmd := range m.PremountCmds { - if err := mountCmd(precmd); err != nil { - return newSystemErrorWithCause(err, "running premount command") - } - } - if err := mountToRootfs(m, mountConfig); err != nil { - return newSystemErrorWithCausef(err, "mounting %q to rootfs at %q", m.Source, m.Destination) - } - - for _, postcmd := range m.PostmountCmds { - if err := mountCmd(postcmd); err != nil { - return newSystemErrorWithCause(err, "running postmount command") - } - } - } - - if setupDev { - if err := createDevices(config); err != nil { - return newSystemErrorWithCause(err, "creating device nodes") - } - if err := setupPtmx(config); err != nil { - return newSystemErrorWithCause(err, "setting up ptmx") - } - if err := setupDevSymlinks(config.Rootfs); err != nil { - return newSystemErrorWithCause(err, "setting up /dev symlinks") - } - } - - // Signal the parent to run the pre-start hooks. - // The hooks are run after the mounts are setup, but before we switch to the new - // root, so that the old root is still available in the hooks for any mount - // manipulations. - // Note that iConfig.Cwd is not guaranteed to exist here. - if err := syncParentHooks(pipe); err != nil { - return err - } - - // The reason these operations are done here rather than in finalizeRootfs - // is because the console-handling code gets quite sticky if we have to set - // up the console before doing the pivot_root(2). This is because the - // Console API has to also work with the ExecIn case, which means that the - // API must be able to deal with being inside as well as outside the - // container. It's just cleaner to do this here (at the expense of the - // operation not being perfectly split). - - if err := unix.Chdir(config.Rootfs); err != nil { - return newSystemErrorWithCausef(err, "changing dir to %q", config.Rootfs) - } - - s := iConfig.SpecState - s.Pid = unix.Getpid() - s.Status = specs.StateCreating - if err := iConfig.Config.Hooks[configs.CreateContainer].RunHooks(s); err != nil { - return err - } - - if config.NoPivotRoot { - err = msMoveRoot(config.Rootfs) - } else if config.Namespaces.Contains(configs.NEWNS) { - err = pivotRoot(config.Rootfs) - } else { - err = chroot() - } - if err != nil { - return newSystemErrorWithCause(err, "jailing process inside rootfs") - } - - if setupDev { - if err := reOpenDevNull(); err != nil { - return newSystemErrorWithCause(err, "reopening /dev/null inside container") - } - } - - if cwd := iConfig.Cwd; cwd != "" { - // Note that spec.Process.Cwd can contain unclean value like "../../../../foo/bar...". - // However, we are safe to call MkDirAll directly because we are in the jail here. - if err := os.MkdirAll(cwd, 0o755); err != nil { - return err - } - } - - return nil -} - -// finalizeRootfs sets anything to ro if necessary. You must call -// prepareRootfs first. 
-func finalizeRootfs(config *configs.Config) (err error) { - // remount dev as ro if specified - for _, m := range config.Mounts { - if libcontainerUtils.CleanPath(m.Destination) == "/dev" { - if m.Flags&unix.MS_RDONLY == unix.MS_RDONLY { - if err := remountReadonly(m); err != nil { - return newSystemErrorWithCausef(err, "remounting %q as readonly", m.Destination) - } - } - break - } - } - - // set rootfs ( / ) as readonly - if config.Readonlyfs { - if err := setReadonly(); err != nil { - return newSystemErrorWithCause(err, "setting rootfs as readonly") - } - } - - if config.Umask != nil { - unix.Umask(int(*config.Umask)) - } else { - unix.Umask(0o022) - } - return nil -} - -// /tmp has to be mounted as private to allow MS_MOVE to work in all situations -func prepareTmp(topTmpDir string) (string, error) { - tmpdir, err := ioutil.TempDir(topTmpDir, "runctop") - if err != nil { - return "", err - } - if err := unix.Mount(tmpdir, tmpdir, "bind", unix.MS_BIND, ""); err != nil { - return "", err - } - if err := unix.Mount("", tmpdir, "", uintptr(unix.MS_PRIVATE), ""); err != nil { - return "", err - } - return tmpdir, nil -} - -func cleanupTmp(tmpdir string) { - _ = unix.Unmount(tmpdir, 0) - _ = os.RemoveAll(tmpdir) -} - -func mountCmd(cmd configs.Command) error { - command := exec.Command(cmd.Path, cmd.Args[:]...) - command.Env = cmd.Env - command.Dir = cmd.Dir - if out, err := command.CombinedOutput(); err != nil { - return fmt.Errorf("%#v failed: %s: %v", cmd, string(out), err) - } - return nil -} - -func prepareBindMount(m *configs.Mount, rootfs string) error { - stat, err := os.Stat(m.Source) - if err != nil { - // error out if the source of a bind mount does not exist as we will be - // unable to bind anything to it. - return err - } - // ensure that the destination of the bind mount is resolved of symlinks at mount time because - // any previous mounts can invalidate the next mount's destination. - // this can happen when a user specifies mounts within other mounts to cause breakouts or other - // evil stuff to try to escape the container's rootfs. 
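// [Editor's illustrative aside -- not part of this patch or of the vendored
// runc sources.] The comment above is the heart of prepareBindMount: the
// bind-mount destination is re-resolved against the rootfs at mount time, so
// a symlink planted by an earlier mount cannot redirect the mount outside the
// container. A minimal sketch of that resolution step, assuming the same
// github.com/cyphar/filepath-securejoin dependency and a hypothetical rootfs
// path:
package main

import (
	"fmt"
	"os"

	securejoin "github.com/cyphar/filepath-securejoin"
)

func main() {
	rootfs := "/var/lib/containers/demo/rootfs" // hypothetical
	// Even if "etc" inside the rootfs is a symlink to "../../../etc", the
	// result is clamped underneath rootfs instead of escaping to the host.
	dest, err := securejoin.SecureJoin(rootfs, "/etc/resolv.conf")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("resolved destination:", dest)
}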
- var dest string - if dest, err = securejoin.SecureJoin(rootfs, m.Destination); err != nil { - return err - } - if err := checkProcMount(rootfs, dest, m.Source); err != nil { - return err - } - if err := createIfNotExists(dest, stat.IsDir()); err != nil { - return err - } - - return nil -} - -func mountCgroupV1(m *configs.Mount, c *mountConfig) error { - binds, err := getCgroupMounts(m) - if err != nil { - return err - } - var merged []string - for _, b := range binds { - ss := filepath.Base(b.Destination) - if strings.Contains(ss, ",") { - merged = append(merged, ss) - } - } - tmpfs := &configs.Mount{ - Source: "tmpfs", - Device: "tmpfs", - Destination: m.Destination, - Flags: defaultMountFlags, - Data: "mode=755", - PropagationFlags: m.PropagationFlags, - } - if err := mountToRootfs(tmpfs, c); err != nil { - return err - } - for _, b := range binds { - if c.cgroupns { - subsystemPath := filepath.Join(c.root, b.Destination) - if err := os.MkdirAll(subsystemPath, 0o755); err != nil { - return err - } - if err := utils.WithProcfd(c.root, b.Destination, func(procfd string) error { - flags := defaultMountFlags - if m.Flags&unix.MS_RDONLY != 0 { - flags = flags | unix.MS_RDONLY - } - var ( - source = "cgroup" - data = filepath.Base(subsystemPath) - ) - if data == "systemd" { - data = cgroups.CgroupNamePrefix + data - source = "systemd" - } - return unix.Mount(source, procfd, "cgroup", uintptr(flags), data) - }); err != nil { - return err - } - } else { - if err := mountToRootfs(b, c); err != nil { - return err - } - } - } - for _, mc := range merged { - for _, ss := range strings.Split(mc, ",") { - // symlink(2) is very dumb, it will just shove the path into - // the link and doesn't do any checks or relative path - // conversion. Also, don't error out if the cgroup already exists. - if err := os.Symlink(mc, filepath.Join(c.root, m.Destination, ss)); err != nil && !os.IsExist(err) { - return err - } - } - } - return nil -} - -func mountCgroupV2(m *configs.Mount, c *mountConfig) error { - dest, err := securejoin.SecureJoin(c.root, m.Destination) - if err != nil { - return err - } - if err := os.MkdirAll(dest, 0o755); err != nil { - return err - } - return utils.WithProcfd(c.root, m.Destination, func(procfd string) error { - if err := unix.Mount(m.Source, procfd, "cgroup2", uintptr(m.Flags), m.Data); err != nil { - // when we are in UserNS but CgroupNS is not unshared, we cannot mount cgroup2 (#2158) - if err == unix.EPERM || err == unix.EBUSY { - src := fs2.UnifiedMountpoint - if c.cgroupns && c.cgroup2Path != "" { - // Emulate cgroupns by bind-mounting - // the container cgroup path rather than - // the whole /sys/fs/cgroup. - src = c.cgroup2Path - } - err = unix.Mount(src, procfd, "", uintptr(m.Flags)|unix.MS_BIND, "") - if err == unix.ENOENT && c.rootlessCgroups { - err = nil - } - } - return err - } - return nil - }) -} - -func doTmpfsCopyUp(m *configs.Mount, rootfs, mountLabel string) (Err error) { - // Set up a scratch dir for the tmpfs on the host. - tmpdir, err := prepareTmp("/tmp") - if err != nil { - return newSystemErrorWithCause(err, "tmpcopyup: failed to setup tmpdir") - } - defer cleanupTmp(tmpdir) - tmpDir, err := ioutil.TempDir(tmpdir, "runctmpdir") - if err != nil { - return newSystemErrorWithCause(err, "tmpcopyup: failed to create tmpdir") - } - defer os.RemoveAll(tmpDir) - - // Configure the *host* tmpdir as if it's the container mount. We change - // m.Destination since we are going to mount *on the host*. 
- oldDest := m.Destination - m.Destination = tmpDir - err = mountPropagate(m, "/", mountLabel) - m.Destination = oldDest - if err != nil { - return err - } - defer func() { - if Err != nil { - if err := unix.Unmount(tmpDir, unix.MNT_DETACH); err != nil { - logrus.Warnf("tmpcopyup: failed to unmount tmpdir on error: %v", err) - } - } - }() - - return utils.WithProcfd(rootfs, m.Destination, func(procfd string) (Err error) { - // Copy the container data to the host tmpdir. We append "/" to force - // CopyDirectory to resolve the symlink rather than trying to copy the - // symlink itself. - if err := fileutils.CopyDirectory(procfd+"/", tmpDir); err != nil { - return fmt.Errorf("tmpcopyup: failed to copy %s to %s (%s): %w", m.Destination, procfd, tmpDir, err) - } - // Now move the mount into the container. - if err := unix.Mount(tmpDir, procfd, "", unix.MS_MOVE, ""); err != nil { - return fmt.Errorf("tmpcopyup: failed to move mount %s to %s (%s): %w", tmpDir, procfd, m.Destination, err) - } - return nil - }) -} - -func mountToRootfs(m *configs.Mount, c *mountConfig) error { - rootfs := c.root - mountLabel := c.label - dest, err := securejoin.SecureJoin(rootfs, m.Destination) - if err != nil { - return err - } - - switch m.Device { - case "proc", "sysfs": - // If the destination already exists and is not a directory, we bail - // out This is to avoid mounting through a symlink or similar -- which - // has been a "fun" attack scenario in the past. - // TODO: This won't be necessary once we switch to libpathrs and we can - // stop all of these symlink-exchange attacks. - if fi, err := os.Lstat(dest); err != nil { - if !os.IsNotExist(err) { - return err - } - } else if fi.Mode()&os.ModeDir == 0 { - return fmt.Errorf("filesystem %q must be mounted on ordinary directory", m.Device) - } - if err := os.MkdirAll(dest, 0o755); err != nil { - return err - } - // Selinux kernels do not support labeling of /proc or /sys - return mountPropagate(m, rootfs, "") - case "mqueue": - if err := os.MkdirAll(dest, 0o755); err != nil { - return err - } - if err := mountPropagate(m, rootfs, ""); err != nil { - return err - } - return label.SetFileLabel(dest, mountLabel) - case "tmpfs": - stat, err := os.Stat(dest) - if err != nil { - if err := os.MkdirAll(dest, 0o755); err != nil { - return err - } - } - - if m.Extensions&configs.EXT_COPYUP == configs.EXT_COPYUP { - err = doTmpfsCopyUp(m, rootfs, mountLabel) - } else { - err = mountPropagate(m, rootfs, mountLabel) - } - if err != nil { - return err - } - if stat != nil { - if err = os.Chmod(dest, stat.Mode()); err != nil { - return err - } - } - // Initially mounted rw in mountPropagate, remount to ro if flag set. - if m.Flags&unix.MS_RDONLY != 0 { - if err := remount(m, rootfs); err != nil { - return err - } - } - return nil - case "bind": - if err := prepareBindMount(m, rootfs); err != nil { - return err - } - if err := mountPropagate(m, rootfs, mountLabel); err != nil { - return err - } - // bind mount won't change mount options, we need remount to make mount options effective. 
- // first check that we have non-default options required before attempting a remount - if m.Flags&^(unix.MS_REC|unix.MS_REMOUNT|unix.MS_BIND) != 0 { - // only remount if unique mount options are set - if err := remount(m, rootfs); err != nil { - return err - } - } - - if m.Relabel != "" { - if err := label.Validate(m.Relabel); err != nil { - return err - } - shared := label.IsShared(m.Relabel) - if err := label.Relabel(m.Source, mountLabel, shared); err != nil { - return err - } - } - case "cgroup": - if cgroups.IsCgroup2UnifiedMode() { - return mountCgroupV2(m, c) - } - return mountCgroupV1(m, c) - default: - if err := checkProcMount(rootfs, dest, m.Source); err != nil { - return err - } - if err := os.MkdirAll(dest, 0o755); err != nil { - return err - } - return mountPropagate(m, rootfs, mountLabel) - } - return nil -} - -func getCgroupMounts(m *configs.Mount) ([]*configs.Mount, error) { - mounts, err := cgroups.GetCgroupMounts(false) - if err != nil { - return nil, err - } - - cgroupPaths, err := cgroups.ParseCgroupFile("/proc/self/cgroup") - if err != nil { - return nil, err - } - - var binds []*configs.Mount - - for _, mm := range mounts { - dir, err := mm.GetOwnCgroup(cgroupPaths) - if err != nil { - return nil, err - } - relDir, err := filepath.Rel(mm.Root, dir) - if err != nil { - return nil, err - } - binds = append(binds, &configs.Mount{ - Device: "bind", - Source: filepath.Join(mm.Mountpoint, relDir), - Destination: filepath.Join(m.Destination, filepath.Base(mm.Mountpoint)), - Flags: unix.MS_BIND | unix.MS_REC | m.Flags, - PropagationFlags: m.PropagationFlags, - }) - } - - return binds, nil -} - -// checkProcMount checks to ensure that the mount destination is not over the top of /proc. -// dest is required to be an abs path and have any symlinks resolved before calling this function. -// -// if source is nil, don't stat the filesystem. This is used for restore of a checkpoint. -func checkProcMount(rootfs, dest, source string) error { - const procPath = "/proc" - path, err := filepath.Rel(filepath.Join(rootfs, procPath), dest) - if err != nil { - return err - } - // pass if the mount path is located outside of /proc - if strings.HasPrefix(path, "..") { - return nil - } - if path == "." { - // an empty source is pasted on restore - if source == "" { - return nil - } - // only allow a mount on-top of proc if it's source is "proc" - isproc, err := isProc(source) - if err != nil { - return err - } - // pass if the mount is happening on top of /proc and the source of - // the mount is a proc filesystem - if isproc { - return nil - } - return fmt.Errorf("%q cannot be mounted because it is not of type proc", dest) - } - - // Here dest is definitely under /proc. Do not allow those, - // except for a few specific entries emulated by lxcfs. - validProcMounts := []string{ - "/proc/cpuinfo", - "/proc/diskstats", - "/proc/meminfo", - "/proc/stat", - "/proc/swaps", - "/proc/uptime", - "/proc/loadavg", - "/proc/slabinfo", - "/proc/net/dev", - } - for _, valid := range validProcMounts { - path, err := filepath.Rel(filepath.Join(rootfs, valid), dest) - if err != nil { - return err - } - if path == "." 
{ - return nil - } - } - - return fmt.Errorf("%q cannot be mounted because it is inside /proc", dest) -} - -func isProc(path string) (bool, error) { - var s unix.Statfs_t - if err := unix.Statfs(path, &s); err != nil { - return false, err - } - return s.Type == unix.PROC_SUPER_MAGIC, nil -} - -func setupDevSymlinks(rootfs string) error { - links := [][2]string{ - {"/proc/self/fd", "/dev/fd"}, - {"/proc/self/fd/0", "/dev/stdin"}, - {"/proc/self/fd/1", "/dev/stdout"}, - {"/proc/self/fd/2", "/dev/stderr"}, - } - // kcore support can be toggled with CONFIG_PROC_KCORE; only create a symlink - // in /dev if it exists in /proc. - if _, err := os.Stat("/proc/kcore"); err == nil { - links = append(links, [2]string{"/proc/kcore", "/dev/core"}) - } - for _, link := range links { - var ( - src = link[0] - dst = filepath.Join(rootfs, link[1]) - ) - if err := os.Symlink(src, dst); err != nil && !os.IsExist(err) { - return fmt.Errorf("symlink %s %s %s", src, dst, err) - } - } - return nil -} - -// If stdin, stdout, and/or stderr are pointing to `/dev/null` in the parent's rootfs -// this method will make them point to `/dev/null` in this container's rootfs. This -// needs to be called after we chroot/pivot into the container's rootfs so that any -// symlinks are resolved locally. -func reOpenDevNull() error { - var stat, devNullStat unix.Stat_t - file, err := os.OpenFile("/dev/null", os.O_RDWR, 0) - if err != nil { - return fmt.Errorf("Failed to open /dev/null - %s", err) - } - defer file.Close() //nolint: errcheck - if err := unix.Fstat(int(file.Fd()), &devNullStat); err != nil { - return err - } - for fd := 0; fd < 3; fd++ { - if err := unix.Fstat(fd, &stat); err != nil { - return err - } - if stat.Rdev == devNullStat.Rdev { - // Close and re-open the fd. - if err := unix.Dup3(int(file.Fd()), fd, 0); err != nil { - return err - } - } - } - return nil -} - -// Create the device nodes in the container. -func createDevices(config *configs.Config) error { - useBindMount := userns.RunningInUserNS() || config.Namespaces.Contains(configs.NEWUSER) - oldMask := unix.Umask(0o000) - for _, node := range config.Devices { - - // The /dev/ptmx device is setup by setupPtmx() - if utils.CleanPath(node.Path) == "/dev/ptmx" { - continue - } - - // containers running in a user namespace are not allowed to mknod - // devices so we can just bind mount it from the host. - if err := createDeviceNode(config.Rootfs, node, useBindMount); err != nil { - unix.Umask(oldMask) - return err - } - } - unix.Umask(oldMask) - return nil -} - -func bindMountDeviceNode(rootfs, dest string, node *devices.Device) error { - f, err := os.Create(dest) - if err != nil && !os.IsExist(err) { - return err - } - if f != nil { - _ = f.Close() - } - return utils.WithProcfd(rootfs, dest, func(procfd string) error { - return unix.Mount(node.Path, procfd, "bind", unix.MS_BIND, "") - }) -} - -// Creates the device node in the rootfs of the container. -func createDeviceNode(rootfs string, node *devices.Device, bind bool) error { - if node.Path == "" { - // The node only exists for cgroup reasons, ignore it here. 
- return nil - } - dest, err := securejoin.SecureJoin(rootfs, node.Path) - if err != nil { - return err - } - if err := os.MkdirAll(filepath.Dir(dest), 0o755); err != nil { - return err - } - if bind { - return bindMountDeviceNode(rootfs, dest, node) - } - if err := mknodDevice(dest, node); err != nil { - if os.IsExist(err) { - return nil - } else if os.IsPermission(err) { - return bindMountDeviceNode(rootfs, dest, node) - } - return err - } - return nil -} - -func mknodDevice(dest string, node *devices.Device) error { - fileMode := node.FileMode - switch node.Type { - case devices.BlockDevice: - fileMode |= unix.S_IFBLK - case devices.CharDevice: - fileMode |= unix.S_IFCHR - case devices.FifoDevice: - fileMode |= unix.S_IFIFO - default: - return fmt.Errorf("%c is not a valid device type for device %s", node.Type, node.Path) - } - dev, err := node.Mkdev() - if err != nil { - return err - } - if err := unix.Mknod(dest, uint32(fileMode), int(dev)); err != nil { - return err - } - return unix.Chown(dest, int(node.Uid), int(node.Gid)) -} - -// Get the parent mount point of directory passed in as argument. Also return -// optional fields. -func getParentMount(rootfs string) (string, string, error) { - mi, err := mountinfo.GetMounts(mountinfo.ParentsFilter(rootfs)) - if err != nil { - return "", "", err - } - if len(mi) < 1 { - return "", "", fmt.Errorf("could not find parent mount of %s", rootfs) - } - - // find the longest mount point - var idx, maxlen int - for i := range mi { - if len(mi[i].Mountpoint) > maxlen { - maxlen = len(mi[i].Mountpoint) - idx = i - } - } - return mi[idx].Mountpoint, mi[idx].Optional, nil -} - -// Make parent mount private if it was shared -func rootfsParentMountPrivate(rootfs string) error { - sharedMount := false - - parentMount, optionalOpts, err := getParentMount(rootfs) - if err != nil { - return err - } - - optsSplit := strings.Split(optionalOpts, " ") - for _, opt := range optsSplit { - if strings.HasPrefix(opt, "shared:") { - sharedMount = true - break - } - } - - // Make parent mount PRIVATE if it was shared. It is needed for two - // reasons. First of all pivot_root() will fail if parent mount is - // shared. Secondly when we bind mount rootfs it will propagate to - // parent namespace and we don't want that to happen. - if sharedMount { - return unix.Mount("", parentMount, "", unix.MS_PRIVATE, "") - } - - return nil -} - -func prepareRoot(config *configs.Config) error { - flag := unix.MS_SLAVE | unix.MS_REC - if config.RootPropagation != 0 { - flag = config.RootPropagation - } - if err := unix.Mount("", "/", "", uintptr(flag), ""); err != nil { - return err - } - - // Make parent mount private to make sure following bind mount does - // not propagate in other namespaces. Also it will help with kernel - // check pass in pivot_root. 
(IS_SHARED(new_mnt->mnt_parent)) - if err := rootfsParentMountPrivate(config.Rootfs); err != nil { - return err - } - - return unix.Mount(config.Rootfs, config.Rootfs, "bind", unix.MS_BIND|unix.MS_REC, "") -} - -func setReadonly() error { - flags := uintptr(unix.MS_BIND | unix.MS_REMOUNT | unix.MS_RDONLY) - - err := unix.Mount("", "/", "", flags, "") - if err == nil { - return nil - } - var s unix.Statfs_t - if err := unix.Statfs("/", &s); err != nil { - return &os.PathError{Op: "statfs", Path: "/", Err: err} - } - flags |= uintptr(s.Flags) - return unix.Mount("", "/", "", flags, "") -} - -func setupPtmx(config *configs.Config) error { - ptmx := filepath.Join(config.Rootfs, "dev/ptmx") - if err := os.Remove(ptmx); err != nil && !os.IsNotExist(err) { - return err - } - if err := os.Symlink("pts/ptmx", ptmx); err != nil { - return fmt.Errorf("symlink dev ptmx %s", err) - } - return nil -} - -// pivotRoot will call pivot_root such that rootfs becomes the new root -// filesystem, and everything else is cleaned up. -func pivotRoot(rootfs string) error { - // While the documentation may claim otherwise, pivot_root(".", ".") is - // actually valid. What this results in is / being the new root but - // /proc/self/cwd being the old root. Since we can play around with the cwd - // with pivot_root this allows us to pivot without creating directories in - // the rootfs. Shout-outs to the LXC developers for giving us this idea. - - oldroot, err := unix.Open("/", unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return err - } - defer unix.Close(oldroot) //nolint: errcheck - - newroot, err := unix.Open(rootfs, unix.O_DIRECTORY|unix.O_RDONLY, 0) - if err != nil { - return err - } - defer unix.Close(newroot) //nolint: errcheck - - // Change to the new root so that the pivot_root actually acts on it. - if err := unix.Fchdir(newroot); err != nil { - return err - } - - if err := unix.PivotRoot(".", "."); err != nil { - return fmt.Errorf("pivot_root %s", err) - } - - // Currently our "." is oldroot (according to the current kernel code). - // However, purely for safety, we will fchdir(oldroot) since there isn't - // really any guarantee from the kernel what /proc/self/cwd will be after a - // pivot_root(2). - - if err := unix.Fchdir(oldroot); err != nil { - return err - } - - // Make oldroot rslave to make sure our unmounts don't propagate to the - // host (and thus bork the machine). We don't use rprivate because this is - // known to cause issues due to races where we still have a reference to a - // mount while a process in the host namespace are trying to operate on - // something they think has no mounts (devicemapper in particular). - if err := unix.Mount("", ".", "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { - return err - } - // Preform the unmount. MNT_DETACH allows us to unmount /proc/self/cwd. - if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { - return err - } - - // Switch back to our shiny new root. - if err := unix.Chdir("/"); err != nil { - return fmt.Errorf("chdir / %s", err) - } - return nil -} - -func msMoveRoot(rootfs string) error { - // Before we move the root and chroot we have to mask all "full" sysfs and - // procfs mounts which exist on the host. This is because while the kernel - // has protections against mounting procfs if it has masks, when using - // chroot(2) the *host* procfs mount is still reachable in the mount - // namespace and the kernel permits procfs mounts inside --no-pivot - // containers. 
- // - // Users shouldn't be using --no-pivot except in exceptional circumstances, - // but to avoid such a trivial security flaw we apply a best-effort - // protection here. The kernel only allows a mount of a pseudo-filesystem - // like procfs or sysfs if there is a *full* mount (the root of the - // filesystem is mounted) without any other locked mount points covering a - // subtree of the mount. - // - // So we try to unmount (or mount tmpfs on top of) any mountpoint which is - // a full mount of either sysfs or procfs (since those are the most - // concerning filesystems to us). - mountinfos, err := mountinfo.GetMounts(func(info *mountinfo.Info) (skip, stop bool) { - // Collect every sysfs and procfs filesystem, except for those which - // are non-full mounts or are inside the rootfs of the container. - if info.Root != "/" || - (info.FSType != "proc" && info.FSType != "sysfs") || - strings.HasPrefix(info.Mountpoint, rootfs) { - skip = true - } - return - }) - if err != nil { - return err - } - for _, info := range mountinfos { - p := info.Mountpoint - // Be sure umount events are not propagated to the host. - if err := unix.Mount("", p, "", unix.MS_SLAVE|unix.MS_REC, ""); err != nil { - if err == unix.ENOENT { - // If the mountpoint doesn't exist that means that we've - // already blasted away some parent directory of the mountpoint - // and so we don't care about this error. - continue - } - return err - } - if err := unix.Unmount(p, unix.MNT_DETACH); err != nil { - if err != unix.EINVAL && err != unix.EPERM { - return err - } else { - // If we have not privileges for umounting (e.g. rootless), then - // cover the path. - if err := unix.Mount("tmpfs", p, "tmpfs", 0, ""); err != nil { - return err - } - } - } - } - - // Move the rootfs on top of "/" in our mount namespace. - if err := unix.Mount(rootfs, "/", "", unix.MS_MOVE, ""); err != nil { - return err - } - return chroot() -} - -func chroot() error { - if err := unix.Chroot("."); err != nil { - return err - } - return unix.Chdir("/") -} - -// createIfNotExists creates a file or a directory only if it does not already exist. -func createIfNotExists(path string, isDir bool) error { - if _, err := os.Stat(path); err != nil { - if os.IsNotExist(err) { - if isDir { - return os.MkdirAll(path, 0o755) - } - if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil { - return err - } - f, err := os.OpenFile(path, os.O_CREATE, 0o755) - if err != nil { - return err - } - _ = f.Close() - } - } - return nil -} - -// readonlyPath will make a path read only. -func readonlyPath(path string) error { - if err := unix.Mount(path, path, "", unix.MS_BIND|unix.MS_REC, ""); err != nil { - if os.IsNotExist(err) { - return nil - } - return &os.PathError{Op: "bind-mount", Path: path, Err: err} - } - - var s unix.Statfs_t - if err := unix.Statfs(path, &s); err != nil { - return &os.PathError{Op: "statfs", Path: path, Err: err} - } - flags := uintptr(s.Flags) & (unix.MS_NOSUID | unix.MS_NODEV | unix.MS_NOEXEC) - - if err := unix.Mount(path, path, "", flags|unix.MS_BIND|unix.MS_REMOUNT|unix.MS_RDONLY, ""); err != nil { - return &os.PathError{Op: "bind-mount-ro", Path: path, Err: err} - } - - return nil -} - -// remountReadonly will remount an existing mount point and ensure that it is read-only. 
-func remountReadonly(m *configs.Mount) error { - var ( - dest = m.Destination - flags = m.Flags - ) - for i := 0; i < 5; i++ { - // There is a special case in the kernel for - // MS_REMOUNT | MS_BIND, which allows us to change only the - // flags even as an unprivileged user (i.e. user namespace) - // assuming we don't drop any security related flags (nodev, - // nosuid, etc.). So, let's use that case so that we can do - // this re-mount without failing in a userns. - flags |= unix.MS_REMOUNT | unix.MS_BIND | unix.MS_RDONLY - if err := unix.Mount("", dest, "", uintptr(flags), ""); err != nil { - switch err { - case unix.EBUSY: - time.Sleep(100 * time.Millisecond) - continue - default: - return err - } - } - return nil - } - return fmt.Errorf("unable to mount %s as readonly max retries reached", dest) -} - -// maskPath masks the top of the specified path inside a container to avoid -// security issues from processes reading information from non-namespace aware -// mounts ( proc/kcore ). -// For files, maskPath bind mounts /dev/null over the top of the specified path. -// For directories, maskPath mounts read-only tmpfs over the top of the specified path. -func maskPath(path string, mountLabel string) error { - if err := unix.Mount("/dev/null", path, "", unix.MS_BIND, ""); err != nil && !os.IsNotExist(err) { - if err == unix.ENOTDIR { - return unix.Mount("tmpfs", path, "tmpfs", unix.MS_RDONLY, label.FormatMountLabel("", mountLabel)) - } - return err - } - return nil -} - -// writeSystemProperty writes the value to a path under /proc/sys as determined from the key. -// For e.g. net.ipv4.ip_forward translated to /proc/sys/net/ipv4/ip_forward. -func writeSystemProperty(key, value string) error { - keyPath := strings.Replace(key, ".", "/", -1) - return ioutil.WriteFile(path.Join("/proc/sys", keyPath), []byte(value), 0o644) -} - -func remount(m *configs.Mount, rootfs string) error { - return utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { - return unix.Mount(m.Source, procfd, m.Device, uintptr(m.Flags|unix.MS_REMOUNT), "") - }) -} - -// Do the mount operation followed by additional mounts required to take care -// of propagation flags. This will always be scoped inside the container rootfs. -func mountPropagate(m *configs.Mount, rootfs string, mountLabel string) error { - var ( - data = label.FormatMountLabel(m.Data, mountLabel) - flags = m.Flags - ) - // Delay mounting the filesystem read-only if we need to do further - // operations on it. We need to set up files in "/dev" and tmpfs mounts may - // need to be chmod-ed after mounting. The mount will be remounted ro later - // in finalizeRootfs() if necessary. - if libcontainerUtils.CleanPath(m.Destination) == "/dev" || m.Device == "tmpfs" { - flags &= ^unix.MS_RDONLY - } - - // Because the destination is inside a container path which might be - // mutating underneath us, we verify that we are actually going to mount - // inside the container with WithProcfd() -- mounting through a procfd - // mounts on the target. - if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { - return unix.Mount(m.Source, procfd, m.Device, uintptr(flags), data) - }); err != nil { - return fmt.Errorf("mount through procfd: %w", err) - } - // We have to apply mount propagation flags in a separate WithProcfd() call - // because the previous call invalidates the passed procfd -- the mount - // target needs to be re-opened. 
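// [Editor's illustrative aside -- not part of this patch or of the vendored
// runc sources.] mountPropagate above relies on utils.WithProcfd: the
// destination is pinned with an O_PATH file descriptor and the mount is then
// performed on the /proc/self/fd/N magic link, so the path that was verified
// is exactly the one mounted on, even if symlinks are swapped underneath it.
// A rough sketch of that idea (requires root; the function name and the
// /mnt/scratch target are hypothetical, and the real WithProcfd additionally
// re-checks what the fd resolves to):
package main

import (
	"fmt"
	"os"

	"golang.org/x/sys/unix"
)

func mountOnPinnedTarget(source, target, fstype string, flags uintptr, data string) error {
	// Pin the target: the fd keeps referring to this exact object.
	fd, err := unix.Open(target, unix.O_PATH|unix.O_CLOEXEC, 0)
	if err != nil {
		return err
	}
	defer unix.Close(fd)
	procfd := fmt.Sprintf("/proc/self/fd/%d", fd)
	// Mounting on the magic link acts on the pinned target, not on whatever
	// the original path string happens to resolve to at this instant.
	return unix.Mount(source, procfd, fstype, flags, data)
}

func main() {
	if err := mountOnPinnedTarget("tmpfs", "/mnt/scratch", "tmpfs", unix.MS_NOSUID|unix.MS_NODEV, "mode=755"); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println("mounted tmpfs on /mnt/scratch")
}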
- if err := utils.WithProcfd(rootfs, m.Destination, func(procfd string) error { - for _, pflag := range m.PropagationFlags { - if err := unix.Mount("", procfd, "", uintptr(pflag), ""); err != nil { - return err - } - } - return nil - }); err != nil { - return fmt.Errorf("change mount propagation through procfd: %w", err) - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go deleted file mode 100644 index b54b7eead3..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/config.go +++ /dev/null @@ -1,77 +0,0 @@ -package seccomp - -import ( - "fmt" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -var operators = map[string]configs.Operator{ - "SCMP_CMP_NE": configs.NotEqualTo, - "SCMP_CMP_LT": configs.LessThan, - "SCMP_CMP_LE": configs.LessThanOrEqualTo, - "SCMP_CMP_EQ": configs.EqualTo, - "SCMP_CMP_GE": configs.GreaterThanOrEqualTo, - "SCMP_CMP_GT": configs.GreaterThan, - "SCMP_CMP_MASKED_EQ": configs.MaskEqualTo, -} - -var actions = map[string]configs.Action{ - "SCMP_ACT_KILL": configs.Kill, - "SCMP_ACT_ERRNO": configs.Errno, - "SCMP_ACT_TRAP": configs.Trap, - "SCMP_ACT_ALLOW": configs.Allow, - "SCMP_ACT_TRACE": configs.Trace, - "SCMP_ACT_LOG": configs.Log, -} - -var archs = map[string]string{ - "SCMP_ARCH_X86": "x86", - "SCMP_ARCH_X86_64": "amd64", - "SCMP_ARCH_X32": "x32", - "SCMP_ARCH_ARM": "arm", - "SCMP_ARCH_AARCH64": "arm64", - "SCMP_ARCH_MIPS": "mips", - "SCMP_ARCH_MIPS64": "mips64", - "SCMP_ARCH_MIPS64N32": "mips64n32", - "SCMP_ARCH_MIPSEL": "mipsel", - "SCMP_ARCH_MIPSEL64": "mipsel64", - "SCMP_ARCH_MIPSEL64N32": "mipsel64n32", - "SCMP_ARCH_PPC": "ppc", - "SCMP_ARCH_PPC64": "ppc64", - "SCMP_ARCH_PPC64LE": "ppc64le", - "SCMP_ARCH_S390": "s390", - "SCMP_ARCH_S390X": "s390x", -} - -// ConvertStringToOperator converts a string into a Seccomp comparison operator. -// Comparison operators use the names they are assigned by Libseccomp's header. -// Attempting to convert a string that is not a valid operator results in an -// error. -func ConvertStringToOperator(in string) (configs.Operator, error) { - if op, ok := operators[in]; ok { - return op, nil - } - return 0, fmt.Errorf("string %s is not a valid operator for seccomp", in) -} - -// ConvertStringToAction converts a string into a Seccomp rule match action. -// Actions use the names they are assigned in Libseccomp's header, though some -// (notable, SCMP_ACT_TRACE) are not available in this implementation and will -// return errors. -// Attempting to convert a string that is not a valid action results in an -// error. -func ConvertStringToAction(in string) (configs.Action, error) { - if act, ok := actions[in]; ok { - return act, nil - } - return 0, fmt.Errorf("string %s is not a valid action for seccomp", in) -} - -// ConvertStringToArch converts a string into a Seccomp comparison arch. 
-func ConvertStringToArch(in string) (string, error) { - if arch, ok := archs[in]; ok { - return arch, nil - } - return "", fmt.Errorf("string %s is not a valid arch for seccomp", in) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go deleted file mode 100644 index 3c6ef7a4c6..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_linux.go +++ /dev/null @@ -1,660 +0,0 @@ -// +build linux,cgo,seccomp - -package patchbpf - -import ( - "bytes" - "encoding/binary" - "io" - "os" - "runtime" - "unsafe" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/utils" - - "github.com/pkg/errors" - libseccomp "github.com/seccomp/libseccomp-golang" - "github.com/sirupsen/logrus" - "golang.org/x/net/bpf" - "golang.org/x/sys/unix" -) - -// #cgo pkg-config: libseccomp -/* -#include -#include -#include -#include - -const uint32_t C_ACT_ERRNO_ENOSYS = SCMP_ACT_ERRNO(ENOSYS); - -// Copied from . - -#ifndef SECCOMP_SET_MODE_FILTER -# define SECCOMP_SET_MODE_FILTER 1 -#endif -const uintptr_t C_SET_MODE_FILTER = SECCOMP_SET_MODE_FILTER; - -#ifndef SECCOMP_FILTER_FLAG_LOG -# define SECCOMP_FILTER_FLAG_LOG (1UL << 1) -#endif -const uintptr_t C_FILTER_FLAG_LOG = SECCOMP_FILTER_FLAG_LOG; - -// We use the AUDIT_ARCH_* values because those are the ones used by the kernel -// and SCMP_ARCH_* sometimes has fake values (such as SCMP_ARCH_X32). But we -// use so we get libseccomp's fallback definitions of AUDIT_ARCH_*. - -const uint32_t C_AUDIT_ARCH_I386 = AUDIT_ARCH_I386; -const uint32_t C_AUDIT_ARCH_X86_64 = AUDIT_ARCH_X86_64; -const uint32_t C_AUDIT_ARCH_ARM = AUDIT_ARCH_ARM; -const uint32_t C_AUDIT_ARCH_AARCH64 = AUDIT_ARCH_AARCH64; -const uint32_t C_AUDIT_ARCH_MIPS = AUDIT_ARCH_MIPS; -const uint32_t C_AUDIT_ARCH_MIPS64 = AUDIT_ARCH_MIPS64; -const uint32_t C_AUDIT_ARCH_MIPS64N32 = AUDIT_ARCH_MIPS64N32; -const uint32_t C_AUDIT_ARCH_MIPSEL = AUDIT_ARCH_MIPSEL; -const uint32_t C_AUDIT_ARCH_MIPSEL64 = AUDIT_ARCH_MIPSEL64; -const uint32_t C_AUDIT_ARCH_MIPSEL64N32 = AUDIT_ARCH_MIPSEL64N32; -const uint32_t C_AUDIT_ARCH_PPC = AUDIT_ARCH_PPC; -const uint32_t C_AUDIT_ARCH_PPC64 = AUDIT_ARCH_PPC64; -const uint32_t C_AUDIT_ARCH_PPC64LE = AUDIT_ARCH_PPC64LE; -const uint32_t C_AUDIT_ARCH_S390 = AUDIT_ARCH_S390; -const uint32_t C_AUDIT_ARCH_S390X = AUDIT_ARCH_S390X; -*/ -import "C" - -var retErrnoEnosys = uint32(C.C_ACT_ERRNO_ENOSYS) - -func isAllowAction(action configs.Action) bool { - switch action { - // Trace is considered an "allow" action because a good tracer should - // support future syscalls (by handling -ENOSYS on its own), and giving - // -ENOSYS will be disruptive for emulation. - case configs.Allow, configs.Log, configs.Trace: - return true - default: - return false - } -} - -func parseProgram(rdr io.Reader) ([]bpf.RawInstruction, error) { - var program []bpf.RawInstruction -loop: - for { - // Read the next instruction. We have to use NativeEndian because - // seccomp_export_bpf outputs the program in *host* endian-ness. - var insn unix.SockFilter - if err := binary.Read(rdr, utils.NativeEndian, &insn); err != nil { - switch err { - case io.EOF: - // Parsing complete. - break loop - case io.ErrUnexpectedEOF: - // Parsing stopped mid-instruction. - return nil, errors.Wrap(err, "program parsing halted mid-instruction") - default: - // All other errors. 
- return nil, errors.Wrap(err, "parsing instructions") - } - } - program = append(program, bpf.RawInstruction{ - Op: insn.Code, - Jt: insn.Jt, - Jf: insn.Jf, - K: insn.K, - }) - } - return program, nil -} - -func disassembleFilter(filter *libseccomp.ScmpFilter) ([]bpf.Instruction, error) { - rdr, wtr, err := os.Pipe() - if err != nil { - return nil, errors.Wrap(err, "creating scratch pipe") - } - defer wtr.Close() - defer rdr.Close() - - readerBuffer := new(bytes.Buffer) - errChan := make(chan error, 1) - go func() { - _, err := io.Copy(readerBuffer, rdr) - errChan <- err - close(errChan) - }() - - if err := filter.ExportBPF(wtr); err != nil { - return nil, errors.Wrap(err, "exporting BPF") - } - // Close so that the reader actually gets EOF. - _ = wtr.Close() - - if copyErr := <-errChan; copyErr != nil { - return nil, errors.Wrap(copyErr, "reading from ExportBPF pipe") - } - - // Parse the instructions. - rawProgram, err := parseProgram(readerBuffer) - if err != nil { - return nil, errors.Wrap(err, "parsing generated BPF filter") - } - program, ok := bpf.Disassemble(rawProgram) - if !ok { - return nil, errors.Errorf("could not disassemble entire BPF filter") - } - return program, nil -} - -type nativeArch uint32 - -const invalidArch nativeArch = 0 - -func archToNative(arch libseccomp.ScmpArch) (nativeArch, error) { - switch arch { - case libseccomp.ArchNative: - // Convert to actual native architecture. - arch, err := libseccomp.GetNativeArch() - if err != nil { - return invalidArch, errors.Wrap(err, "get native arch") - } - return archToNative(arch) - case libseccomp.ArchX86: - return nativeArch(C.C_AUDIT_ARCH_I386), nil - case libseccomp.ArchAMD64, libseccomp.ArchX32: - // NOTE: x32 is treated like x86_64 except all x32 syscalls have the - // 30th bit of the syscall number set to indicate that it's not a - // normal x86_64 syscall. - return nativeArch(C.C_AUDIT_ARCH_X86_64), nil - case libseccomp.ArchARM: - return nativeArch(C.C_AUDIT_ARCH_ARM), nil - case libseccomp.ArchARM64: - return nativeArch(C.C_AUDIT_ARCH_AARCH64), nil - case libseccomp.ArchMIPS: - return nativeArch(C.C_AUDIT_ARCH_MIPS), nil - case libseccomp.ArchMIPS64: - return nativeArch(C.C_AUDIT_ARCH_MIPS64), nil - case libseccomp.ArchMIPS64N32: - return nativeArch(C.C_AUDIT_ARCH_MIPS64N32), nil - case libseccomp.ArchMIPSEL: - return nativeArch(C.C_AUDIT_ARCH_MIPSEL), nil - case libseccomp.ArchMIPSEL64: - return nativeArch(C.C_AUDIT_ARCH_MIPSEL64), nil - case libseccomp.ArchMIPSEL64N32: - return nativeArch(C.C_AUDIT_ARCH_MIPSEL64N32), nil - case libseccomp.ArchPPC: - return nativeArch(C.C_AUDIT_ARCH_PPC), nil - case libseccomp.ArchPPC64: - return nativeArch(C.C_AUDIT_ARCH_PPC64), nil - case libseccomp.ArchPPC64LE: - return nativeArch(C.C_AUDIT_ARCH_PPC64LE), nil - case libseccomp.ArchS390: - return nativeArch(C.C_AUDIT_ARCH_S390), nil - case libseccomp.ArchS390X: - return nativeArch(C.C_AUDIT_ARCH_S390X), nil - default: - return invalidArch, errors.Errorf("unknown architecture: %v", arch) - } -} - -type lastSyscallMap map[nativeArch]map[libseccomp.ScmpArch]libseccomp.ScmpSyscall - -// Figure out largest syscall number referenced in the filter for each -// architecture. We will be generating code based on the native architecture -// representation, but SCMP_ARCH_X32 means we have to track cases where the -// same architecture has different largest syscalls based on the mode. 
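// [Editor's illustrative aside -- not part of this patch or of the vendored
// runc sources.] The comment above is why findLastSyscalls tracks a largest
// syscall number per architecture: the same syscall name resolves to a
// different number on each ABI. A small sketch of that lookup using the same
// github.com/seccomp/libseccomp-golang dependency (needs cgo and libseccomp
// installed; "amd64" and "arm64" are the architecture strings libseccomp
// accepts, as produced by the archs map earlier in this vendor tree):
package main

import (
	"fmt"
	"os"

	libseccomp "github.com/seccomp/libseccomp-golang"
)

func main() {
	for _, name := range []string{"amd64", "arm64"} {
		arch, err := libseccomp.GetArchFromString(name)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		// openat is just an example syscall; any name known to libseccomp works.
		nr, err := libseccomp.GetSyscallFromNameByArch("openat", arch)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		fmt.Printf("%-6s openat = %d\n", name, nr)
	}
}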
-func findLastSyscalls(config *configs.Seccomp) (lastSyscallMap, error) { - lastSyscalls := make(lastSyscallMap) - // Only loop over architectures which are present in the filter. Any other - // architectures will get the libseccomp bad architecture action anyway. - for _, ociArch := range config.Architectures { - arch, err := libseccomp.GetArchFromString(ociArch) - if err != nil { - return nil, errors.Wrap(err, "validating seccomp architecture") - } - - // Map native architecture to a real architecture value to avoid - // doubling-up the lastSyscall mapping. - if arch == libseccomp.ArchNative { - nativeArch, err := libseccomp.GetNativeArch() - if err != nil { - return nil, errors.Wrap(err, "get native arch") - } - arch = nativeArch - } - - // Figure out native architecture representation of the architecture. - nativeArch, err := archToNative(arch) - if err != nil { - return nil, errors.Wrapf(err, "cannot map architecture %v to AUDIT_ARCH_ constant", arch) - } - - if _, ok := lastSyscalls[nativeArch]; !ok { - lastSyscalls[nativeArch] = map[libseccomp.ScmpArch]libseccomp.ScmpSyscall{} - } - if _, ok := lastSyscalls[nativeArch][arch]; ok { - // Because of ArchNative we may hit the same entry multiple times. - // Just skip it if we've seen this (nativeArch, ScmpArch) - // combination before. - continue - } - - // Find the largest syscall in the filter for this architecture. - var largestSyscall libseccomp.ScmpSyscall - for _, rule := range config.Syscalls { - sysno, err := libseccomp.GetSyscallFromNameByArch(rule.Name, arch) - if err != nil { - // Ignore unknown syscalls. - continue - } - if sysno > largestSyscall { - largestSyscall = sysno - } - } - if largestSyscall != 0 { - lastSyscalls[nativeArch][arch] = largestSyscall - } else { - logrus.Warnf("could not find any syscalls for arch %s", ociArch) - delete(lastSyscalls[nativeArch], arch) - } - } - return lastSyscalls, nil -} - -// FIXME FIXME FIXME -// -// This solution is less than ideal. In the future it would be great to have -// per-arch information about which syscalls were added in which kernel -// versions so we can create far more accurate filter rules (handling holes in -// the syscall table and determining -ENOSYS requirements based on kernel -// minimum version alone. -// -// This implementation can in principle cause issues with syscalls like -// close_range(2) which were added out-of-order in the syscall table between -// kernel releases. -func generateEnosysStub(lastSyscalls lastSyscallMap) ([]bpf.Instruction, error) { - // A jump-table for each nativeArch used to generate the initial - // conditional jumps -- measured from the *END* of the program so they - // remain valid after prepending to the tail. - archJumpTable := map[nativeArch]uint32{} - - // Generate our own -ENOSYS rules for each architecture. They have to be - // generated in reverse (prepended to the tail of the program) because the - // JumpIf jumps need to be computed from the end of the program. - programTail := []bpf.Instruction{ - // Fall-through rules jump into the filter. - bpf.Jump{Skip: 1}, - // Rules which jump to here get -ENOSYS. - bpf.RetConstant{Val: retErrnoEnosys}, - } - - // Generate the syscall -ENOSYS rules. - for nativeArch, maxSyscalls := range lastSyscalls { - // The number of instructions from the tail of this section which need - // to be jumped in order to reach the -ENOSYS return. If the section - // does not jump, it will fall through to the actual filter. 
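// [Editor's illustrative aside -- not part of this patch or of the vendored
// runc sources.] The stub assembled above makes syscall numbers the filter
// has never heard of return -ENOSYS instead of the filter's default action.
// Stripped of the per-architecture and x32 handling, the core shape is just
// "if the syscall number is above the largest known one, return ERRNO(ENOSYS),
// otherwise fall through". A toy version of that shape with golang.org/x/net/bpf
// (constants written out literally for x86-64, and the cut-off value is
// hypothetical; a real filter must also check the arch field, as the
// surrounding code does):
package main

import (
	"fmt"
	"os"

	"golang.org/x/net/bpf"
)

const (
	lastKnownSyscall = 435             // hypothetical "largest syscall in the filter"
	retAllow         = 0x7fff0000      // SECCOMP_RET_ALLOW
	retErrnoEnosys   = 0x00050000 | 38 // SECCOMP_RET_ERRNO | ENOSYS (x86-64)
)

func main() {
	prog := []bpf.Instruction{
		// load seccomp_data.nr (offset 0, 4 bytes)
		bpf.LoadAbsolute{Off: 0, Size: 4},
		// if nr > lastKnownSyscall, skip to the -ENOSYS return
		bpf.JumpIf{Cond: bpf.JumpGreaterThan, Val: lastKnownSyscall, SkipTrue: 1},
		// known syscall: stand-in for "fall through into the real filter"
		bpf.RetConstant{Val: retAllow},
		// unknown (newer) syscall: make it look unimplemented
		bpf.RetConstant{Val: retErrnoEnosys},
	}
	raw, err := bpf.Assemble(prog)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Printf("assembled %d cBPF instructions\n", len(raw))
}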
- baseJumpEnosys := uint32(len(programTail) - 1) - baseJumpFilter := baseJumpEnosys + 1 - - // Add the load instruction for the syscall number -- we jump here - // directly from the arch code so we need to do it here. Sadly we can't - // share this code between architecture branches. - section := []bpf.Instruction{ - // load [0] - bpf.LoadAbsolute{Off: 0, Size: 4}, // NOTE: We assume sizeof(int) == 4. - } - - switch len(maxSyscalls) { - case 0: - // No syscalls found for this arch -- skip it and move on. - continue - case 1: - // Get the only syscall in the map. - var sysno libseccomp.ScmpSyscall - for _, no := range maxSyscalls { - sysno = no - } - - // The simplest case just boils down to a single jgt instruction, - // with special handling if baseJumpEnosys is larger than 255 (and - // thus a long jump is required). - var sectionTail []bpf.Instruction - if baseJumpEnosys+1 <= 255 { - sectionTail = []bpf.Instruction{ - // jgt [syscall],[baseJumpEnosys+1] - bpf.JumpIf{ - Cond: bpf.JumpGreaterThan, - Val: uint32(sysno), - SkipTrue: uint8(baseJumpEnosys + 1), - }, - // ja [baseJumpFilter] - bpf.Jump{Skip: baseJumpFilter}, - } - } else { - sectionTail = []bpf.Instruction{ - // jle [syscall],1 - bpf.JumpIf{Cond: bpf.JumpLessOrEqual, Val: uint32(sysno), SkipTrue: 1}, - // ja [baseJumpEnosys+1] - bpf.Jump{Skip: baseJumpEnosys + 1}, - // ja [baseJumpFilter] - bpf.Jump{Skip: baseJumpFilter}, - } - } - - // If we're on x86 we need to add a check for x32 and if we're in - // the wrong mode we jump over the section. - if uint32(nativeArch) == uint32(C.C_AUDIT_ARCH_X86_64) { - // Grab the only architecture in the map. - var scmpArch libseccomp.ScmpArch - for arch := range maxSyscalls { - scmpArch = arch - } - - // Generate a prefix to check the mode. - switch scmpArch { - case libseccomp.ArchAMD64: - sectionTail = append([]bpf.Instruction{ - // jset (1<<30),[len(tail)-1] - bpf.JumpIf{ - Cond: bpf.JumpBitsSet, - Val: 1 << 30, - SkipTrue: uint8(len(sectionTail) - 1), - }, - }, sectionTail...) - case libseccomp.ArchX32: - sectionTail = append([]bpf.Instruction{ - // jset (1<<30),0,[len(tail)-1] - bpf.JumpIf{ - Cond: bpf.JumpBitsNotSet, - Val: 1 << 30, - SkipTrue: uint8(len(sectionTail) - 1), - }, - }, sectionTail...) - default: - return nil, errors.Errorf("unknown amd64 native architecture %#x", scmpArch) - } - } - - section = append(section, sectionTail...) - case 2: - // x32 and x86_64 are a unique case, we can't handle any others. - if uint32(nativeArch) != uint32(C.C_AUDIT_ARCH_X86_64) { - return nil, errors.Errorf("unknown architecture overlap on native arch %#x", nativeArch) - } - - x32sysno, ok := maxSyscalls[libseccomp.ArchX32] - if !ok { - return nil, errors.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchX32, maxSyscalls) - } - x86sysno, ok := maxSyscalls[libseccomp.ArchAMD64] - if !ok { - return nil, errors.Errorf("missing %v in overlapping x86_64 arch: %v", libseccomp.ArchAMD64, maxSyscalls) - } - - // The x32 ABI indicates that a syscall is being made by an x32 - // process by setting the 30th bit of the syscall number, but we - // need to do some special-casing depending on whether we need to - // do long jumps. 
- if baseJumpEnosys+2 <= 255 { - // For the simple case we want to have something like: - // jset (1<<30),1 - // jgt [x86 syscall],[baseJumpEnosys+2],1 - // jgt [x32 syscall],[baseJumpEnosys+1] - // ja [baseJumpFilter] - section = append(section, []bpf.Instruction{ - // jset (1<<30),1 - bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 1 << 30, SkipTrue: 1}, - // jgt [x86 syscall],[baseJumpEnosys+1],1 - bpf.JumpIf{ - Cond: bpf.JumpGreaterThan, - Val: uint32(x86sysno), - SkipTrue: uint8(baseJumpEnosys + 2), SkipFalse: 1, - }, - // jgt [x32 syscall],[baseJumpEnosys] - bpf.JumpIf{ - Cond: bpf.JumpGreaterThan, - Val: uint32(x32sysno), - SkipTrue: uint8(baseJumpEnosys + 1), - }, - // ja [baseJumpFilter] - bpf.Jump{Skip: baseJumpFilter}, - }...) - } else { - // But if the [baseJumpEnosys+2] jump is larger than 255 we - // need to do a long jump like so: - // jset (1<<30),1 - // jgt [x86 syscall],1,2 - // jle [x32 syscall],1 - // ja [baseJumpEnosys+1] - // ja [baseJumpFilter] - section = append(section, []bpf.Instruction{ - // jset (1<<30),1 - bpf.JumpIf{Cond: bpf.JumpBitsSet, Val: 1 << 30, SkipTrue: 1}, - // jgt [x86 syscall],1,2 - bpf.JumpIf{ - Cond: bpf.JumpGreaterThan, - Val: uint32(x86sysno), - SkipTrue: 1, SkipFalse: 2, - }, - // jle [x32 syscall],[baseJumpEnosys] - bpf.JumpIf{ - Cond: bpf.JumpLessOrEqual, - Val: uint32(x32sysno), - SkipTrue: 1, - }, - // ja [baseJumpEnosys+1] - bpf.Jump{Skip: baseJumpEnosys + 1}, - // ja [baseJumpFilter] - bpf.Jump{Skip: baseJumpFilter}, - }...) - } - default: - return nil, errors.Errorf("invalid number of architecture overlaps: %v", len(maxSyscalls)) - } - - // Prepend this section to the tail. - programTail = append(section, programTail...) - - // Update jump table. - archJumpTable[nativeArch] = uint32(len(programTail)) - } - - // Add a dummy "jump to filter" for any architecture we might miss below. - // Such architectures will probably get the BadArch action of the filter - // regardless. - programTail = append([]bpf.Instruction{ - // ja [end of stub and start of filter] - bpf.Jump{Skip: uint32(len(programTail))}, - }, programTail...) - - // Generate the jump rules for each architecture. This has to be done in - // reverse as well for the same reason as above. We add to programTail - // directly because the jumps are impacted by each architecture rule we add - // as well. - // - // TODO: Maybe we want to optimise to avoid long jumps here? So sort the - // architectures based on how large the jumps are going to be, or - // re-sort the candidate architectures each time to make sure that we - // pick the largest jump which is going to be smaller than 255. - for nativeArch := range lastSyscalls { - // We jump forwards but the jump table is calculated from the *END*. - jump := uint32(len(programTail)) - archJumpTable[nativeArch] - - // Same routine as above -- this is a basic jeq check, complicated - // slightly if it turns out that we need to do a long jump. - if jump <= 255 { - programTail = append([]bpf.Instruction{ - // jeq [arch],[jump] - bpf.JumpIf{ - Cond: bpf.JumpEqual, - Val: uint32(nativeArch), - SkipTrue: uint8(jump), - }, - }, programTail...) - } else { - programTail = append([]bpf.Instruction{ - // jne [arch],1 - bpf.JumpIf{ - Cond: bpf.JumpNotEqual, - Val: uint32(nativeArch), - SkipTrue: 1, - }, - // ja [jump] - bpf.Jump{Skip: jump}, - }, programTail...) - } - } - - // Prepend the load instruction for the architecture. - programTail = append([]bpf.Instruction{ - // load [4] - bpf.LoadAbsolute{Off: 4, Size: 4}, // NOTE: We assume sizeof(int) == 4. 
- }, programTail...) - - // And that's all folks! - return programTail, nil -} - -func assemble(program []bpf.Instruction) ([]unix.SockFilter, error) { - rawProgram, err := bpf.Assemble(program) - if err != nil { - return nil, errors.Wrap(err, "assembling program") - } - - // Convert to []unix.SockFilter for unix.SockFilter. - var filter []unix.SockFilter - for _, insn := range rawProgram { - filter = append(filter, unix.SockFilter{ - Code: insn.Op, - Jt: insn.Jt, - Jf: insn.Jf, - K: insn.K, - }) - } - return filter, nil -} - -func generatePatch(config *configs.Seccomp) ([]bpf.Instruction, error) { - // Patch the generated cBPF only when there is not a defaultErrnoRet set - // and it is different from ENOSYS - if config.DefaultErrnoRet != nil && *config.DefaultErrnoRet == uint(retErrnoEnosys) { - return nil, nil - } - // We only add the stub if the default action is not permissive. - if isAllowAction(config.DefaultAction) { - logrus.Debugf("seccomp: skipping -ENOSYS stub filter generation") - return nil, nil - } - - lastSyscalls, err := findLastSyscalls(config) - if err != nil { - return nil, errors.Wrap(err, "finding last syscalls for -ENOSYS stub") - } - stubProgram, err := generateEnosysStub(lastSyscalls) - if err != nil { - return nil, errors.Wrap(err, "generating -ENOSYS stub") - } - return stubProgram, nil -} - -func enosysPatchFilter(config *configs.Seccomp, filter *libseccomp.ScmpFilter) ([]unix.SockFilter, error) { - program, err := disassembleFilter(filter) - if err != nil { - return nil, errors.Wrap(err, "disassembling original filter") - } - - patch, err := generatePatch(config) - if err != nil { - return nil, errors.Wrap(err, "generating patch for filter") - } - fullProgram := append(patch, program...) - - logrus.Debugf("seccomp: prepending -ENOSYS stub filter to user filter...") - for idx, insn := range patch { - logrus.Debugf(" [%4.1d] %s", idx, insn) - } - logrus.Debugf(" [....] --- original filter ---") - - fprog, err := assemble(fullProgram) - if err != nil { - return nil, errors.Wrap(err, "assembling modified filter") - } - return fprog, nil -} - -func filterFlags(filter *libseccomp.ScmpFilter) (flags uint, noNewPrivs bool, err error) { - // Ignore the error since pre-2.4 libseccomp is treated as API level 0. - apiLevel, _ := libseccomp.GetApi() - - noNewPrivs, err = filter.GetNoNewPrivsBit() - if err != nil { - return 0, false, errors.Wrap(err, "fetch no_new_privs filter bit") - } - - if apiLevel >= 3 { - if logBit, err := filter.GetLogBit(); err != nil { - return 0, false, errors.Wrap(err, "fetch SECCOMP_FILTER_FLAG_LOG bit") - } else if logBit { - flags |= uint(C.C_FILTER_FLAG_LOG) - } - } - - // TODO: Support seccomp flags not yet added to libseccomp-golang... - return -} - -func sysSeccompSetFilter(flags uint, filter []unix.SockFilter) (err error) { - fprog := unix.SockFprog{ - Len: uint16(len(filter)), - Filter: &filter[0], - } - // If no seccomp flags were requested we can use the old-school prctl(2). - if flags == 0 { - err = unix.Prctl(unix.PR_SET_SECCOMP, - unix.SECCOMP_MODE_FILTER, - uintptr(unsafe.Pointer(&fprog)), 0, 0) - } else { - _, _, errno := unix.RawSyscall(unix.SYS_SECCOMP, - uintptr(C.C_SET_MODE_FILTER), - uintptr(flags), uintptr(unsafe.Pointer(&fprog))) - if errno != 0 { - err = errno - } - } - runtime.KeepAlive(filter) - runtime.KeepAlive(fprog) - return -} - -// PatchAndLoad takes a seccomp configuration and a libseccomp filter which has -// been pre-configured with the set of rules in the seccomp config. 
It then -// patches said filter to handle -ENOSYS in a much nicer manner than the -// default libseccomp default action behaviour, and loads the patched filter -// into the kernel for the current process. -func PatchAndLoad(config *configs.Seccomp, filter *libseccomp.ScmpFilter) error { - // Generate a patched filter. - fprog, err := enosysPatchFilter(config, filter) - if err != nil { - return errors.Wrap(err, "patching filter") - } - - // Get the set of libseccomp flags set. - seccompFlags, noNewPrivs, err := filterFlags(filter) - if err != nil { - return errors.Wrap(err, "fetch seccomp filter flags") - } - - // Set no_new_privs if it was requested, though in runc we handle - // no_new_privs separately so warn if we hit this path. - if noNewPrivs { - logrus.Warnf("potentially misconfigured filter -- setting no_new_privs in seccomp path") - if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { - return errors.Wrap(err, "enable no_new_privs bit") - } - } - - // Finally, load the filter. - if err := sysSeccompSetFilter(seccompFlags, fprog); err != nil { - return errors.Wrap(err, "loading seccomp filter") - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go deleted file mode 100644 index 682131e49c..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/patchbpf/enosys_unsupported.go +++ /dev/null @@ -1,3 +0,0 @@ -// +build !linux !cgo !seccomp - -package patchbpf diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go deleted file mode 100644 index b14d0ede3b..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_linux.go +++ /dev/null @@ -1,222 +0,0 @@ -// +build linux,cgo,seccomp - -package seccomp - -import ( - "errors" - "fmt" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/seccomp/patchbpf" - - libseccomp "github.com/seccomp/libseccomp-golang" - "golang.org/x/sys/unix" -) - -var ( - actAllow = libseccomp.ActAllow - actTrap = libseccomp.ActTrap - actKill = libseccomp.ActKill - actTrace = libseccomp.ActTrace.SetReturnCode(int16(unix.EPERM)) - actLog = libseccomp.ActLog - actErrno = libseccomp.ActErrno.SetReturnCode(int16(unix.EPERM)) -) - -const ( - // Linux system calls can have at most 6 arguments - syscallMaxArguments int = 6 -) - -// Filters given syscalls in a container, preventing them from being used -// Started in the container init process, and carried over to all child processes -// Setns calls, however, require a separate invocation, as they are not children -// of the init until they join the namespace -func InitSeccomp(config *configs.Seccomp) error { - if config == nil { - return errors.New("cannot initialize Seccomp - nil config passed") - } - - defaultAction, err := getAction(config.DefaultAction, config.DefaultErrnoRet) - if err != nil { - return errors.New("error initializing seccomp - invalid default action") - } - - filter, err := libseccomp.NewFilter(defaultAction) - if err != nil { - return fmt.Errorf("error creating filter: %s", err) - } - - // Add extra architectures - for _, arch := range config.Architectures { - scmpArch, err := libseccomp.GetArchFromString(arch) - if err != nil { - return 
fmt.Errorf("error validating Seccomp architecture: %s", err) - } - if err := filter.AddArch(scmpArch); err != nil { - return fmt.Errorf("error adding architecture to seccomp filter: %s", err) - } - } - - // Unset no new privs bit - if err := filter.SetNoNewPrivsBit(false); err != nil { - return fmt.Errorf("error setting no new privileges: %s", err) - } - - // Add a rule for each syscall - for _, call := range config.Syscalls { - if call == nil { - return errors.New("encountered nil syscall while initializing Seccomp") - } - if err := matchCall(filter, call); err != nil { - return err - } - } - if err := patchbpf.PatchAndLoad(config, filter); err != nil { - return fmt.Errorf("error loading seccomp filter into kernel: %s", err) - } - return nil -} - -// Convert Libcontainer Action to Libseccomp ScmpAction -func getAction(act configs.Action, errnoRet *uint) (libseccomp.ScmpAction, error) { - switch act { - case configs.Kill: - return actKill, nil - case configs.Errno: - if errnoRet != nil { - return libseccomp.ActErrno.SetReturnCode(int16(*errnoRet)), nil - } - return actErrno, nil - case configs.Trap: - return actTrap, nil - case configs.Allow: - return actAllow, nil - case configs.Trace: - if errnoRet != nil { - return libseccomp.ActTrace.SetReturnCode(int16(*errnoRet)), nil - } - return actTrace, nil - case configs.Log: - return actLog, nil - default: - return libseccomp.ActInvalid, errors.New("invalid action, cannot use in rule") - } -} - -// Convert Libcontainer Operator to Libseccomp ScmpCompareOp -func getOperator(op configs.Operator) (libseccomp.ScmpCompareOp, error) { - switch op { - case configs.EqualTo: - return libseccomp.CompareEqual, nil - case configs.NotEqualTo: - return libseccomp.CompareNotEqual, nil - case configs.GreaterThan: - return libseccomp.CompareGreater, nil - case configs.GreaterThanOrEqualTo: - return libseccomp.CompareGreaterEqual, nil - case configs.LessThan: - return libseccomp.CompareLess, nil - case configs.LessThanOrEqualTo: - return libseccomp.CompareLessOrEqual, nil - case configs.MaskEqualTo: - return libseccomp.CompareMaskedEqual, nil - default: - return libseccomp.CompareInvalid, errors.New("invalid operator, cannot use in rule") - } -} - -// Convert Libcontainer Arg to Libseccomp ScmpCondition -func getCondition(arg *configs.Arg) (libseccomp.ScmpCondition, error) { - cond := libseccomp.ScmpCondition{} - - if arg == nil { - return cond, errors.New("cannot convert nil to syscall condition") - } - - op, err := getOperator(arg.Op) - if err != nil { - return cond, err - } - - return libseccomp.MakeCondition(arg.Index, op, arg.Value, arg.ValueTwo) -} - -// Add a rule to match a single syscall -func matchCall(filter *libseccomp.ScmpFilter, call *configs.Syscall) error { - if call == nil || filter == nil { - return errors.New("cannot use nil as syscall to block") - } - - if len(call.Name) == 0 { - return errors.New("empty string is not a valid syscall") - } - - // If we can't resolve the syscall, assume it's not supported on this kernel - // Ignore it, don't error out - callNum, err := libseccomp.GetSyscallFromName(call.Name) - if err != nil { - return nil - } - - // Convert the call's action to the libseccomp equivalent - callAct, err := getAction(call.Action, call.ErrnoRet) - if err != nil { - return fmt.Errorf("action in seccomp profile is invalid: %s", err) - } - - // Unconditional match - just add the rule - if len(call.Args) == 0 { - if err := filter.AddRule(callNum, callAct); err != nil { - return fmt.Errorf("error adding seccomp filter rule for syscall 
%s: %s", call.Name, err) - } - } else { - // If two or more arguments have the same condition, - // Revert to old behavior, adding each condition as a separate rule - argCounts := make([]uint, syscallMaxArguments) - conditions := []libseccomp.ScmpCondition{} - - for _, cond := range call.Args { - newCond, err := getCondition(cond) - if err != nil { - return fmt.Errorf("error creating seccomp syscall condition for syscall %s: %s", call.Name, err) - } - - argCounts[cond.Index] += 1 - - conditions = append(conditions, newCond) - } - - hasMultipleArgs := false - for _, count := range argCounts { - if count > 1 { - hasMultipleArgs = true - break - } - } - - if hasMultipleArgs { - // Revert to old behavior - // Add each condition attached to a separate rule - for _, cond := range conditions { - condArr := []libseccomp.ScmpCondition{cond} - - if err := filter.AddRuleConditional(callNum, callAct, condArr); err != nil { - return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err) - } - } - } else { - // No conditions share same argument - // Use new, proper behavior - if err := filter.AddRuleConditional(callNum, callAct, conditions); err != nil { - return fmt.Errorf("error adding seccomp rule for syscall %s: %s", call.Name, err) - } - } - } - - return nil -} - -// Version returns major, minor, and micro. -func Version() (uint, uint, uint) { - return libseccomp.GetLibraryVersion() -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go deleted file mode 100644 index 8b7973e9a1..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/seccomp/seccomp_unsupported.go +++ /dev/null @@ -1,24 +0,0 @@ -// +build !linux !cgo !seccomp - -package seccomp - -import ( - "errors" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -var ErrSeccompNotEnabled = errors.New("seccomp: config provided but seccomp not supported") - -// InitSeccomp does nothing because seccomp is not supported. -func InitSeccomp(config *configs.Seccomp) error { - if config != nil { - return ErrSeccompNotEnabled - } - return nil -} - -// Version returns major, minor, and micro. -func Version() (uint, uint, uint) { - return 0, 0, 0 -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go deleted file mode 100644 index 2d7e5814e8..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/setns_init_linux.go +++ /dev/null @@ -1,98 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "os" - "runtime" - - "github.com/opencontainers/runc/libcontainer/apparmor" - "github.com/opencontainers/runc/libcontainer/keys" - "github.com/opencontainers/runc/libcontainer/seccomp" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/selinux/go-selinux" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -// linuxSetnsInit performs the container's initialization for running a new process -// inside an existing container. -type linuxSetnsInit struct { - pipe *os.File - consoleSocket *os.File - config *initConfig - logFd int -} - -func (l *linuxSetnsInit) getSessionRingName() string { - return "_ses." 
+ l.config.ContainerId -} - -func (l *linuxSetnsInit) Init() error { - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - if !l.config.Config.NoNewKeyring { - if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil { - return err - } - defer selinux.SetKeyLabel("") //nolint: errcheck - // Do not inherit the parent's session keyring. - if _, err := keys.JoinSessionKeyring(l.getSessionRingName()); err != nil { - // Same justification as in standart_init_linux.go as to why we - // don't bail on ENOSYS. - // - // TODO(cyphar): And we should have logging here too. - if errors.Cause(err) != unix.ENOSYS { - return errors.Wrap(err, "join session keyring") - } - } - } - if l.config.CreateConsole { - if err := setupConsole(l.consoleSocket, l.config, false); err != nil { - return err - } - if err := system.Setctty(); err != nil { - return err - } - } - if l.config.NoNewPrivileges { - if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { - return err - } - } - if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil { - return err - } - defer selinux.SetExecLabel("") //nolint: errcheck - // Without NoNewPrivileges seccomp is a privileged operation, so we need to - // do this before dropping capabilities; otherwise do it as late as possible - // just before execve so as few syscalls take place after it as possible. - if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { - if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { - return err - } - } - if err := finalizeNamespace(l.config); err != nil { - return err - } - if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { - return err - } - // Set seccomp as close to execve as possible, so as few syscalls take - // place afterward (reducing the amount of syscalls that users need to - // enable in their seccomp profiles). - if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { - if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { - return newSystemErrorWithCause(err, "init seccomp") - } - } - logrus.Debugf("setns_init: about to exec") - // Close the log pipe fd so the parent's ForwardLogs can exit. - if err := unix.Close(l.logFd); err != nil { - return newSystemErrorWithCause(err, "closing log pipe fd") - } - - return system.Execv(l.config.Args[0], l.config.Args[0:], os.Environ()) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go deleted file mode 100644 index 56bab3bfbf..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/example.go +++ /dev/null @@ -1,230 +0,0 @@ -package specconv - -import ( - "os" - "path/filepath" - "strings" - - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runtime-spec/specs-go" -) - -// Example returns an example spec file, with many options set so a user can -// see what a standard spec file looks like. 
-func Example() *specs.Spec { - spec := &specs.Spec{ - Version: specs.Version, - Root: &specs.Root{ - Path: "rootfs", - Readonly: true, - }, - Process: &specs.Process{ - Terminal: true, - User: specs.User{}, - Args: []string{ - "sh", - }, - Env: []string{ - "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", - "TERM=xterm", - }, - Cwd: "/", - NoNewPrivileges: true, - Capabilities: &specs.LinuxCapabilities{ - Bounding: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Permitted: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Inheritable: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Ambient: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - Effective: []string{ - "CAP_AUDIT_WRITE", - "CAP_KILL", - "CAP_NET_BIND_SERVICE", - }, - }, - Rlimits: []specs.POSIXRlimit{ - { - Type: "RLIMIT_NOFILE", - Hard: uint64(1024), - Soft: uint64(1024), - }, - }, - }, - Hostname: "runc", - Mounts: []specs.Mount{ - { - Destination: "/proc", - Type: "proc", - Source: "proc", - Options: nil, - }, - { - Destination: "/dev", - Type: "tmpfs", - Source: "tmpfs", - Options: []string{"nosuid", "strictatime", "mode=755", "size=65536k"}, - }, - { - Destination: "/dev/pts", - Type: "devpts", - Source: "devpts", - Options: []string{"nosuid", "noexec", "newinstance", "ptmxmode=0666", "mode=0620", "gid=5"}, - }, - { - Destination: "/dev/shm", - Type: "tmpfs", - Source: "shm", - Options: []string{"nosuid", "noexec", "nodev", "mode=1777", "size=65536k"}, - }, - { - Destination: "/dev/mqueue", - Type: "mqueue", - Source: "mqueue", - Options: []string{"nosuid", "noexec", "nodev"}, - }, - { - Destination: "/sys", - Type: "sysfs", - Source: "sysfs", - Options: []string{"nosuid", "noexec", "nodev", "ro"}, - }, - { - Destination: "/sys/fs/cgroup", - Type: "cgroup", - Source: "cgroup", - Options: []string{"nosuid", "noexec", "nodev", "relatime", "ro"}, - }, - }, - Linux: &specs.Linux{ - MaskedPaths: []string{ - "/proc/acpi", - "/proc/asound", - "/proc/kcore", - "/proc/keys", - "/proc/latency_stats", - "/proc/timer_list", - "/proc/timer_stats", - "/proc/sched_debug", - "/sys/firmware", - "/proc/scsi", - }, - ReadonlyPaths: []string{ - "/proc/bus", - "/proc/fs", - "/proc/irq", - "/proc/sys", - "/proc/sysrq-trigger", - }, - Resources: &specs.LinuxResources{ - Devices: []specs.LinuxDeviceCgroup{ - { - Allow: false, - Access: "rwm", - }, - }, - }, - Namespaces: []specs.LinuxNamespace{ - { - Type: specs.PIDNamespace, - }, - { - Type: specs.NetworkNamespace, - }, - { - Type: specs.IPCNamespace, - }, - { - Type: specs.UTSNamespace, - }, - { - Type: specs.MountNamespace, - }, - }, - }, - } - if cgroups.IsCgroup2UnifiedMode() { - spec.Linux.Namespaces = append(spec.Linux.Namespaces, specs.LinuxNamespace{ - Type: specs.CgroupNamespace, - }) - } - return spec -} - -// ToRootless converts the given spec file into one that should work with -// rootless containers (euid != 0), by removing incompatible options and adding others that -// are needed. -func ToRootless(spec *specs.Spec) { - var namespaces []specs.LinuxNamespace - - // Remove networkns from the spec. - for _, ns := range spec.Linux.Namespaces { - switch ns.Type { - case specs.NetworkNamespace, specs.UserNamespace: - // Do nothing. - default: - namespaces = append(namespaces, ns) - } - } - // Add userns to the spec. 
- namespaces = append(namespaces, specs.LinuxNamespace{ - Type: specs.UserNamespace, - }) - spec.Linux.Namespaces = namespaces - - // Add mappings for the current user. - spec.Linux.UIDMappings = []specs.LinuxIDMapping{{ - HostID: uint32(os.Geteuid()), - ContainerID: 0, - Size: 1, - }} - spec.Linux.GIDMappings = []specs.LinuxIDMapping{{ - HostID: uint32(os.Getegid()), - ContainerID: 0, - Size: 1, - }} - - // Fix up mounts. - var mounts []specs.Mount - for _, mount := range spec.Mounts { - // Replace the /sys mount with an rbind. - if filepath.Clean(mount.Destination) == "/sys" { - mounts = append(mounts, specs.Mount{ - Source: "/sys", - Destination: "/sys", - Type: "none", - Options: []string{"rbind", "nosuid", "noexec", "nodev", "ro"}, - }) - continue - } - - // Remove all gid= and uid= mappings. - var options []string - for _, option := range mount.Options { - if !strings.HasPrefix(option, "gid=") && !strings.HasPrefix(option, "uid=") { - options = append(options, option) - } - } - - mount.Options = options - mounts = append(mounts, mount) - } - spec.Mounts = mounts - - // Remove cgroup settings. - spec.Linux.Resources = nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go deleted file mode 100644 index 8474769c94..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/specconv/spec_linux.go +++ /dev/null @@ -1,964 +0,0 @@ -// +build linux - -// Package specconv implements conversion of specifications to libcontainer -// configurations -package specconv - -import ( - "errors" - "fmt" - "os" - "path/filepath" - "regexp" - "strings" - "time" - - systemdDbus "github.com/coreos/go-systemd/v22/dbus" - dbus "github.com/godbus/dbus/v5" - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/devices" - "github.com/opencontainers/runc/libcontainer/seccomp" - libcontainerUtils "github.com/opencontainers/runc/libcontainer/utils" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - - "golang.org/x/sys/unix" -) - -var namespaceMapping = map[specs.LinuxNamespaceType]configs.NamespaceType{ - specs.PIDNamespace: configs.NEWPID, - specs.NetworkNamespace: configs.NEWNET, - specs.MountNamespace: configs.NEWNS, - specs.UserNamespace: configs.NEWUSER, - specs.IPCNamespace: configs.NEWIPC, - specs.UTSNamespace: configs.NEWUTS, - specs.CgroupNamespace: configs.NEWCGROUP, -} - -var mountPropagationMapping = map[string]int{ - "rprivate": unix.MS_PRIVATE | unix.MS_REC, - "private": unix.MS_PRIVATE, - "rslave": unix.MS_SLAVE | unix.MS_REC, - "slave": unix.MS_SLAVE, - "rshared": unix.MS_SHARED | unix.MS_REC, - "shared": unix.MS_SHARED, - "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, - "unbindable": unix.MS_UNBINDABLE, - "": 0, -} - -// AllowedDevices is the set of devices which are automatically included for -// all containers. -// -// XXX (cyphar) -// This behaviour is at the very least "questionable" (if not outright -// wrong) according to the runtime-spec. -// -// Yes, we have to include certain devices other than the ones the user -// specifies, but several devices listed here are not part of the spec -// (including "mknod for any device"?!). In addition, these rules are -// appended to the user-provided set which means that users *cannot disable -// this behaviour*. -// -// ... 
unfortunately I'm too scared to change this now because who knows how -// many people depend on this (incorrect and arguably insecure) behaviour. -var AllowedDevices = []*devices.Device{ - // allow mknod for any device - { - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: devices.Wildcard, - Minor: devices.Wildcard, - Permissions: "m", - Allow: true, - }, - }, - { - Rule: devices.Rule{ - Type: devices.BlockDevice, - Major: devices.Wildcard, - Minor: devices.Wildcard, - Permissions: "m", - Allow: true, - }, - }, - { - Path: "/dev/null", - FileMode: 0o666, - Uid: 0, - Gid: 0, - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 1, - Minor: 3, - Permissions: "rwm", - Allow: true, - }, - }, - { - Path: "/dev/random", - FileMode: 0o666, - Uid: 0, - Gid: 0, - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 1, - Minor: 8, - Permissions: "rwm", - Allow: true, - }, - }, - { - Path: "/dev/full", - FileMode: 0o666, - Uid: 0, - Gid: 0, - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 1, - Minor: 7, - Permissions: "rwm", - Allow: true, - }, - }, - { - Path: "/dev/tty", - FileMode: 0o666, - Uid: 0, - Gid: 0, - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 5, - Minor: 0, - Permissions: "rwm", - Allow: true, - }, - }, - { - Path: "/dev/zero", - FileMode: 0o666, - Uid: 0, - Gid: 0, - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 1, - Minor: 5, - Permissions: "rwm", - Allow: true, - }, - }, - { - Path: "/dev/urandom", - FileMode: 0o666, - Uid: 0, - Gid: 0, - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 1, - Minor: 9, - Permissions: "rwm", - Allow: true, - }, - }, - // /dev/pts/ - pts namespaces are "coming soon" - { - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 136, - Minor: devices.Wildcard, - Permissions: "rwm", - Allow: true, - }, - }, - { - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 5, - Minor: 2, - Permissions: "rwm", - Allow: true, - }, - }, - // tuntap - { - Rule: devices.Rule{ - Type: devices.CharDevice, - Major: 10, - Minor: 200, - Permissions: "rwm", - Allow: true, - }, - }, -} - -type CreateOpts struct { - CgroupName string - UseSystemdCgroup bool - NoPivotRoot bool - NoNewKeyring bool - Spec *specs.Spec - RootlessEUID bool - RootlessCgroups bool -} - -// CreateLibcontainerConfig creates a new libcontainer configuration from a -// given specification and a cgroup name -func CreateLibcontainerConfig(opts *CreateOpts) (*configs.Config, error) { - // runc's cwd will always be the bundle path - rcwd, err := os.Getwd() - if err != nil { - return nil, err - } - cwd, err := filepath.Abs(rcwd) - if err != nil { - return nil, err - } - spec := opts.Spec - if spec.Root == nil { - return nil, fmt.Errorf("Root must be specified") - } - rootfsPath := spec.Root.Path - if !filepath.IsAbs(rootfsPath) { - rootfsPath = filepath.Join(cwd, rootfsPath) - } - labels := []string{} - for k, v := range spec.Annotations { - labels = append(labels, k+"="+v) - } - config := &configs.Config{ - Rootfs: rootfsPath, - NoPivotRoot: opts.NoPivotRoot, - Readonlyfs: spec.Root.Readonly, - Hostname: spec.Hostname, - Labels: append(labels, "bundle="+cwd), - NoNewKeyring: opts.NoNewKeyring, - RootlessEUID: opts.RootlessEUID, - RootlessCgroups: opts.RootlessCgroups, - } - - for _, m := range spec.Mounts { - cm, err := createLibcontainerMount(cwd, m) - if err != nil { - return nil, fmt.Errorf("invalid mount %+v: %w", m, err) - } - config.Mounts = append(config.Mounts, cm) - } - - defaultDevs, err := createDevices(spec, config) - if err != nil { 
- return nil, err - } - - c, err := CreateCgroupConfig(opts, defaultDevs) - if err != nil { - return nil, err - } - - config.Cgroups = c - // set linux-specific config - if spec.Linux != nil { - var exists bool - if config.RootPropagation, exists = mountPropagationMapping[spec.Linux.RootfsPropagation]; !exists { - return nil, fmt.Errorf("rootfsPropagation=%v is not supported", spec.Linux.RootfsPropagation) - } - if config.NoPivotRoot && (config.RootPropagation&unix.MS_PRIVATE != 0) { - return nil, fmt.Errorf("rootfsPropagation of [r]private is not safe without pivot_root") - } - - for _, ns := range spec.Linux.Namespaces { - t, exists := namespaceMapping[ns.Type] - if !exists { - return nil, fmt.Errorf("namespace %q does not exist", ns) - } - if config.Namespaces.Contains(t) { - return nil, fmt.Errorf("malformed spec file: duplicated ns %q", ns) - } - config.Namespaces.Add(t, ns.Path) - } - if config.Namespaces.Contains(configs.NEWNET) && config.Namespaces.PathOf(configs.NEWNET) == "" { - config.Networks = []*configs.Network{ - { - Type: "loopback", - }, - } - } - if config.Namespaces.Contains(configs.NEWUSER) { - if err := setupUserNamespace(spec, config); err != nil { - return nil, err - } - } - config.MaskPaths = spec.Linux.MaskedPaths - config.ReadonlyPaths = spec.Linux.ReadonlyPaths - config.MountLabel = spec.Linux.MountLabel - config.Sysctl = spec.Linux.Sysctl - if spec.Linux.Seccomp != nil { - seccomp, err := SetupSeccomp(spec.Linux.Seccomp) - if err != nil { - return nil, err - } - config.Seccomp = seccomp - } - if spec.Linux.IntelRdt != nil { - config.IntelRdt = &configs.IntelRdt{} - if spec.Linux.IntelRdt.L3CacheSchema != "" { - config.IntelRdt.L3CacheSchema = spec.Linux.IntelRdt.L3CacheSchema - } - if spec.Linux.IntelRdt.MemBwSchema != "" { - config.IntelRdt.MemBwSchema = spec.Linux.IntelRdt.MemBwSchema - } - } - } - if spec.Process != nil { - config.OomScoreAdj = spec.Process.OOMScoreAdj - config.NoNewPrivileges = spec.Process.NoNewPrivileges - config.Umask = spec.Process.User.Umask - if spec.Process.SelinuxLabel != "" { - config.ProcessLabel = spec.Process.SelinuxLabel - } - if spec.Process.Capabilities != nil { - config.Capabilities = &configs.Capabilities{ - Bounding: spec.Process.Capabilities.Bounding, - Effective: spec.Process.Capabilities.Effective, - Permitted: spec.Process.Capabilities.Permitted, - Inheritable: spec.Process.Capabilities.Inheritable, - Ambient: spec.Process.Capabilities.Ambient, - } - } - } - createHooks(spec, config) - config.Version = specs.Version - return config, nil -} - -func createLibcontainerMount(cwd string, m specs.Mount) (*configs.Mount, error) { - if !filepath.IsAbs(m.Destination) { - // Relax validation for backward compatibility - // TODO (runc v1.x.x): change warning to an error - // return nil, fmt.Errorf("mount destination %s is not absolute", m.Destination) - logrus.Warnf("mount destination %s is not absolute. Support for non-absolute mount destinations will be removed in a future release.", m.Destination) - } - flags, pgflags, data, ext := parseMountOptions(m.Options) - source := m.Source - device := m.Type - if flags&unix.MS_BIND != 0 { - // Any "type" the user specified is meaningless (and ignored) for - // bind-mounts -- so we set it to "bind" because rootfs_linux.go - // (incorrectly) relies on this for some checks. 
- device = "bind" - if !filepath.IsAbs(source) { - source = filepath.Join(cwd, m.Source) - } - } - return &configs.Mount{ - Device: device, - Source: source, - Destination: m.Destination, - Data: data, - Flags: flags, - PropagationFlags: pgflags, - Extensions: ext, - }, nil -} - -// systemd property name check: latin letters only, at least 3 of them -var isValidName = regexp.MustCompile(`^[a-zA-Z]{3,}$`).MatchString - -var isSecSuffix = regexp.MustCompile(`[a-z]Sec$`).MatchString - -// Some systemd properties are documented as having "Sec" suffix -// (e.g. TimeoutStopSec) but are expected to have "USec" suffix -// here, so let's provide conversion to improve compatibility. -func convertSecToUSec(value dbus.Variant) (dbus.Variant, error) { - var sec uint64 - const M = 1000000 - vi := value.Value() - switch value.Signature().String() { - case "y": - sec = uint64(vi.(byte)) * M - case "n": - sec = uint64(vi.(int16)) * M - case "q": - sec = uint64(vi.(uint16)) * M - case "i": - sec = uint64(vi.(int32)) * M - case "u": - sec = uint64(vi.(uint32)) * M - case "x": - sec = uint64(vi.(int64)) * M - case "t": - sec = vi.(uint64) * M - case "d": - sec = uint64(vi.(float64) * M) - default: - return value, errors.New("not a number") - } - return dbus.MakeVariant(sec), nil -} - -func initSystemdProps(spec *specs.Spec) ([]systemdDbus.Property, error) { - const keyPrefix = "org.systemd.property." - var sp []systemdDbus.Property - - for k, v := range spec.Annotations { - name := strings.TrimPrefix(k, keyPrefix) - if len(name) == len(k) { // prefix not there - continue - } - if !isValidName(name) { - return nil, fmt.Errorf("Annotation %s name incorrect: %s", k, name) - } - value, err := dbus.ParseVariant(v, dbus.Signature{}) - if err != nil { - return nil, fmt.Errorf("Annotation %s=%s value parse error: %v", k, v, err) - } - if isSecSuffix(name) { - name = strings.TrimSuffix(name, "Sec") + "USec" - value, err = convertSecToUSec(value) - if err != nil { - return nil, fmt.Errorf("Annotation %s=%s value parse error: %v", k, v, err) - } - } - sp = append(sp, systemdDbus.Property{Name: name, Value: value}) - } - - return sp, nil -} - -func CreateCgroupConfig(opts *CreateOpts, defaultDevs []*devices.Device) (*configs.Cgroup, error) { - var ( - myCgroupPath string - - spec = opts.Spec - useSystemdCgroup = opts.UseSystemdCgroup - name = opts.CgroupName - ) - - c := &configs.Cgroup{ - Resources: &configs.Resources{}, - } - - if useSystemdCgroup { - sp, err := initSystemdProps(spec) - if err != nil { - return nil, err - } - c.SystemdProps = sp - } - - if spec.Linux != nil && spec.Linux.CgroupsPath != "" { - if useSystemdCgroup { - myCgroupPath = spec.Linux.CgroupsPath - } else { - myCgroupPath = libcontainerUtils.CleanPath(spec.Linux.CgroupsPath) - } - } - - if useSystemdCgroup { - if myCgroupPath == "" { - // Default for c.Parent is set by systemd cgroup drivers. - c.ScopePrefix = "runc" - c.Name = name - } else { - // Parse the path from expected "slice:prefix:name" - // for e.g. "system.slice:docker:1234" - parts := strings.Split(myCgroupPath, ":") - if len(parts) != 3 { - return nil, fmt.Errorf("expected cgroupsPath to be of format \"slice:prefix:name\" for systemd cgroups, got %q instead", myCgroupPath) - } - c.Parent = parts[0] - c.ScopePrefix = parts[1] - c.Name = parts[2] - } - } else { - if myCgroupPath == "" { - c.Name = name - } - c.Path = myCgroupPath - } - - // In rootless containers, any attempt to make cgroup changes is likely to fail. - // libcontainer will validate this but ignores the error. 
- if spec.Linux != nil { - r := spec.Linux.Resources - if r != nil { - for i, d := range spec.Linux.Resources.Devices { - var ( - t = "a" - major = int64(-1) - minor = int64(-1) - ) - if d.Type != "" { - t = d.Type - } - if d.Major != nil { - major = *d.Major - } - if d.Minor != nil { - minor = *d.Minor - } - if d.Access == "" { - return nil, fmt.Errorf("device access at %d field cannot be empty", i) - } - dt, err := stringToCgroupDeviceRune(t) - if err != nil { - return nil, err - } - c.Resources.Devices = append(c.Resources.Devices, &devices.Rule{ - Type: dt, - Major: major, - Minor: minor, - Permissions: devices.Permissions(d.Access), - Allow: d.Allow, - }) - } - if r.Memory != nil { - if r.Memory.Limit != nil { - c.Resources.Memory = *r.Memory.Limit - } - if r.Memory.Reservation != nil { - c.Resources.MemoryReservation = *r.Memory.Reservation - } - if r.Memory.Swap != nil { - c.Resources.MemorySwap = *r.Memory.Swap - } - if r.Memory.Kernel != nil || r.Memory.KernelTCP != nil { - logrus.Warn("Kernel memory settings are ignored and will be removed") - } - if r.Memory.Swappiness != nil { - c.Resources.MemorySwappiness = r.Memory.Swappiness - } - if r.Memory.DisableOOMKiller != nil { - c.Resources.OomKillDisable = *r.Memory.DisableOOMKiller - } - } - if r.CPU != nil { - if r.CPU.Shares != nil { - c.Resources.CpuShares = *r.CPU.Shares - - // CpuWeight is used for cgroupv2 and should be converted - c.Resources.CpuWeight = cgroups.ConvertCPUSharesToCgroupV2Value(c.Resources.CpuShares) - } - if r.CPU.Quota != nil { - c.Resources.CpuQuota = *r.CPU.Quota - } - if r.CPU.Period != nil { - c.Resources.CpuPeriod = *r.CPU.Period - } - if r.CPU.RealtimeRuntime != nil { - c.Resources.CpuRtRuntime = *r.CPU.RealtimeRuntime - } - if r.CPU.RealtimePeriod != nil { - c.Resources.CpuRtPeriod = *r.CPU.RealtimePeriod - } - if r.CPU.Cpus != "" { - c.Resources.CpusetCpus = r.CPU.Cpus - } - if r.CPU.Mems != "" { - c.Resources.CpusetMems = r.CPU.Mems - } - } - if r.Pids != nil { - c.Resources.PidsLimit = r.Pids.Limit - } - if r.BlockIO != nil { - if r.BlockIO.Weight != nil { - c.Resources.BlkioWeight = *r.BlockIO.Weight - } - if r.BlockIO.LeafWeight != nil { - c.Resources.BlkioLeafWeight = *r.BlockIO.LeafWeight - } - if r.BlockIO.WeightDevice != nil { - for _, wd := range r.BlockIO.WeightDevice { - var weight, leafWeight uint16 - if wd.Weight != nil { - weight = *wd.Weight - } - if wd.LeafWeight != nil { - leafWeight = *wd.LeafWeight - } - weightDevice := configs.NewWeightDevice(wd.Major, wd.Minor, weight, leafWeight) - c.Resources.BlkioWeightDevice = append(c.Resources.BlkioWeightDevice, weightDevice) - } - } - if r.BlockIO.ThrottleReadBpsDevice != nil { - for _, td := range r.BlockIO.ThrottleReadBpsDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleReadBpsDevice = append(c.Resources.BlkioThrottleReadBpsDevice, throttleDevice) - } - } - if r.BlockIO.ThrottleWriteBpsDevice != nil { - for _, td := range r.BlockIO.ThrottleWriteBpsDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleWriteBpsDevice = append(c.Resources.BlkioThrottleWriteBpsDevice, throttleDevice) - } - } - if r.BlockIO.ThrottleReadIOPSDevice != nil { - for _, td := range r.BlockIO.ThrottleReadIOPSDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleReadIOPSDevice = append(c.Resources.BlkioThrottleReadIOPSDevice, throttleDevice) - } - } 
- if r.BlockIO.ThrottleWriteIOPSDevice != nil { - for _, td := range r.BlockIO.ThrottleWriteIOPSDevice { - rate := td.Rate - throttleDevice := configs.NewThrottleDevice(td.Major, td.Minor, rate) - c.Resources.BlkioThrottleWriteIOPSDevice = append(c.Resources.BlkioThrottleWriteIOPSDevice, throttleDevice) - } - } - } - for _, l := range r.HugepageLimits { - c.Resources.HugetlbLimit = append(c.Resources.HugetlbLimit, &configs.HugepageLimit{ - Pagesize: l.Pagesize, - Limit: l.Limit, - }) - } - if r.Network != nil { - if r.Network.ClassID != nil { - c.Resources.NetClsClassid = *r.Network.ClassID - } - for _, m := range r.Network.Priorities { - c.Resources.NetPrioIfpriomap = append(c.Resources.NetPrioIfpriomap, &configs.IfPrioMap{ - Interface: m.Name, - Priority: int64(m.Priority), - }) - } - } - if len(r.Unified) > 0 { - // copy the map - c.Resources.Unified = make(map[string]string, len(r.Unified)) - for k, v := range r.Unified { - c.Resources.Unified[k] = v - } - } - } - } - - // Append the default allowed devices to the end of the list. - for _, device := range defaultDevs { - c.Resources.Devices = append(c.Resources.Devices, &device.Rule) - } - return c, nil -} - -func stringToCgroupDeviceRune(s string) (devices.Type, error) { - switch s { - case "a": - return devices.WildcardDevice, nil - case "b": - return devices.BlockDevice, nil - case "c": - return devices.CharDevice, nil - default: - return 0, fmt.Errorf("invalid cgroup device type %q", s) - } -} - -func stringToDeviceRune(s string) (devices.Type, error) { - switch s { - case "p": - return devices.FifoDevice, nil - case "u", "c": - return devices.CharDevice, nil - case "b": - return devices.BlockDevice, nil - default: - return 0, fmt.Errorf("invalid device type %q", s) - } -} - -func createDevices(spec *specs.Spec, config *configs.Config) ([]*devices.Device, error) { - // If a spec device is redundant with a default device, remove that default - // device (the spec one takes priority). - dedupedAllowDevs := []*devices.Device{} - -next: - for _, ad := range AllowedDevices { - if ad.Path != "" { - for _, sd := range spec.Linux.Devices { - if sd.Path == ad.Path { - continue next - } - } - } - dedupedAllowDevs = append(dedupedAllowDevs, ad) - if ad.Path != "" { - config.Devices = append(config.Devices, ad) - } - } - - // Merge in additional devices from the spec. 
- if spec.Linux != nil { - for _, d := range spec.Linux.Devices { - var uid, gid uint32 - var filemode os.FileMode = 0o666 - - if d.UID != nil { - uid = *d.UID - } - if d.GID != nil { - gid = *d.GID - } - dt, err := stringToDeviceRune(d.Type) - if err != nil { - return nil, err - } - if d.FileMode != nil { - filemode = *d.FileMode &^ unix.S_IFMT - } - device := &devices.Device{ - Rule: devices.Rule{ - Type: dt, - Major: d.Major, - Minor: d.Minor, - }, - Path: d.Path, - FileMode: filemode, - Uid: uid, - Gid: gid, - } - config.Devices = append(config.Devices, device) - } - } - - return dedupedAllowDevs, nil -} - -func setupUserNamespace(spec *specs.Spec, config *configs.Config) error { - create := func(m specs.LinuxIDMapping) configs.IDMap { - return configs.IDMap{ - HostID: int(m.HostID), - ContainerID: int(m.ContainerID), - Size: int(m.Size), - } - } - if spec.Linux != nil { - for _, m := range spec.Linux.UIDMappings { - config.UidMappings = append(config.UidMappings, create(m)) - } - for _, m := range spec.Linux.GIDMappings { - config.GidMappings = append(config.GidMappings, create(m)) - } - } - rootUID, err := config.HostRootUID() - if err != nil { - return err - } - rootGID, err := config.HostRootGID() - if err != nil { - return err - } - for _, node := range config.Devices { - node.Uid = uint32(rootUID) - node.Gid = uint32(rootGID) - } - return nil -} - -// parseMountOptions parses the string and returns the flags, propagation -// flags and any mount data that it contains. -func parseMountOptions(options []string) (int, []int, string, int) { - var ( - flag int - pgflag []int - data []string - extFlags int - ) - flags := map[string]struct { - clear bool - flag int - }{ - "acl": {false, unix.MS_POSIXACL}, - "async": {true, unix.MS_SYNCHRONOUS}, - "atime": {true, unix.MS_NOATIME}, - "bind": {false, unix.MS_BIND}, - "defaults": {false, 0}, - "dev": {true, unix.MS_NODEV}, - "diratime": {true, unix.MS_NODIRATIME}, - "dirsync": {false, unix.MS_DIRSYNC}, - "exec": {true, unix.MS_NOEXEC}, - "iversion": {false, unix.MS_I_VERSION}, - "lazytime": {false, unix.MS_LAZYTIME}, - "loud": {true, unix.MS_SILENT}, - "mand": {false, unix.MS_MANDLOCK}, - "noacl": {true, unix.MS_POSIXACL}, - "noatime": {false, unix.MS_NOATIME}, - "nodev": {false, unix.MS_NODEV}, - "nodiratime": {false, unix.MS_NODIRATIME}, - "noexec": {false, unix.MS_NOEXEC}, - "noiversion": {true, unix.MS_I_VERSION}, - "nolazytime": {true, unix.MS_LAZYTIME}, - "nomand": {true, unix.MS_MANDLOCK}, - "norelatime": {true, unix.MS_RELATIME}, - "nostrictatime": {true, unix.MS_STRICTATIME}, - "nosuid": {false, unix.MS_NOSUID}, - "rbind": {false, unix.MS_BIND | unix.MS_REC}, - "relatime": {false, unix.MS_RELATIME}, - "remount": {false, unix.MS_REMOUNT}, - "ro": {false, unix.MS_RDONLY}, - "rw": {true, unix.MS_RDONLY}, - "silent": {false, unix.MS_SILENT}, - "strictatime": {false, unix.MS_STRICTATIME}, - "suid": {true, unix.MS_NOSUID}, - "sync": {false, unix.MS_SYNCHRONOUS}, - } - propagationFlags := map[string]int{ - "private": unix.MS_PRIVATE, - "shared": unix.MS_SHARED, - "slave": unix.MS_SLAVE, - "unbindable": unix.MS_UNBINDABLE, - "rprivate": unix.MS_PRIVATE | unix.MS_REC, - "rshared": unix.MS_SHARED | unix.MS_REC, - "rslave": unix.MS_SLAVE | unix.MS_REC, - "runbindable": unix.MS_UNBINDABLE | unix.MS_REC, - } - extensionFlags := map[string]struct { - clear bool - flag int - }{ - "tmpcopyup": {false, configs.EXT_COPYUP}, - } - for _, o := range options { - // If the option does not exist in the flags table or the flag - // is not supported on the 
platform, - // then it is a data value for a specific fs type - if f, exists := flags[o]; exists && f.flag != 0 { - if f.clear { - flag &= ^f.flag - } else { - flag |= f.flag - } - } else if f, exists := propagationFlags[o]; exists && f != 0 { - pgflag = append(pgflag, f) - } else if f, exists := extensionFlags[o]; exists && f.flag != 0 { - if f.clear { - extFlags &= ^f.flag - } else { - extFlags |= f.flag - } - } else { - data = append(data, o) - } - } - return flag, pgflag, strings.Join(data, ","), extFlags -} - -func SetupSeccomp(config *specs.LinuxSeccomp) (*configs.Seccomp, error) { - if config == nil { - return nil, nil - } - - // No default action specified, no syscalls listed, assume seccomp disabled - if config.DefaultAction == "" && len(config.Syscalls) == 0 { - return nil, nil - } - - // We don't currently support seccomp flags. - if len(config.Flags) != 0 { - return nil, fmt.Errorf("seccomp flags are not yet supported by runc") - } - - newConfig := new(configs.Seccomp) - newConfig.Syscalls = []*configs.Syscall{} - - if len(config.Architectures) > 0 { - newConfig.Architectures = []string{} - for _, arch := range config.Architectures { - newArch, err := seccomp.ConvertStringToArch(string(arch)) - if err != nil { - return nil, err - } - newConfig.Architectures = append(newConfig.Architectures, newArch) - } - } - - // Convert default action from string representation - newDefaultAction, err := seccomp.ConvertStringToAction(string(config.DefaultAction)) - if err != nil { - return nil, err - } - newConfig.DefaultAction = newDefaultAction - newConfig.DefaultErrnoRet = config.DefaultErrnoRet - - // Loop through all syscall blocks and convert them to libcontainer format - for _, call := range config.Syscalls { - newAction, err := seccomp.ConvertStringToAction(string(call.Action)) - if err != nil { - return nil, err - } - - for _, name := range call.Names { - newCall := configs.Syscall{ - Name: name, - Action: newAction, - ErrnoRet: call.ErrnoRet, - Args: []*configs.Arg{}, - } - // Loop through all the arguments of the syscall and convert them - for _, arg := range call.Args { - newOp, err := seccomp.ConvertStringToOperator(string(arg.Op)) - if err != nil { - return nil, err - } - - newArg := configs.Arg{ - Index: arg.Index, - Value: arg.Value, - ValueTwo: arg.ValueTwo, - Op: newOp, - } - - newCall.Args = append(newCall.Args, &newArg) - } - newConfig.Syscalls = append(newConfig.Syscalls, &newCall) - } - } - - return newConfig, nil -} - -func createHooks(rspec *specs.Spec, config *configs.Config) { - config.Hooks = configs.Hooks{} - if rspec.Hooks != nil { - for _, h := range rspec.Hooks.Prestart { - cmd := createCommandHook(h) - config.Hooks[configs.Prestart] = append(config.Hooks[configs.Prestart], configs.NewCommandHook(cmd)) - } - for _, h := range rspec.Hooks.CreateRuntime { - cmd := createCommandHook(h) - config.Hooks[configs.CreateRuntime] = append(config.Hooks[configs.CreateRuntime], configs.NewCommandHook(cmd)) - } - for _, h := range rspec.Hooks.CreateContainer { - cmd := createCommandHook(h) - config.Hooks[configs.CreateContainer] = append(config.Hooks[configs.CreateContainer], configs.NewCommandHook(cmd)) - } - for _, h := range rspec.Hooks.StartContainer { - cmd := createCommandHook(h) - config.Hooks[configs.StartContainer] = append(config.Hooks[configs.StartContainer], configs.NewCommandHook(cmd)) - } - for _, h := range rspec.Hooks.Poststart { - cmd := createCommandHook(h) - config.Hooks[configs.Poststart] = append(config.Hooks[configs.Poststart], 
configs.NewCommandHook(cmd)) - } - for _, h := range rspec.Hooks.Poststop { - cmd := createCommandHook(h) - config.Hooks[configs.Poststop] = append(config.Hooks[configs.Poststop], configs.NewCommandHook(cmd)) - } - } -} - -func createCommandHook(h specs.Hook) configs.Command { - cmd := configs.Command{ - Path: h.Path, - Args: h.Args, - Env: h.Env, - } - if h.Timeout != nil { - d := time.Duration(*h.Timeout) * time.Second - cmd.Timeout = &d - } - return cmd -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go deleted file mode 100644 index bfb89157b4..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/capture.go +++ /dev/null @@ -1,27 +0,0 @@ -package stacktrace - -import "runtime" - -// Capture captures a stacktrace for the current calling go program -// -// skip is the number of frames to skip -func Capture(userSkip int) Stacktrace { - var ( - skip = userSkip + 1 // add one for our own function - frames []Frame - prevPc uintptr - ) - for i := skip; ; i++ { - pc, file, line, ok := runtime.Caller(i) - // detect if caller is repeated to avoid loop, gccgo - // currently runs into a loop without this check - if !ok || pc == prevPc { - break - } - frames = append(frames, NewFrame(pc, file, line)) - prevPc = pc - } - return Stacktrace{ - Frames: frames, - } -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go deleted file mode 100644 index 0d590d9a54..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/frame.go +++ /dev/null @@ -1,38 +0,0 @@ -package stacktrace - -import ( - "path/filepath" - "runtime" - "strings" -) - -// NewFrame returns a new stack frame for the provided information -func NewFrame(pc uintptr, file string, line int) Frame { - fn := runtime.FuncForPC(pc) - if fn == nil { - return Frame{} - } - pack, name := parseFunctionName(fn.Name()) - return Frame{ - Line: line, - File: filepath.Base(file), - Package: pack, - Function: name, - } -} - -func parseFunctionName(name string) (string, string) { - i := strings.LastIndex(name, ".") - if i == -1 { - return "", name - } - return name[:i], name[i+1:] -} - -// Frame contains all the information for a stack frame within a go program -type Frame struct { - File string - Function string - Package string - Line int -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go deleted file mode 100644 index 5e8b58d2d2..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stacktrace/stacktrace.go +++ /dev/null @@ -1,5 +0,0 @@ -package stacktrace - -type Stacktrace struct { - Frames []Frame -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go deleted file mode 100644 index 98c4860541..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/standard_init_linux.go +++ /dev/null @@ -1,231 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "os" - "os/exec" - "runtime" - "strconv" - - "github.com/opencontainers/runc/libcontainer/apparmor" - 
"github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runc/libcontainer/keys" - "github.com/opencontainers/runc/libcontainer/seccomp" - "github.com/opencontainers/runc/libcontainer/system" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/opencontainers/selinux/go-selinux" - "github.com/pkg/errors" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -type linuxStandardInit struct { - pipe *os.File - consoleSocket *os.File - parentPid int - fifoFd int - logFd int - config *initConfig -} - -func (l *linuxStandardInit) getSessionRingParams() (string, uint32, uint32) { - var newperms uint32 - - if l.config.Config.Namespaces.Contains(configs.NEWUSER) { - // With user ns we need 'other' search permissions. - newperms = 0x8 - } else { - // Without user ns we need 'UID' search permissions. - newperms = 0x80000 - } - - // Create a unique per session container name that we can join in setns; - // However, other containers can also join it. - return "_ses." + l.config.ContainerId, 0xffffffff, newperms -} - -func (l *linuxStandardInit) Init() error { - runtime.LockOSThread() - defer runtime.UnlockOSThread() - if !l.config.Config.NoNewKeyring { - if err := selinux.SetKeyLabel(l.config.ProcessLabel); err != nil { - return err - } - defer selinux.SetKeyLabel("") //nolint: errcheck - ringname, keepperms, newperms := l.getSessionRingParams() - - // Do not inherit the parent's session keyring. - if sessKeyId, err := keys.JoinSessionKeyring(ringname); err != nil { - // If keyrings aren't supported then it is likely we are on an - // older kernel (or inside an LXC container). While we could bail, - // the security feature we are using here is best-effort (it only - // really provides marginal protection since VFS credentials are - // the only significant protection of keyrings). - // - // TODO(cyphar): Log this so people know what's going on, once we - // have proper logging in 'runc init'. - if errors.Cause(err) != unix.ENOSYS { - return errors.Wrap(err, "join session keyring") - } - } else { - // Make session keyring searcheable. If we've gotten this far we - // bail on any error -- we don't want to have a keyring with bad - // permissions. - if err := keys.ModKeyringPerm(sessKeyId, keepperms, newperms); err != nil { - return errors.Wrap(err, "mod keyring permissions") - } - } - } - - if err := setupNetwork(l.config); err != nil { - return err - } - if err := setupRoute(l.config.Config); err != nil { - return err - } - - // initialises the labeling system - selinux.GetEnabled() - if err := prepareRootfs(l.pipe, l.config); err != nil { - return err - } - // Set up the console. This has to be done *before* we finalize the rootfs, - // but *after* we've given the user the chance to set up all of the mounts - // they wanted. - if l.config.CreateConsole { - if err := setupConsole(l.consoleSocket, l.config, true); err != nil { - return err - } - if err := system.Setctty(); err != nil { - return errors.Wrap(err, "setctty") - } - } - - // Finish the rootfs setup. 
- if l.config.Config.Namespaces.Contains(configs.NEWNS) { - if err := finalizeRootfs(l.config.Config); err != nil { - return err - } - } - - if hostname := l.config.Config.Hostname; hostname != "" { - if err := unix.Sethostname([]byte(hostname)); err != nil { - return errors.Wrap(err, "sethostname") - } - } - if err := apparmor.ApplyProfile(l.config.AppArmorProfile); err != nil { - return errors.Wrap(err, "apply apparmor profile") - } - - for key, value := range l.config.Config.Sysctl { - if err := writeSystemProperty(key, value); err != nil { - return errors.Wrapf(err, "write sysctl key %s", key) - } - } - for _, path := range l.config.Config.ReadonlyPaths { - if err := readonlyPath(path); err != nil { - return errors.Wrapf(err, "readonly path %s", path) - } - } - for _, path := range l.config.Config.MaskPaths { - if err := maskPath(path, l.config.Config.MountLabel); err != nil { - return errors.Wrapf(err, "mask path %s", path) - } - } - pdeath, err := system.GetParentDeathSignal() - if err != nil { - return errors.Wrap(err, "get pdeath signal") - } - if l.config.NoNewPrivileges { - if err := unix.Prctl(unix.PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); err != nil { - return errors.Wrap(err, "set nonewprivileges") - } - } - // Tell our parent that we're ready to Execv. This must be done before the - // Seccomp rules have been applied, because we need to be able to read and - // write to a socket. - if err := syncParentReady(l.pipe); err != nil { - return errors.Wrap(err, "sync ready") - } - if err := selinux.SetExecLabel(l.config.ProcessLabel); err != nil { - return errors.Wrap(err, "set process label") - } - defer selinux.SetExecLabel("") //nolint: errcheck - // Without NoNewPrivileges seccomp is a privileged operation, so we need to - // do this before dropping capabilities; otherwise do it as late as possible - // just before execve so as few syscalls take place after it as possible. - if l.config.Config.Seccomp != nil && !l.config.NoNewPrivileges { - if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { - return err - } - } - if err := finalizeNamespace(l.config); err != nil { - return err - } - // finalizeNamespace can change user/group which clears the parent death - // signal, so we restore it here. - if err := pdeath.Restore(); err != nil { - return errors.Wrap(err, "restore pdeath signal") - } - // Compare the parent from the initial start of the init process and make - // sure that it did not change. if the parent changes that means it died - // and we were reparented to something else so we should just kill ourself - // and not cause problems for someone else. - if unix.Getppid() != l.parentPid { - return unix.Kill(unix.Getpid(), unix.SIGKILL) - } - // Check for the arg before waiting to make sure it exists and it is - // returned as a create time error. - name, err := exec.LookPath(l.config.Args[0]) - if err != nil { - return err - } - // Close the pipe to signal that we have completed our init. - logrus.Debugf("init: closing the pipe to signal completion") - _ = l.pipe.Close() - - // Close the log pipe fd so the parent's ForwardLogs can exit. - if err := unix.Close(l.logFd); err != nil { - return newSystemErrorWithCause(err, "closing log pipe fd") - } - - // Wait for the FIFO to be opened on the other side before exec-ing the - // user process. We open it through /proc/self/fd/$fd, because the fd that - // was given to us was an O_PATH fd to the fifo itself. Linux allows us to - // re-open an O_PATH fd through /proc. 
- fd, err := unix.Open("/proc/self/fd/"+strconv.Itoa(l.fifoFd), unix.O_WRONLY|unix.O_CLOEXEC, 0) - if err != nil { - return newSystemErrorWithCause(err, "open exec fifo") - } - if _, err := unix.Write(fd, []byte("0")); err != nil { - return newSystemErrorWithCause(err, "write 0 exec fifo") - } - // Close the O_PATH fifofd fd before exec because the kernel resets - // dumpable in the wrong order. This has been fixed in newer kernels, but - // we keep this to ensure CVE-2016-9962 doesn't re-emerge on older kernels. - // N.B. the core issue itself (passing dirfds to the host filesystem) has - // since been resolved. - // https://github.com/torvalds/linux/blob/v4.9/fs/exec.c#L1290-L1318 - _ = unix.Close(l.fifoFd) - // Set seccomp as close to execve as possible, so as few syscalls take - // place afterward (reducing the amount of syscalls that users need to - // enable in their seccomp profiles). - if l.config.Config.Seccomp != nil && l.config.NoNewPrivileges { - if err := seccomp.InitSeccomp(l.config.Config.Seccomp); err != nil { - return newSystemErrorWithCause(err, "init seccomp") - } - } - - s := l.config.SpecState - s.Pid = unix.Getpid() - s.Status = specs.StateCreated - if err := l.config.Config.Hooks[configs.StartContainer].RunHooks(s); err != nil { - return err - } - - if err := system.Exec(name, l.config.Args[0:], os.Environ()); err != nil { - return newSystemErrorWithCause(err, "exec user process") - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go deleted file mode 100644 index 43c040c857..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/state_linux.go +++ /dev/null @@ -1,245 +0,0 @@ -// +build linux - -package libcontainer - -import ( - "fmt" - "os" - "path/filepath" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/opencontainers/runtime-spec/specs-go" - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" -) - -func newStateTransitionError(from, to containerState) error { - return &stateTransitionError{ - From: from.status().String(), - To: to.status().String(), - } -} - -// stateTransitionError is returned when an invalid state transition happens from one -// state to another. 
-type stateTransitionError struct { - From string - To string -} - -func (s *stateTransitionError) Error() string { - return fmt.Sprintf("invalid state transition from %s to %s", s.From, s.To) -} - -type containerState interface { - transition(containerState) error - destroy() error - status() Status -} - -func destroy(c *linuxContainer) error { - if !c.config.Namespaces.Contains(configs.NEWPID) || - c.config.Namespaces.PathOf(configs.NEWPID) != "" { - if err := signalAllProcesses(c.cgroupManager, unix.SIGKILL); err != nil { - logrus.Warn(err) - } - } - err := c.cgroupManager.Destroy() - if c.intelRdtManager != nil { - if ierr := c.intelRdtManager.Destroy(); err == nil { - err = ierr - } - } - if rerr := os.RemoveAll(c.root); err == nil { - err = rerr - } - c.initProcess = nil - if herr := runPoststopHooks(c); err == nil { - err = herr - } - c.state = &stoppedState{c: c} - return err -} - -func runPoststopHooks(c *linuxContainer) error { - hooks := c.config.Hooks - if hooks == nil { - return nil - } - - s, err := c.currentOCIState() - if err != nil { - return err - } - s.Status = specs.StateStopped - - if err := hooks[configs.Poststop].RunHooks(s); err != nil { - return err - } - - return nil -} - -// stoppedState represents a container is a stopped/destroyed state. -type stoppedState struct { - c *linuxContainer -} - -func (b *stoppedState) status() Status { - return Stopped -} - -func (b *stoppedState) transition(s containerState) error { - switch s.(type) { - case *runningState, *restoredState: - b.c.state = s - return nil - case *stoppedState: - return nil - } - return newStateTransitionError(b, s) -} - -func (b *stoppedState) destroy() error { - return destroy(b.c) -} - -// runningState represents a container that is currently running. -type runningState struct { - c *linuxContainer -} - -func (r *runningState) status() Status { - return Running -} - -func (r *runningState) transition(s containerState) error { - switch s.(type) { - case *stoppedState: - if r.c.runType() == Running { - return newGenericError(fmt.Errorf("container still running"), ContainerNotStopped) - } - r.c.state = s - return nil - case *pausedState: - r.c.state = s - return nil - case *runningState: - return nil - } - return newStateTransitionError(r, s) -} - -func (r *runningState) destroy() error { - if r.c.runType() == Running { - return newGenericError(fmt.Errorf("container is not destroyed"), ContainerNotStopped) - } - return destroy(r.c) -} - -type createdState struct { - c *linuxContainer -} - -func (i *createdState) status() Status { - return Created -} - -func (i *createdState) transition(s containerState) error { - switch s.(type) { - case *runningState, *pausedState, *stoppedState: - i.c.state = s - return nil - case *createdState: - return nil - } - return newStateTransitionError(i, s) -} - -func (i *createdState) destroy() error { - _ = i.c.initProcess.signal(unix.SIGKILL) - return destroy(i.c) -} - -// pausedState represents a container that is currently pause. It cannot be destroyed in a -// paused state and must transition back to running first. 
-type pausedState struct { - c *linuxContainer -} - -func (p *pausedState) status() Status { - return Paused -} - -func (p *pausedState) transition(s containerState) error { - switch s.(type) { - case *runningState, *stoppedState: - p.c.state = s - return nil - case *pausedState: - return nil - } - return newStateTransitionError(p, s) -} - -func (p *pausedState) destroy() error { - t := p.c.runType() - if t != Running && t != Created { - if err := p.c.cgroupManager.Freeze(configs.Thawed); err != nil { - return err - } - return destroy(p.c) - } - return newGenericError(fmt.Errorf("container is paused"), ContainerPaused) -} - -// restoredState is the same as the running state but also has associated checkpoint -// information that maybe need destroyed when the container is stopped and destroy is called. -type restoredState struct { - imageDir string - c *linuxContainer -} - -func (r *restoredState) status() Status { - return Running -} - -func (r *restoredState) transition(s containerState) error { - switch s.(type) { - case *stoppedState, *runningState: - return nil - } - return newStateTransitionError(r, s) -} - -func (r *restoredState) destroy() error { - if _, err := os.Stat(filepath.Join(r.c.root, "checkpoint")); err != nil { - if !os.IsNotExist(err) { - return err - } - } - return destroy(r.c) -} - -// loadedState is used whenever a container is restored, loaded, or setting additional -// processes inside and it should not be destroyed when it is exiting. -type loadedState struct { - c *linuxContainer - s Status -} - -func (n *loadedState) status() Status { - return n.s -} - -func (n *loadedState) transition(s containerState) error { - n.c.state = s - return nil -} - -func (n *loadedState) destroy() error { - if err := n.c.refreshState(); err != nil { - return err - } - return n.c.state.destroy() -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go deleted file mode 100644 index fff9dd37af..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/stats_linux.go +++ /dev/null @@ -1,13 +0,0 @@ -package libcontainer - -import ( - "github.com/opencontainers/runc/libcontainer/cgroups" - "github.com/opencontainers/runc/libcontainer/intelrdt" - "github.com/opencontainers/runc/types" -) - -type Stats struct { - Interfaces []*types.NetworkInterface - CgroupStats *cgroups.Stats - IntelRdtStats *intelrdt.Stats -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/sync.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/sync.go deleted file mode 100644 index ac88ad22a8..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/sync.go +++ /dev/null @@ -1,101 +0,0 @@ -package libcontainer - -import ( - "encoding/json" - "errors" - "fmt" - "io" - - "github.com/opencontainers/runc/libcontainer/utils" -) - -type syncType string - -// Constants that are used for synchronisation between the parent and child -// during container setup. They come in pairs (with procError being a generic -// response which is followed by a &genericError). 
-// -// [ child ] <-> [ parent ] -// -// procHooks --> [run hooks] -// <-- procResume -// -// procReady --> [final setup] -// <-- procRun -const ( - procError syncType = "procError" - procReady syncType = "procReady" - procRun syncType = "procRun" - procHooks syncType = "procHooks" - procResume syncType = "procResume" -) - -type syncT struct { - Type syncType `json:"type"` -} - -// writeSync is used to write to a synchronisation pipe. An error is returned -// if there was a problem writing the payload. -func writeSync(pipe io.Writer, sync syncType) error { - return utils.WriteJSON(pipe, syncT{sync}) -} - -// readSync is used to read from a synchronisation pipe. An error is returned -// if we got a genericError, the pipe was closed, or we got an unexpected flag. -func readSync(pipe io.Reader, expected syncType) error { - var procSync syncT - if err := json.NewDecoder(pipe).Decode(&procSync); err != nil { - if err == io.EOF { - return errors.New("parent closed synchronisation channel") - } - return fmt.Errorf("failed reading error from parent: %v", err) - } - - if procSync.Type == procError { - var ierr genericError - - if err := json.NewDecoder(pipe).Decode(&ierr); err != nil { - return fmt.Errorf("failed reading error from parent: %v", err) - } - - return &ierr - } - - if procSync.Type != expected { - return errors.New("invalid synchronisation flag from parent") - } - return nil -} - -// parseSync runs the given callback function on each syncT received from the -// child. It will return once io.EOF is returned from the given pipe. -func parseSync(pipe io.Reader, fn func(*syncT) error) error { - dec := json.NewDecoder(pipe) - for { - var sync syncT - if err := dec.Decode(&sync); err != nil { - if err == io.EOF { - break - } - return err - } - - // We handle this case outside fn for cleanliness reasons. - var ierr *genericError - if sync.Type == procError { - if err := dec.Decode(&ierr); err != nil && err != io.EOF { - return newSystemErrorWithCause(err, "decoding proc error from init") - } - if ierr != nil { - return ierr - } - // Programmer error. 
- panic("No error following JSON procError payload.") - } - - if err := fn(&sync); err != nil { - return err - } - } - return nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go deleted file mode 100644 index b9fd0832d5..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/linux.go +++ /dev/null @@ -1,110 +0,0 @@ -// +build linux - -package system - -import ( - "os/exec" - "unsafe" - - "golang.org/x/sys/unix" -) - -type ParentDeathSignal int - -func (p ParentDeathSignal) Restore() error { - if p == 0 { - return nil - } - current, err := GetParentDeathSignal() - if err != nil { - return err - } - if p == current { - return nil - } - return p.Set() -} - -func (p ParentDeathSignal) Set() error { - return SetParentDeathSignal(uintptr(p)) -} - -func Execv(cmd string, args []string, env []string) error { - name, err := exec.LookPath(cmd) - if err != nil { - return err - } - - return Exec(name, args, env) -} - -func Exec(cmd string, args []string, env []string) error { - for { - err := unix.Exec(cmd, args, env) - if err != unix.EINTR { //nolint:errorlint // unix errors are bare - return err - } - } -} - -func Prlimit(pid, resource int, limit unix.Rlimit) error { - _, _, err := unix.RawSyscall6(unix.SYS_PRLIMIT64, uintptr(pid), uintptr(resource), uintptr(unsafe.Pointer(&limit)), uintptr(unsafe.Pointer(&limit)), 0, 0) - if err != 0 { - return err - } - return nil -} - -func SetParentDeathSignal(sig uintptr) error { - if err := unix.Prctl(unix.PR_SET_PDEATHSIG, sig, 0, 0, 0); err != nil { - return err - } - return nil -} - -func GetParentDeathSignal() (ParentDeathSignal, error) { - var sig int - if err := unix.Prctl(unix.PR_GET_PDEATHSIG, uintptr(unsafe.Pointer(&sig)), 0, 0, 0); err != nil { - return -1, err - } - return ParentDeathSignal(sig), nil -} - -func SetKeepCaps() error { - if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 1, 0, 0, 0); err != nil { - return err - } - - return nil -} - -func ClearKeepCaps() error { - if err := unix.Prctl(unix.PR_SET_KEEPCAPS, 0, 0, 0, 0); err != nil { - return err - } - - return nil -} - -func Setctty() error { - if err := unix.IoctlSetInt(0, unix.TIOCSCTTY, 0); err != nil { - return err - } - return nil -} - -// SetSubreaper sets the value i as the subreaper setting for the calling process -func SetSubreaper(i int) error { - return unix.Prctl(unix.PR_SET_CHILD_SUBREAPER, uintptr(i), 0, 0, 0) -} - -// GetSubreaper returns the subreaper setting for the calling process -func GetSubreaper() (int, error) { - var i uintptr - - if err := unix.Prctl(unix.PR_GET_CHILD_SUBREAPER, uintptr(unsafe.Pointer(&i)), 0, 0, 0); err != nil { - return -1, err - } - - return int(i), nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go deleted file mode 100644 index d0407cfe42..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/proc.go +++ /dev/null @@ -1,103 +0,0 @@ -package system - -import ( - "fmt" - "io/ioutil" - "path/filepath" - "strconv" - "strings" -) - -// State is the status of a process. 
-type State rune - -const ( // Only values for Linux 3.14 and later are listed here - Dead State = 'X' - DiskSleep State = 'D' - Running State = 'R' - Sleeping State = 'S' - Stopped State = 'T' - TracingStop State = 't' - Zombie State = 'Z' -) - -// String forms of the state from proc(5)'s documentation for -// /proc/[pid]/status' "State" field. -func (s State) String() string { - switch s { - case Dead: - return "dead" - case DiskSleep: - return "disk sleep" - case Running: - return "running" - case Sleeping: - return "sleeping" - case Stopped: - return "stopped" - case TracingStop: - return "tracing stop" - case Zombie: - return "zombie" - default: - return fmt.Sprintf("unknown (%c)", s) - } -} - -// Stat_t represents the information from /proc/[pid]/stat, as -// described in proc(5) with names based on the /proc/[pid]/status -// fields. -type Stat_t struct { - // PID is the process ID. - PID uint - - // Name is the command run by the process. - Name string - - // State is the state of the process. - State State - - // StartTime is the number of clock ticks after system boot (since - // Linux 2.6). - StartTime uint64 -} - -// Stat returns a Stat_t instance for the specified process. -func Stat(pid int) (stat Stat_t, err error) { - bytes, err := ioutil.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "stat")) - if err != nil { - return stat, err - } - return parseStat(string(bytes)) -} - -func parseStat(data string) (stat Stat_t, err error) { - // From proc(5), field 2 could contain space and is inside `(` and `)`. - // The following is an example: - // 89653 (gunicorn: maste) S 89630 89653 89653 0 -1 4194560 29689 28896 0 3 146 32 76 19 20 0 1 0 2971844 52965376 3920 18446744073709551615 1 1 0 0 0 0 0 16781312 137447943 0 0 0 17 1 0 0 0 0 0 0 0 0 0 0 0 0 0 - i := strings.LastIndex(data, ")") - if i <= 2 || i >= len(data)-1 { - return stat, fmt.Errorf("invalid stat data: %q", data) - } - - parts := strings.SplitN(data[:i], "(", 2) - if len(parts) != 2 { - return stat, fmt.Errorf("invalid stat data: %q", data) - } - - stat.Name = parts[1] - _, err = fmt.Sscanf(parts[0], "%d", &stat.PID) - if err != nil { - return stat, err - } - - // parts indexes should be offset by 3 from the field number given - // proc(5), because parts is zero-indexed and we've removed fields - // one (PID) and two (Name) in the paren-split. - parts = strings.Split(data[i+2:], " ") - var state int - fmt.Sscanf(parts[3-3], "%c", &state) //nolint:staticcheck // "3-3" is more readable in this context. - stat.State = State(state) - fmt.Sscanf(parts[22-3], "%d", &stat.StartTime) - return stat, nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go deleted file mode 100644 index c5ca5d8623..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_32.go +++ /dev/null @@ -1,26 +0,0 @@ -// +build linux -// +build 386 arm - -package system - -import ( - "golang.org/x/sys/unix" -) - -// Setuid sets the uid of the calling thread to the specified uid. -func Setuid(uid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETUID32, uintptr(uid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} - -// Setgid sets the gid of the calling thread to the specified gid. 
-func Setgid(gid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETGID32, uintptr(gid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go deleted file mode 100644 index e05e30adc3..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/syscall_linux_64.go +++ /dev/null @@ -1,26 +0,0 @@ -// +build linux -// +build arm64 amd64 mips mipsle mips64 mips64le ppc ppc64 ppc64le riscv64 s390x - -package system - -import ( - "golang.org/x/sys/unix" -) - -// Setuid sets the uid of the calling thread to the specified uid. -func Setuid(uid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETUID, uintptr(uid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} - -// Setgid sets the gid of the calling thread to the specified gid. -func Setgid(gid int) (err error) { - _, _, e1 := unix.RawSyscall(unix.SYS_SETGID, uintptr(gid), 0, 0) - if e1 != 0 { - err = e1 - } - return -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go deleted file mode 100644 index a6823fc99b..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/system/xattrs_linux.go +++ /dev/null @@ -1,35 +0,0 @@ -package system - -import "golang.org/x/sys/unix" - -// Returns a []byte slice if the xattr is set and nil otherwise -// Requires path and its attribute as arguments -func Lgetxattr(path string, attr string) ([]byte, error) { - var sz int - // Start with a 128 length byte array - dest := make([]byte, 128) - sz, errno := unix.Lgetxattr(path, attr, dest) - - switch { - case errno == unix.ENODATA: - return nil, errno - case errno == unix.ENOTSUP: - return nil, errno - case errno == unix.ERANGE: - // 128 byte array might just not be good enough, - // A dummy buffer is used to get the real size - // of the xattrs on disk - sz, errno = unix.Lgetxattr(path, attr, []byte{}) - if errno != nil { - return nil, errno - } - dest = make([]byte, sz) - sz, errno = unix.Lgetxattr(path, attr, dest) - if errno != nil { - return nil, errno - } - case errno != nil: - return nil, errno - } - return dest[:sz], nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go deleted file mode 100644 index c8a9364d54..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/cmsg.go +++ /dev/null @@ -1,93 +0,0 @@ -// +build linux - -package utils - -/* - * Copyright 2016, 2017 SUSE LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import ( - "fmt" - "os" - - "golang.org/x/sys/unix" -) - -// MaxSendfdLen is the maximum length of the name of a file descriptor being -// sent using SendFd. 
The name of the file handle returned by RecvFd will never -// be larger than this value. -const MaxNameLen = 4096 - -// oobSpace is the size of the oob slice required to store a single FD. Note -// that unix.UnixRights appears to make the assumption that fd is always int32, -// so sizeof(fd) = 4. -var oobSpace = unix.CmsgSpace(4) - -// RecvFd waits for a file descriptor to be sent over the given AF_UNIX -// socket. The file name of the remote file descriptor will be recreated -// locally (it is sent as non-auxiliary data in the same payload). -func RecvFd(socket *os.File) (*os.File, error) { - // For some reason, unix.Recvmsg uses the length rather than the capacity - // when passing the msg_controllen and other attributes to recvmsg. So we - // have to actually set the length. - name := make([]byte, MaxNameLen) - oob := make([]byte, oobSpace) - - sockfd := socket.Fd() - n, oobn, _, _, err := unix.Recvmsg(int(sockfd), name, oob, 0) - if err != nil { - return nil, err - } - - if n >= MaxNameLen || oobn != oobSpace { - return nil, fmt.Errorf("recvfd: incorrect number of bytes read (n=%d oobn=%d)", n, oobn) - } - - // Truncate. - name = name[:n] - oob = oob[:oobn] - - scms, err := unix.ParseSocketControlMessage(oob) - if err != nil { - return nil, err - } - if len(scms) != 1 { - return nil, fmt.Errorf("recvfd: number of SCMs is not 1: %d", len(scms)) - } - scm := scms[0] - - fds, err := unix.ParseUnixRights(&scm) - if err != nil { - return nil, err - } - if len(fds) != 1 { - return nil, fmt.Errorf("recvfd: number of fds is not 1: %d", len(fds)) - } - fd := uintptr(fds[0]) - - return os.NewFile(fd, string(name)), nil -} - -// SendFd sends a file descriptor over the given AF_UNIX socket. In -// addition, the file.Name() of the given file will also be sent as -// non-auxiliary data in the same payload (allowing to send contextual -// information for a file descriptor). -func SendFd(socket *os.File, name string, fd uintptr) error { - if len(name) >= MaxNameLen { - return fmt.Errorf("sendfd: filename too long: %s", name) - } - oob := unix.UnixRights(int(fd)) - return unix.Sendmsg(int(socket.Fd()), []byte(name), oob, nil, 0) -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go deleted file mode 100644 index cd78f23e1b..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils.go +++ /dev/null @@ -1,177 +0,0 @@ -package utils - -import ( - "encoding/binary" - "encoding/json" - "fmt" - "io" - "os" - "path/filepath" - "strconv" - "strings" - "unsafe" - - "github.com/cyphar/filepath-securejoin" - "golang.org/x/sys/unix" -) - -const ( - exitSignalOffset = 128 -) - -// NativeEndian is the native byte order of the host system. -var NativeEndian binary.ByteOrder - -func init() { - // Copied from . 
- i := uint32(1) - b := (*[4]byte)(unsafe.Pointer(&i)) - if b[0] == 1 { - NativeEndian = binary.LittleEndian - } else { - NativeEndian = binary.BigEndian - } -} - -// ResolveRootfs ensures that the current working directory is -// not a symlink and returns the absolute path to the rootfs -func ResolveRootfs(uncleanRootfs string) (string, error) { - rootfs, err := filepath.Abs(uncleanRootfs) - if err != nil { - return "", err - } - return filepath.EvalSymlinks(rootfs) -} - -// ExitStatus returns the correct exit status for a process based on if it -// was signaled or exited cleanly -func ExitStatus(status unix.WaitStatus) int { - if status.Signaled() { - return exitSignalOffset + int(status.Signal()) - } - return status.ExitStatus() -} - -// WriteJSON writes the provided struct v to w using standard json marshaling -func WriteJSON(w io.Writer, v interface{}) error { - data, err := json.Marshal(v) - if err != nil { - return err - } - _, err = w.Write(data) - return err -} - -// CleanPath makes a path safe for use with filepath.Join. This is done by not -// only cleaning the path, but also (if the path is relative) adding a leading -// '/' and cleaning it (then removing the leading '/'). This ensures that a -// path resulting from prepending another path will always resolve to lexically -// be a subdirectory of the prefixed path. This is all done lexically, so paths -// that include symlinks won't be safe as a result of using CleanPath. -func CleanPath(path string) string { - // Deal with empty strings nicely. - if path == "" { - return "" - } - - // Ensure that all paths are cleaned (especially problematic ones like - // "/../../../../../" which can cause lots of issues). - path = filepath.Clean(path) - - // If the path isn't absolute, we need to do more processing to fix paths - // such as "../../../..//some/path". We also shouldn't convert absolute - // paths to relative ones. - if !filepath.IsAbs(path) { - path = filepath.Clean(string(os.PathSeparator) + path) - // This can't fail, as (by definition) all paths are relative to root. - path, _ = filepath.Rel(string(os.PathSeparator), path) - } - - // Clean the path again for good measure. - return filepath.Clean(path) -} - -// stripRoot returns the passed path, stripping the root path if it was -// (lexicially) inside it. Note that both passed paths will always be treated -// as absolute, and the returned path will also always be absolute. In -// addition, the paths are cleaned before stripping the root. -func stripRoot(root, path string) string { - // Make the paths clean and absolute. - root, path = CleanPath("/"+root), CleanPath("/"+path) - switch { - case path == root: - path = "/" - case root == "/": - // do nothing - case strings.HasPrefix(path, root+"/"): - path = strings.TrimPrefix(path, root+"/") - } - return CleanPath("/" + path) -} - -// WithProcfd runs the passed closure with a procfd path (/proc/self/fd/...) -// corresponding to the unsafePath resolved within the root. Before passing the -// fd, this path is verified to have been inside the root -- so operating on it -// through the passed fdpath should be safe. Do not access this path through -// the original path strings, and do not attempt to use the pathname outside of -// the passed closure (the file handle will be freed once the closure returns). -func WithProcfd(root, unsafePath string, fn func(procfd string) error) error { - // Remove the root then forcefully resolve inside the root. 
- unsafePath = stripRoot(root, unsafePath) - path, err := securejoin.SecureJoin(root, unsafePath) - if err != nil { - return fmt.Errorf("resolving path inside rootfs failed: %v", err) - } - - // Open the target path. - fh, err := os.OpenFile(path, unix.O_PATH|unix.O_CLOEXEC, 0) - if err != nil { - return fmt.Errorf("open o_path procfd: %w", err) - } - defer fh.Close() - - // Double-check the path is the one we expected. - procfd := "/proc/self/fd/" + strconv.Itoa(int(fh.Fd())) - if realpath, err := os.Readlink(procfd); err != nil { - return fmt.Errorf("procfd verification failed: %w", err) - } else if realpath != path { - return fmt.Errorf("possibly malicious path detected -- refusing to operate on %s", realpath) - } - - // Run the closure. - return fn(procfd) -} - -// SearchLabels searches a list of key-value pairs for the provided key and -// returns the corresponding value. The pairs must be separated with '='. -func SearchLabels(labels []string, query string) string { - for _, l := range labels { - parts := strings.SplitN(l, "=", 2) - if len(parts) < 2 { - continue - } - if parts[0] == query { - return parts[1] - } - } - return "" -} - -// Annotations returns the bundle path and user defined annotations from the -// libcontainer state. We need to remove the bundle because that is a label -// added by libcontainer. -func Annotations(labels []string) (bundle string, userAnnotations map[string]string) { - userAnnotations = make(map[string]string) - for _, l := range labels { - parts := strings.SplitN(l, "=", 2) - if len(parts) < 2 { - continue - } - if parts[0] == "bundle" { - bundle = parts[1] - } else { - userAnnotations[parts[0]] = parts[1] - } - } - return -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go deleted file mode 100644 index 1576f2d4ab..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go +++ /dev/null @@ -1,68 +0,0 @@ -// +build !windows - -package utils - -import ( - "fmt" - "os" - "strconv" - - "golang.org/x/sys/unix" -) - -// EnsureProcHandle returns whether or not the given file handle is on procfs. -func EnsureProcHandle(fh *os.File) error { - var buf unix.Statfs_t - if err := unix.Fstatfs(int(fh.Fd()), &buf); err != nil { - return fmt.Errorf("ensure %s is on procfs: %v", fh.Name(), err) - } - if buf.Type != unix.PROC_SUPER_MAGIC { - return fmt.Errorf("%s is not on procfs", fh.Name()) - } - return nil -} - -// CloseExecFrom applies O_CLOEXEC to all file descriptors currently open for -// the process (except for those below the given fd value). -func CloseExecFrom(minFd int) error { - fdDir, err := os.Open("/proc/self/fd") - if err != nil { - return err - } - defer fdDir.Close() - - if err := EnsureProcHandle(fdDir); err != nil { - return err - } - - fdList, err := fdDir.Readdirnames(-1) - if err != nil { - return err - } - for _, fdStr := range fdList { - fd, err := strconv.Atoi(fdStr) - // Ignore non-numeric file names. - if err != nil { - continue - } - // Ignore descriptors lower than our specified minimum. - if fd < minFd { - continue - } - // Intentionally ignore errors from unix.CloseOnExec -- the cases where - // this might fail are basically file descriptors that have already - // been closed (including and especially the one that was created when - // ioutil.ReadDir did the "opendir" syscall). 
- unix.CloseOnExec(fd) - } - return nil -} - -// NewSockPair returns a new unix socket pair -func NewSockPair(name string) (parent *os.File, child *os.File, err error) { - fds, err := unix.Socketpair(unix.AF_LOCAL, unix.SOCK_STREAM|unix.SOCK_CLOEXEC, 0) - if err != nil { - return nil, nil, err - } - return os.NewFile(uintptr(fds[1]), name+"-p"), os.NewFile(uintptr(fds[0]), name+"-c"), nil -} diff --git a/src/runtime/vendor/github.com/opencontainers/runc/types/events.go b/src/runtime/vendor/github.com/opencontainers/runc/types/events.go deleted file mode 100644 index 81bde829da..0000000000 --- a/src/runtime/vendor/github.com/opencontainers/runc/types/events.go +++ /dev/null @@ -1,155 +0,0 @@ -package types - -import "github.com/opencontainers/runc/libcontainer/intelrdt" - -// Event struct for encoding the event data to json. -type Event struct { - Type string `json:"type"` - ID string `json:"id"` - Data interface{} `json:"data,omitempty"` -} - -// stats is the runc specific stats structure for stability when encoding and decoding stats. -type Stats struct { - CPU Cpu `json:"cpu"` - CPUSet CPUSet `json:"cpuset"` - Memory Memory `json:"memory"` - Pids Pids `json:"pids"` - Blkio Blkio `json:"blkio"` - Hugetlb map[string]Hugetlb `json:"hugetlb"` - IntelRdt IntelRdt `json:"intel_rdt"` - NetworkInterfaces []*NetworkInterface `json:"network_interfaces"` -} - -type Hugetlb struct { - Usage uint64 `json:"usage,omitempty"` - Max uint64 `json:"max,omitempty"` - Failcnt uint64 `json:"failcnt"` -} - -type BlkioEntry struct { - Major uint64 `json:"major,omitempty"` - Minor uint64 `json:"minor,omitempty"` - Op string `json:"op,omitempty"` - Value uint64 `json:"value,omitempty"` -} - -type Blkio struct { - IoServiceBytesRecursive []BlkioEntry `json:"ioServiceBytesRecursive,omitempty"` - IoServicedRecursive []BlkioEntry `json:"ioServicedRecursive,omitempty"` - IoQueuedRecursive []BlkioEntry `json:"ioQueueRecursive,omitempty"` - IoServiceTimeRecursive []BlkioEntry `json:"ioServiceTimeRecursive,omitempty"` - IoWaitTimeRecursive []BlkioEntry `json:"ioWaitTimeRecursive,omitempty"` - IoMergedRecursive []BlkioEntry `json:"ioMergedRecursive,omitempty"` - IoTimeRecursive []BlkioEntry `json:"ioTimeRecursive,omitempty"` - SectorsRecursive []BlkioEntry `json:"sectorsRecursive,omitempty"` -} - -type Pids struct { - Current uint64 `json:"current,omitempty"` - Limit uint64 `json:"limit,omitempty"` -} - -type Throttling struct { - Periods uint64 `json:"periods,omitempty"` - ThrottledPeriods uint64 `json:"throttledPeriods,omitempty"` - ThrottledTime uint64 `json:"throttledTime,omitempty"` -} - -type CpuUsage struct { - // Units: nanoseconds. 
- Total uint64 `json:"total,omitempty"` - Percpu []uint64 `json:"percpu,omitempty"` - PercpuKernel []uint64 `json:"percpu_kernel,omitempty"` - PercpuUser []uint64 `json:"percpu_user,omitempty"` - Kernel uint64 `json:"kernel"` - User uint64 `json:"user"` -} - -type Cpu struct { - Usage CpuUsage `json:"usage,omitempty"` - Throttling Throttling `json:"throttling,omitempty"` -} - -type CPUSet struct { - CPUs []uint16 `json:"cpus,omitempty"` - CPUExclusive uint64 `json:"cpu_exclusive"` - Mems []uint16 `json:"mems,omitempty"` - MemHardwall uint64 `json:"mem_hardwall"` - MemExclusive uint64 `json:"mem_exclusive"` - MemoryMigrate uint64 `json:"memory_migrate"` - MemorySpreadPage uint64 `json:"memory_spread_page"` - MemorySpreadSlab uint64 `json:"memory_spread_slab"` - MemoryPressure uint64 `json:"memory_pressure"` - SchedLoadBalance uint64 `json:"sched_load_balance"` - SchedRelaxDomainLevel int64 `json:"sched_relax_domain_level"` -} - -type MemoryEntry struct { - Limit uint64 `json:"limit"` - Usage uint64 `json:"usage,omitempty"` - Max uint64 `json:"max,omitempty"` - Failcnt uint64 `json:"failcnt"` -} - -type Memory struct { - Cache uint64 `json:"cache,omitempty"` - Usage MemoryEntry `json:"usage,omitempty"` - Swap MemoryEntry `json:"swap,omitempty"` - Kernel MemoryEntry `json:"kernel,omitempty"` - KernelTCP MemoryEntry `json:"kernelTCP,omitempty"` - Raw map[string]uint64 `json:"raw,omitempty"` -} - -type L3CacheInfo struct { - CbmMask string `json:"cbm_mask,omitempty"` - MinCbmBits uint64 `json:"min_cbm_bits,omitempty"` - NumClosids uint64 `json:"num_closids,omitempty"` -} - -type MemBwInfo struct { - BandwidthGran uint64 `json:"bandwidth_gran,omitempty"` - DelayLinear uint64 `json:"delay_linear,omitempty"` - MinBandwidth uint64 `json:"min_bandwidth,omitempty"` - NumClosids uint64 `json:"num_closids,omitempty"` -} - -type IntelRdt struct { - // The read-only L3 cache information - L3CacheInfo *L3CacheInfo `json:"l3_cache_info,omitempty"` - - // The read-only L3 cache schema in root - L3CacheSchemaRoot string `json:"l3_cache_schema_root,omitempty"` - - // The L3 cache schema in 'container_id' group - L3CacheSchema string `json:"l3_cache_schema,omitempty"` - - // The read-only memory bandwidth information - MemBwInfo *MemBwInfo `json:"mem_bw_info,omitempty"` - - // The read-only memory bandwidth schema in root - MemBwSchemaRoot string `json:"mem_bw_schema_root,omitempty"` - - // The memory bandwidth schema in 'container_id' group - MemBwSchema string `json:"mem_bw_schema,omitempty"` - - // The memory bandwidth monitoring statistics from NUMA nodes in 'container_id' group - MBMStats *[]intelrdt.MBMNumaNodeStats `json:"mbm_stats,omitempty"` - - // The cache monitoring technology statistics from NUMA nodes in 'container_id' group - CMTStats *[]intelrdt.CMTNumaNodeStats `json:"cmt_stats,omitempty"` -} - -type NetworkInterface struct { - // Name is the name of the network interface. 
- Name string - - RxBytes uint64 - RxPackets uint64 - RxErrors uint64 - RxDropped uint64 - TxBytes uint64 - TxPackets uint64 - TxErrors uint64 - TxDropped uint64 -} diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/.gitignore b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/.gitignore deleted file mode 100644 index b4826968b8..0000000000 --- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -*~ -*.swp -*.orig -tags diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/CHANGELOG b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/CHANGELOG deleted file mode 100644 index a01d9a722d..0000000000 --- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/CHANGELOG +++ /dev/null @@ -1,17 +0,0 @@ -libseccomp-golang: Releases -=============================================================================== -https://github.com/seccomp/libseccomp-golang - -* Version 0.9.1 - May 21, 2019 -- Minimum supported version of libseccomp bumped to v2.2.0 -- Use Libseccomp's `seccomp_version` API to retrieve library version -- Unconditionally set TSync attribute for filters, due to Go's heavily threaded nature -- Fix CVE-2017-18367 - Multiple syscall arguments were incorrectly combined with logical-OR, instead of logical-AND -- Fix a failure to build on Debian-based distributions due to CGo code -- Fix unit test failures on 32-bit architectures -- Improve several errors to be more verbose about their causes -- Add support for SCMP_ACT_LOG (with libseccomp versions 2.4.x and higher), permitting syscalls but logging their execution -- Add support for SCMP_FLTATR_CTL_LOG (with libseccomp versions 2.4.x and higher), logging not-allowed actions when they are denied - -* Version 0.9.0 - January 5, 2017 -- Initial tagged release diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/LICENSE b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/LICENSE deleted file mode 100644 index 81cf60de29..0000000000 --- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2015 Matthew Heon -Copyright (c) 2015 Paul Moore -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: -- Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. -- Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/Makefile b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/Makefile deleted file mode 100644 index 1ff4cc8985..0000000000 --- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/Makefile +++ /dev/null @@ -1,26 +0,0 @@ -# libseccomp-golang - -.PHONY: all check check-build check-syntax fix-syntax vet test lint - -all: check-build - -check: vet test - -check-build: - go build - -check-syntax: - gofmt -d . - -fix-syntax: - gofmt -w . - -vet: - go vet -v - -test: - go test -v - -lint: - @$(if $(shell which golint),true,$(error "install golint and include it in your PATH")) - golint -set_exit_status diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/README b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/README deleted file mode 100644 index 66839a4668..0000000000 --- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/README +++ /dev/null @@ -1,51 +0,0 @@ -libseccomp-golang: Go Language Bindings for the libseccomp Project -=============================================================================== -https://github.com/seccomp/libseccomp-golang -https://github.com/seccomp/libseccomp - -The libseccomp library provides an easy to use, platform independent, interface -to the Linux Kernel's syscall filtering mechanism. The libseccomp API is -designed to abstract away the underlying BPF based syscall filter language and -present a more conventional function-call based filtering interface that should -be familiar to, and easily adopted by, application developers. - -The libseccomp-golang library provides a Go based interface to the libseccomp -library. - -* Online Resources - -The library source repository currently lives on GitHub at the following URLs: - - -> https://github.com/seccomp/libseccomp-golang - -> https://github.com/seccomp/libseccomp - -The project mailing list is currently hosted on Google Groups at the URL below, -please note that a Google account is not required to subscribe to the mailing -list. - - -> https://groups.google.com/d/forum/libseccomp - -Documentation is also available at: - - -> https://godoc.org/github.com/seccomp/libseccomp-golang - -* Installing the package - -The libseccomp-golang bindings require at least Go v1.2.1 and GCC v4.8.4; -earlier versions may yield unpredictable results. If you meet these -requirements you can install this package using the command below: - - $ go get github.com/seccomp/libseccomp-golang - -* Testing the Library - -A number of tests and lint related recipes are provided in the Makefile, if -you want to run the standard regression tests, you can excute the following: - - $ make check - -In order to execute the 'make lint' recipe the 'golint' tool is needed, it -can be found at: - - -> https://github.com/golang/lint - diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/SUBMITTING_PATCHES b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/SUBMITTING_PATCHES deleted file mode 100644 index 744e5cd64f..0000000000 --- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/SUBMITTING_PATCHES +++ /dev/null @@ -1,112 +0,0 @@ -How to Submit Patches to the libseccomp Project -=============================================================================== -https://github.com/seccomp/libseccomp-golang - -This document is intended to act as a guide to help you contribute to the -libseccomp project. 
It is not perfect, and there will always be exceptions -to the rules described here, but by following the instructions below you -should have a much easier time getting your work merged with the upstream -project. - -* Test Your Code - -There are two possible tests you can run to verify your code. The first test -is used to check the formatting and coding style of your changes, you can run -the test with the following command: - - # make check-syntax - -... if there are any problems with your changes a diff/patch will be shown -which indicates the problems and how to fix them. - -The second possible test is used to ensure the sanity of your code changes -and to test these changes against the included tests. You can run the test -with the following command: - - # make check - -... if there are any faults or errors they will be displayed. - -* Generate the Patch(es) - -Depending on how you decided to work with the libseccomp code base and what -tools you are using there are different ways to generate your patch(es). -However, regardless of what tools you use, you should always generate your -patches using the "unified" diff/patch format and the patches should always -apply to the libseccomp source tree using the following command from the top -directory of the libseccomp sources: - - # patch -p1 < changes.patch - -If you are not using git, stacked git (stgit), or some other tool which can -generate patch files for you automatically, you may find the following command -helpful in generating patches, where "libseccomp.orig/" is the unmodified -source code directory and "libseccomp/" is the source code directory with your -changes: - - # diff -purN libseccomp-golang.orig/ libseccomp-golang/ - -When in doubt please generate your patch and try applying it to an unmodified -copy of the libseccomp sources; if it fails for you, it will fail for the rest -of us. - -* Explain Your Work - -At the top of every patch you should include a description of the problem you -are trying to solve, how you solved it, and why you chose the solution you -implemented. If you are submitting a bug fix, it is also incredibly helpful -if you can describe/include a reproducer for the problem in the description as -well as instructions on how to test for the bug and verify that it has been -fixed. - -* Sign Your Work - -The sign-off is a simple line at the end of the patch description, which -certifies that you wrote it or otherwise have the right to pass it on as an -open-source patch. The "Developer's Certificate of Origin" pledge is taken -from the Linux Kernel and the rules are pretty simple: - - Developer's Certificate of Origin 1.1 - - By making a contribution to this project, I certify that: - - (a) The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - - (b) The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - - (c) The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. 
- - (d) I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. - -... then you just add a line to the bottom of your patch description, with -your real name, saying: - - Signed-off-by: Random J Developer - -* Email Your Patch(es) - -Finally, you will need to email your patches to the mailing list so they can -be reviewed and potentially merged into the main libseccomp-golang repository. -When sending patches to the mailing list it is important to send your email in -text form, no HTML mail please, and ensure that your email client does not -mangle your patches. It should be possible to save your raw email to disk and -apply it directly to the libseccomp source code; if that fails then you likely -have a problem with your email client. When in doubt try a test first by -sending yourself an email with your patch and attempting to apply the emailed -patch to the libseccomp-golang repository; if it fails for you, it will fail -for the rest of us trying to test your patch and include it in the main -libseccomp-golang repository. diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp.go b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp.go deleted file mode 100644 index a3cc53822c..0000000000 --- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp.go +++ /dev/null @@ -1,935 +0,0 @@ -// +build linux - -// Public API specification for libseccomp Go bindings -// Contains public API for the bindings - -// Package seccomp provides bindings for libseccomp, a library wrapping the Linux -// seccomp syscall. Seccomp enables an application to restrict system call use -// for itself and its children. -package seccomp - -import ( - "fmt" - "os" - "runtime" - "strings" - "sync" - "syscall" - "unsafe" -) - -// C wrapping code - -// #cgo pkg-config: libseccomp -// #include -// #include -import "C" - -// Exported types - -// VersionError denotes that the system libseccomp version is incompatible -// with this package. -type VersionError struct { - message string - minimum string -} - -func (e VersionError) Error() string { - format := "Libseccomp version too low: " - if e.message != "" { - format += e.message + ": " - } - format += "minimum supported is " - if e.minimum != "" { - format += e.minimum + ": " - } else { - format += "2.2.0: " - } - format += "detected %d.%d.%d" - return fmt.Sprintf(format, verMajor, verMinor, verMicro) -} - -// ScmpArch represents a CPU architecture. Seccomp can restrict syscalls on a -// per-architecture basis. 
-type ScmpArch uint - -// ScmpAction represents an action to be taken on a filter rule match in -// libseccomp -type ScmpAction uint - -// ScmpCompareOp represents a comparison operator which can be used in a filter -// rule -type ScmpCompareOp uint - -// ScmpCondition represents a rule in a libseccomp filter context -type ScmpCondition struct { - Argument uint `json:"argument,omitempty"` - Op ScmpCompareOp `json:"operator,omitempty"` - Operand1 uint64 `json:"operand_one,omitempty"` - Operand2 uint64 `json:"operand_two,omitempty"` -} - -// ScmpSyscall represents a Linux System Call -type ScmpSyscall int32 - -// Exported Constants - -const ( - // Valid architectures recognized by libseccomp - // PowerPC and S390(x) architectures are unavailable below library version - // v2.3.0 and will returns errors if used with incompatible libraries - - // ArchInvalid is a placeholder to ensure uninitialized ScmpArch - // variables are invalid - ArchInvalid ScmpArch = iota - // ArchNative is the native architecture of the kernel - ArchNative ScmpArch = iota - // ArchX86 represents 32-bit x86 syscalls - ArchX86 ScmpArch = iota - // ArchAMD64 represents 64-bit x86-64 syscalls - ArchAMD64 ScmpArch = iota - // ArchX32 represents 64-bit x86-64 syscalls (32-bit pointers) - ArchX32 ScmpArch = iota - // ArchARM represents 32-bit ARM syscalls - ArchARM ScmpArch = iota - // ArchARM64 represents 64-bit ARM syscalls - ArchARM64 ScmpArch = iota - // ArchMIPS represents 32-bit MIPS syscalls - ArchMIPS ScmpArch = iota - // ArchMIPS64 represents 64-bit MIPS syscalls - ArchMIPS64 ScmpArch = iota - // ArchMIPS64N32 represents 64-bit MIPS syscalls (32-bit pointers) - ArchMIPS64N32 ScmpArch = iota - // ArchMIPSEL represents 32-bit MIPS syscalls (little endian) - ArchMIPSEL ScmpArch = iota - // ArchMIPSEL64 represents 64-bit MIPS syscalls (little endian) - ArchMIPSEL64 ScmpArch = iota - // ArchMIPSEL64N32 represents 64-bit MIPS syscalls (little endian, - // 32-bit pointers) - ArchMIPSEL64N32 ScmpArch = iota - // ArchPPC represents 32-bit POWERPC syscalls - ArchPPC ScmpArch = iota - // ArchPPC64 represents 64-bit POWER syscalls (big endian) - ArchPPC64 ScmpArch = iota - // ArchPPC64LE represents 64-bit POWER syscalls (little endian) - ArchPPC64LE ScmpArch = iota - // ArchS390 represents 31-bit System z/390 syscalls - ArchS390 ScmpArch = iota - // ArchS390X represents 64-bit System z/390 syscalls - ArchS390X ScmpArch = iota -) - -const ( - // Supported actions on filter match - - // ActInvalid is a placeholder to ensure uninitialized ScmpAction - // variables are invalid - ActInvalid ScmpAction = iota - // ActKill kills the process - ActKill ScmpAction = iota - // ActTrap throws SIGSYS - ActTrap ScmpAction = iota - // ActErrno causes the syscall to return a negative error code. This - // code can be set with the SetReturnCode method - ActErrno ScmpAction = iota - // ActTrace causes the syscall to notify tracing processes with the - // given error code. This code can be set with the SetReturnCode method - ActTrace ScmpAction = iota - // ActAllow permits the syscall to continue execution - ActAllow ScmpAction = iota - // ActLog permits the syscall to continue execution after logging it. - // This action is only usable when libseccomp API level 3 or higher is - // supported. 
- ActLog ScmpAction = iota -) - -const ( - // These are comparison operators used in conditional seccomp rules - // They are used to compare the value of a single argument of a syscall - // against a user-defined constant - - // CompareInvalid is a placeholder to ensure uninitialized ScmpCompareOp - // variables are invalid - CompareInvalid ScmpCompareOp = iota - // CompareNotEqual returns true if the argument is not equal to the - // given value - CompareNotEqual ScmpCompareOp = iota - // CompareLess returns true if the argument is less than the given value - CompareLess ScmpCompareOp = iota - // CompareLessOrEqual returns true if the argument is less than or equal - // to the given value - CompareLessOrEqual ScmpCompareOp = iota - // CompareEqual returns true if the argument is equal to the given value - CompareEqual ScmpCompareOp = iota - // CompareGreaterEqual returns true if the argument is greater than or - // equal to the given value - CompareGreaterEqual ScmpCompareOp = iota - // CompareGreater returns true if the argument is greater than the given - // value - CompareGreater ScmpCompareOp = iota - // CompareMaskedEqual returns true if the argument is equal to the given - // value, when masked (bitwise &) against the second given value - CompareMaskedEqual ScmpCompareOp = iota -) - -// Helpers for types - -// GetArchFromString returns an ScmpArch constant from a string representing an -// architecture -func GetArchFromString(arch string) (ScmpArch, error) { - if err := ensureSupportedVersion(); err != nil { - return ArchInvalid, err - } - - switch strings.ToLower(arch) { - case "x86": - return ArchX86, nil - case "amd64", "x86-64", "x86_64", "x64": - return ArchAMD64, nil - case "x32": - return ArchX32, nil - case "arm": - return ArchARM, nil - case "arm64", "aarch64": - return ArchARM64, nil - case "mips": - return ArchMIPS, nil - case "mips64": - return ArchMIPS64, nil - case "mips64n32": - return ArchMIPS64N32, nil - case "mipsel": - return ArchMIPSEL, nil - case "mipsel64": - return ArchMIPSEL64, nil - case "mipsel64n32": - return ArchMIPSEL64N32, nil - case "ppc": - return ArchPPC, nil - case "ppc64": - return ArchPPC64, nil - case "ppc64le": - return ArchPPC64LE, nil - case "s390": - return ArchS390, nil - case "s390x": - return ArchS390X, nil - default: - return ArchInvalid, fmt.Errorf("cannot convert unrecognized string %q", arch) - } -} - -// String returns a string representation of an architecture constant -func (a ScmpArch) String() string { - switch a { - case ArchX86: - return "x86" - case ArchAMD64: - return "amd64" - case ArchX32: - return "x32" - case ArchARM: - return "arm" - case ArchARM64: - return "arm64" - case ArchMIPS: - return "mips" - case ArchMIPS64: - return "mips64" - case ArchMIPS64N32: - return "mips64n32" - case ArchMIPSEL: - return "mipsel" - case ArchMIPSEL64: - return "mipsel64" - case ArchMIPSEL64N32: - return "mipsel64n32" - case ArchPPC: - return "ppc" - case ArchPPC64: - return "ppc64" - case ArchPPC64LE: - return "ppc64le" - case ArchS390: - return "s390" - case ArchS390X: - return "s390x" - case ArchNative: - return "native" - case ArchInvalid: - return "Invalid architecture" - default: - return fmt.Sprintf("Unknown architecture %#x", uint(a)) - } -} - -// String returns a string representation of a comparison operator constant -func (a ScmpCompareOp) String() string { - switch a { - case CompareNotEqual: - return "Not equal" - case CompareLess: - return "Less than" - case CompareLessOrEqual: - return "Less than or equal to" - case 
CompareEqual: - return "Equal" - case CompareGreaterEqual: - return "Greater than or equal to" - case CompareGreater: - return "Greater than" - case CompareMaskedEqual: - return "Masked equality" - case CompareInvalid: - return "Invalid comparison operator" - default: - return fmt.Sprintf("Unrecognized comparison operator %#x", uint(a)) - } -} - -// String returns a string representation of a seccomp match action -func (a ScmpAction) String() string { - switch a & 0xFFFF { - case ActKill: - return "Action: Kill Process" - case ActTrap: - return "Action: Send SIGSYS" - case ActErrno: - return fmt.Sprintf("Action: Return error code %d", (a >> 16)) - case ActTrace: - return fmt.Sprintf("Action: Notify tracing processes with code %d", - (a >> 16)) - case ActLog: - return "Action: Log system call" - case ActAllow: - return "Action: Allow system call" - default: - return fmt.Sprintf("Unrecognized Action %#x", uint(a)) - } -} - -// SetReturnCode adds a return code to a supporting ScmpAction, clearing any -// existing code Only valid on ActErrno and ActTrace. Takes no action otherwise. -// Accepts 16-bit return code as argument. -// Returns a valid ScmpAction of the original type with the new error code set. -func (a ScmpAction) SetReturnCode(code int16) ScmpAction { - aTmp := a & 0x0000FFFF - if aTmp == ActErrno || aTmp == ActTrace { - return (aTmp | (ScmpAction(code)&0xFFFF)<<16) - } - return a -} - -// GetReturnCode returns the return code of an ScmpAction -func (a ScmpAction) GetReturnCode() int16 { - return int16(a >> 16) -} - -// General utility functions - -// GetLibraryVersion returns the version of the library the bindings are built -// against. -// The version is formatted as follows: Major.Minor.Micro -func GetLibraryVersion() (major, minor, micro uint) { - return verMajor, verMinor, verMicro -} - -// GetApi returns the API level supported by the system. -// Returns a positive int containing the API level, or 0 with an error if the -// API level could not be detected due to the library being older than v2.4.0. -// See the seccomp_api_get(3) man page for details on available API levels: -// https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3 -func GetApi() (uint, error) { - return getApi() -} - -// SetApi forcibly sets the API level. General use of this function is strongly -// discouraged. -// Returns an error if the API level could not be set. An error is always -// returned if the library is older than v2.4.0 -// See the seccomp_api_get(3) man page for details on available API levels: -// https://github.com/seccomp/libseccomp/blob/master/doc/man/man3/seccomp_api_get.3 -func SetApi(api uint) error { - return setApi(api) -} - -// Syscall functions - -// GetName retrieves the name of a syscall from its number. -// Acts on any syscall number. -// Returns either a string containing the name of the syscall, or an error. -func (s ScmpSyscall) GetName() (string, error) { - return s.GetNameByArch(ArchNative) -} - -// GetNameByArch retrieves the name of a syscall from its number for a given -// architecture. -// Acts on any syscall number. -// Accepts a valid architecture constant. -// Returns either a string containing the name of the syscall, or an error. -// if the syscall is unrecognized or an issue occurred. 
-func (s ScmpSyscall) GetNameByArch(arch ScmpArch) (string, error) { - if err := sanitizeArch(arch); err != nil { - return "", err - } - - cString := C.seccomp_syscall_resolve_num_arch(arch.toNative(), C.int(s)) - if cString == nil { - return "", fmt.Errorf("could not resolve syscall name for %#x", int32(s)) - } - defer C.free(unsafe.Pointer(cString)) - - finalStr := C.GoString(cString) - return finalStr, nil -} - -// GetSyscallFromName returns the number of a syscall by name on the kernel's -// native architecture. -// Accepts a string containing the name of a syscall. -// Returns the number of the syscall, or an error if no syscall with that name -// was found. -func GetSyscallFromName(name string) (ScmpSyscall, error) { - if err := ensureSupportedVersion(); err != nil { - return 0, err - } - - cString := C.CString(name) - defer C.free(unsafe.Pointer(cString)) - - result := C.seccomp_syscall_resolve_name(cString) - if result == scmpError { - return 0, fmt.Errorf("could not resolve name to syscall: %q", name) - } - - return ScmpSyscall(result), nil -} - -// GetSyscallFromNameByArch returns the number of a syscall by name for a given -// architecture's ABI. -// Accepts the name of a syscall and an architecture constant. -// Returns the number of the syscall, or an error if an invalid architecture is -// passed or a syscall with that name was not found. -func GetSyscallFromNameByArch(name string, arch ScmpArch) (ScmpSyscall, error) { - if err := ensureSupportedVersion(); err != nil { - return 0, err - } - if err := sanitizeArch(arch); err != nil { - return 0, err - } - - cString := C.CString(name) - defer C.free(unsafe.Pointer(cString)) - - result := C.seccomp_syscall_resolve_name_arch(arch.toNative(), cString) - if result == scmpError { - return 0, fmt.Errorf("could not resolve name to syscall: %q on %v", name, arch) - } - - return ScmpSyscall(result), nil -} - -// MakeCondition creates and returns a new condition to attach to a filter rule. -// Associated rules will only match if this condition is true. -// Accepts the number the argument we are checking, and a comparison operator -// and value to compare to. -// The rule will match if argument $arg (zero-indexed) of the syscall is -// $COMPARE_OP the provided comparison value. -// Some comparison operators accept two values. Masked equals, for example, -// will mask $arg of the syscall with the second value provided (via bitwise -// AND) and then compare against the first value provided. -// For example, in the less than or equal case, if the syscall argument was -// 0 and the value provided was 1, the condition would match, as 0 is less -// than or equal to 1. -// Return either an error on bad argument or a valid ScmpCondition struct. 
-func MakeCondition(arg uint, comparison ScmpCompareOp, values ...uint64) (ScmpCondition, error) { - var condStruct ScmpCondition - - if err := ensureSupportedVersion(); err != nil { - return condStruct, err - } - - if comparison == CompareInvalid { - return condStruct, fmt.Errorf("invalid comparison operator") - } else if arg > 5 { - return condStruct, fmt.Errorf("syscalls only have up to 6 arguments (%d given)", arg) - } else if len(values) > 2 { - return condStruct, fmt.Errorf("conditions can have at most 2 arguments (%d given)", len(values)) - } else if len(values) == 0 { - return condStruct, fmt.Errorf("must provide at least one value to compare against") - } - - condStruct.Argument = arg - condStruct.Op = comparison - condStruct.Operand1 = values[0] - if len(values) == 2 { - condStruct.Operand2 = values[1] - } else { - condStruct.Operand2 = 0 // Unused - } - - return condStruct, nil -} - -// Utility Functions - -// GetNativeArch returns architecture token representing the native kernel -// architecture -func GetNativeArch() (ScmpArch, error) { - if err := ensureSupportedVersion(); err != nil { - return ArchInvalid, err - } - - arch := C.seccomp_arch_native() - - return archFromNative(arch) -} - -// Public Filter API - -// ScmpFilter represents a filter context in libseccomp. -// A filter context is initially empty. Rules can be added to it, and it can -// then be loaded into the kernel. -type ScmpFilter struct { - filterCtx C.scmp_filter_ctx - valid bool - lock sync.Mutex -} - -// NewFilter creates and returns a new filter context. -// Accepts a default action to be taken for syscalls which match no rules in -// the filter. -// Returns a reference to a valid filter context, or nil and an error if the -// filter context could not be created or an invalid default action was given. -func NewFilter(defaultAction ScmpAction) (*ScmpFilter, error) { - if err := ensureSupportedVersion(); err != nil { - return nil, err - } - - if err := sanitizeAction(defaultAction); err != nil { - return nil, err - } - - fPtr := C.seccomp_init(defaultAction.toNative()) - if fPtr == nil { - return nil, fmt.Errorf("could not create filter") - } - - filter := new(ScmpFilter) - filter.filterCtx = fPtr - filter.valid = true - runtime.SetFinalizer(filter, filterFinalizer) - - // Enable TSync so all goroutines will receive the same rules - // If the kernel does not support TSYNC, allow us to continue without error - if err := filter.setFilterAttr(filterAttrTsync, 0x1); err != nil && err != syscall.ENOTSUP { - filter.Release() - return nil, fmt.Errorf("could not create filter - error setting tsync bit: %v", err) - } - - return filter, nil -} - -// IsValid determines whether a filter context is valid to use. -// Some operations (Release and Merge) render filter contexts invalid and -// consequently prevent further use. -func (f *ScmpFilter) IsValid() bool { - f.lock.Lock() - defer f.lock.Unlock() - - return f.valid -} - -// Reset resets a filter context, removing all its existing state. -// Accepts a new default action to be taken for syscalls which do not match. -// Returns an error if the filter or action provided are invalid. 
-func (f *ScmpFilter) Reset(defaultAction ScmpAction) error { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeAction(defaultAction); err != nil { - return err - } else if !f.valid { - return errBadFilter - } - - retCode := C.seccomp_reset(f.filterCtx, defaultAction.toNative()) - if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// Release releases a filter context, freeing its memory. Should be called after -// loading into the kernel, when the filter is no longer needed. -// After calling this function, the given filter is no longer valid and cannot -// be used. -// Release() will be invoked automatically when a filter context is garbage -// collected, but can also be called manually to free memory. -func (f *ScmpFilter) Release() { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return - } - - f.valid = false - C.seccomp_release(f.filterCtx) -} - -// Merge merges two filter contexts. -// The source filter src will be released as part of the process, and will no -// longer be usable or valid after this call. -// To be merged, filters must NOT share any architectures, and all their -// attributes (Default Action, Bad Arch Action, and No New Privs bools) -// must match. -// The filter src will be merged into the filter this is called on. -// The architectures of the src filter not present in the destination, and all -// associated rules, will be added to the destination. -// Returns an error if merging the filters failed. -func (f *ScmpFilter) Merge(src *ScmpFilter) error { - f.lock.Lock() - defer f.lock.Unlock() - - src.lock.Lock() - defer src.lock.Unlock() - - if !src.valid || !f.valid { - return fmt.Errorf("one or more of the filter contexts is invalid or uninitialized") - } - - // Merge the filters - retCode := C.seccomp_merge(f.filterCtx, src.filterCtx) - if syscall.Errno(-1*retCode) == syscall.EINVAL { - return fmt.Errorf("filters could not be merged due to a mismatch in attributes or invalid filter") - } else if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - src.valid = false - - return nil -} - -// IsArchPresent checks if an architecture is present in a filter. -// If a filter contains an architecture, it uses its default action for -// syscalls which do not match rules in it, and its rules can match syscalls -// for that ABI. -// If a filter does not contain an architecture, all syscalls made to that -// kernel ABI will fail with the filter's default Bad Architecture Action -// (by default, killing the process). -// Accepts an architecture constant. -// Returns true if the architecture is present in the filter, false otherwise, -// and an error on an invalid filter context, architecture constant, or an -// issue with the call to libseccomp. -func (f *ScmpFilter) IsArchPresent(arch ScmpArch) (bool, error) { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeArch(arch); err != nil { - return false, err - } else if !f.valid { - return false, errBadFilter - } - - retCode := C.seccomp_arch_exist(f.filterCtx, arch.toNative()) - if syscall.Errno(-1*retCode) == syscall.EEXIST { - // -EEXIST is "arch not present" - return false, nil - } else if retCode != 0 { - return false, syscall.Errno(-1 * retCode) - } - - return true, nil -} - -// AddArch adds an architecture to the filter. -// Accepts an architecture constant. -// Returns an error on invalid filter context or architecture token, or an -// issue with the call to libseccomp. 
-func (f *ScmpFilter) AddArch(arch ScmpArch) error { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeArch(arch); err != nil { - return err - } else if !f.valid { - return errBadFilter - } - - // Libseccomp returns -EEXIST if the specified architecture is already - // present. Succeed silently in this case, as it's not fatal, and the - // architecture is present already. - retCode := C.seccomp_arch_add(f.filterCtx, arch.toNative()) - if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// RemoveArch removes an architecture from the filter. -// Accepts an architecture constant. -// Returns an error on invalid filter context or architecture token, or an -// issue with the call to libseccomp. -func (f *ScmpFilter) RemoveArch(arch ScmpArch) error { - f.lock.Lock() - defer f.lock.Unlock() - - if err := sanitizeArch(arch); err != nil { - return err - } else if !f.valid { - return errBadFilter - } - - // Similar to AddArch, -EEXIST is returned if the arch is not present - // Succeed silently in that case, this is not fatal and the architecture - // is not present in the filter after RemoveArch - retCode := C.seccomp_arch_remove(f.filterCtx, arch.toNative()) - if retCode != 0 && syscall.Errno(-1*retCode) != syscall.EEXIST { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// Load loads a filter context into the kernel. -// Returns an error if the filter context is invalid or the syscall failed. -func (f *ScmpFilter) Load() error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - if retCode := C.seccomp_load(f.filterCtx); retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// GetDefaultAction returns the default action taken on a syscall which does not -// match a rule in the filter, or an error if an issue was encountered -// retrieving the value. -func (f *ScmpFilter) GetDefaultAction() (ScmpAction, error) { - action, err := f.getFilterAttr(filterAttrActDefault) - if err != nil { - return 0x0, err - } - - return actionFromNative(action) -} - -// GetBadArchAction returns the default action taken on a syscall for an -// architecture not in the filter, or an error if an issue was encountered -// retrieving the value. -func (f *ScmpFilter) GetBadArchAction() (ScmpAction, error) { - action, err := f.getFilterAttr(filterAttrActBadArch) - if err != nil { - return 0x0, err - } - - return actionFromNative(action) -} - -// GetNoNewPrivsBit returns the current state the No New Privileges bit will be set -// to on the filter being loaded, or an error if an issue was encountered -// retrieving the value. -// The No New Privileges bit tells the kernel that new processes run with exec() -// cannot gain more privileges than the process that ran exec(). -// For example, a process with No New Privileges set would be unable to exec -// setuid/setgid executables. -func (f *ScmpFilter) GetNoNewPrivsBit() (bool, error) { - noNewPrivs, err := f.getFilterAttr(filterAttrNNP) - if err != nil { - return false, err - } - - if noNewPrivs == 0 { - return false, nil - } - - return true, nil -} - -// GetLogBit returns the current state the Log bit will be set to on the filter -// being loaded, or an error if an issue was encountered retrieving the value. -// The Log bit tells the kernel that all actions taken by the filter, with the -// exception of ActAllow, should be logged. -// The Log bit is only usable when libseccomp API level 3 or higher is -// supported. 
-func (f *ScmpFilter) GetLogBit() (bool, error) { - log, err := f.getFilterAttr(filterAttrLog) - if err != nil { - api, apiErr := getApi() - if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) { - return false, fmt.Errorf("getting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") - } - - return false, err - } - - if log == 0 { - return false, nil - } - - return true, nil -} - -// SetBadArchAction sets the default action taken on a syscall for an -// architecture not in the filter, or an error if an issue was encountered -// setting the value. -func (f *ScmpFilter) SetBadArchAction(action ScmpAction) error { - if err := sanitizeAction(action); err != nil { - return err - } - - return f.setFilterAttr(filterAttrActBadArch, action.toNative()) -} - -// SetNoNewPrivsBit sets the state of the No New Privileges bit, which will be -// applied on filter load, or an error if an issue was encountered setting the -// value. -// Filters with No New Privileges set to 0 can only be loaded if the process -// has the CAP_SYS_ADMIN capability. -func (f *ScmpFilter) SetNoNewPrivsBit(state bool) error { - var toSet C.uint32_t = 0x0 - - if state { - toSet = 0x1 - } - - return f.setFilterAttr(filterAttrNNP, toSet) -} - -// SetLogBit sets the state of the Log bit, which will be applied on filter -// load, or an error if an issue was encountered setting the value. -// The Log bit is only usable when libseccomp API level 3 or higher is -// supported. -func (f *ScmpFilter) SetLogBit(state bool) error { - var toSet C.uint32_t = 0x0 - - if state { - toSet = 0x1 - } - - err := f.setFilterAttr(filterAttrLog, toSet) - if err != nil { - api, apiErr := getApi() - if (apiErr != nil && api == 0) || (apiErr == nil && api < 3) { - return fmt.Errorf("setting the log bit is only supported in libseccomp 2.4.0 and newer with API level 3 or higher") - } - } - - return err -} - -// SetSyscallPriority sets a syscall's priority. -// This provides a hint to the filter generator in libseccomp about the -// importance of this syscall. High-priority syscalls are placed -// first in the filter code, and incur less overhead (at the expense of -// lower-priority syscalls). -func (f *ScmpFilter) SetSyscallPriority(call ScmpSyscall, priority uint8) error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - if retCode := C.seccomp_syscall_priority(f.filterCtx, C.int(call), - C.uint8_t(priority)); retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// AddRule adds a single rule for an unconditional action on a syscall. -// Accepts the number of the syscall and the action to be taken on the call -// being made. -// Returns an error if an issue was encountered adding the rule. -func (f *ScmpFilter) AddRule(call ScmpSyscall, action ScmpAction) error { - return f.addRuleGeneric(call, action, false, nil) -} - -// AddRuleExact adds a single rule for an unconditional action on a syscall. -// Accepts the number of the syscall and the action to be taken on the call -// being made. -// No modifications will be made to the rule, and it will fail to add if it -// cannot be applied to the current architecture without modification. -// The rule will function exactly as described, but it may not function identically -// (or be able to be applied to) all architectures. -// Returns an error if an issue was encountered adding the rule. 
-func (f *ScmpFilter) AddRuleExact(call ScmpSyscall, action ScmpAction) error { - return f.addRuleGeneric(call, action, true, nil) -} - -// AddRuleConditional adds a single rule for a conditional action on a syscall. -// Returns an error if an issue was encountered adding the rule. -// All conditions must match for the rule to match. -// There is a bug in library versions below v2.2.1 which can, in some cases, -// cause conditions to be lost when more than one are used. Consequently, -// AddRuleConditional is disabled on library versions lower than v2.2.1 -func (f *ScmpFilter) AddRuleConditional(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { - return f.addRuleGeneric(call, action, false, conds) -} - -// AddRuleConditionalExact adds a single rule for a conditional action on a -// syscall. -// No modifications will be made to the rule, and it will fail to add if it -// cannot be applied to the current architecture without modification. -// The rule will function exactly as described, but it may not function identically -// (or be able to be applied to) all architectures. -// Returns an error if an issue was encountered adding the rule. -// There is a bug in library versions below v2.2.1 which can, in some cases, -// cause conditions to be lost when more than one are used. Consequently, -// AddRuleConditionalExact is disabled on library versions lower than v2.2.1 -func (f *ScmpFilter) AddRuleConditionalExact(call ScmpSyscall, action ScmpAction, conds []ScmpCondition) error { - return f.addRuleGeneric(call, action, true, conds) -} - -// ExportPFC output PFC-formatted, human-readable dump of a filter context's -// rules to a file. -// Accepts file to write to (must be open for writing). -// Returns an error if writing to the file fails. -func (f *ScmpFilter) ExportPFC(file *os.File) error { - f.lock.Lock() - defer f.lock.Unlock() - - fd := file.Fd() - - if !f.valid { - return errBadFilter - } - - if retCode := C.seccomp_export_pfc(f.filterCtx, C.int(fd)); retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// ExportBPF outputs Berkeley Packet Filter-formatted, kernel-readable dump of a -// filter context's rules to a file. -// Accepts file to write to (must be open for writing). -// Returns an error if writing to the file fails. 
-func (f *ScmpFilter) ExportBPF(file *os.File) error {
-	f.lock.Lock()
-	defer f.lock.Unlock()
-
-	fd := file.Fd()
-
-	if !f.valid {
-		return errBadFilter
-	}
-
-	if retCode := C.seccomp_export_bpf(f.filterCtx, C.int(fd)); retCode != 0 {
-		return syscall.Errno(-1 * retCode)
-	}
-
-	return nil
-}
diff --git a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go b/src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go
deleted file mode 100644
index 4e36b27ae8..0000000000
--- a/src/runtime/vendor/github.com/seccomp/libseccomp-golang/seccomp_internal.go
+++ /dev/null
@@ -1,571 +0,0 @@
-// +build linux
-
-// Internal functions for libseccomp Go bindings
-// No exported functions
-
-package seccomp
-
-import (
-	"fmt"
-	"syscall"
-)
-
-// Unexported C wrapping code - provides the C-Golang interface
-// Get the seccomp header in scope
-// Need stdlib.h for free() on cstrings
-
-// #cgo pkg-config: libseccomp
-/*
-#include <errno.h>
-#include <stdlib.h>
-#include <seccomp.h>
-
-#if SCMP_VER_MAJOR < 2
-#error Minimum supported version of Libseccomp is v2.2.0
-#elif SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 2
-#error Minimum supported version of Libseccomp is v2.2.0
-#endif
-
-#define ARCH_BAD ~0
-
-const uint32_t C_ARCH_BAD = ARCH_BAD;
-
-#ifndef SCMP_ARCH_PPC
-#define SCMP_ARCH_PPC ARCH_BAD
-#endif
-
-#ifndef SCMP_ARCH_PPC64
-#define SCMP_ARCH_PPC64 ARCH_BAD
-#endif
-
-#ifndef SCMP_ARCH_PPC64LE
-#define SCMP_ARCH_PPC64LE ARCH_BAD
-#endif
-
-#ifndef SCMP_ARCH_S390
-#define SCMP_ARCH_S390 ARCH_BAD
-#endif
-
-#ifndef SCMP_ARCH_S390X
-#define SCMP_ARCH_S390X ARCH_BAD
-#endif
-
-const uint32_t C_ARCH_NATIVE = SCMP_ARCH_NATIVE;
-const uint32_t C_ARCH_X86 = SCMP_ARCH_X86;
-const uint32_t C_ARCH_X86_64 = SCMP_ARCH_X86_64;
-const uint32_t C_ARCH_X32 = SCMP_ARCH_X32;
-const uint32_t C_ARCH_ARM = SCMP_ARCH_ARM;
-const uint32_t C_ARCH_AARCH64 = SCMP_ARCH_AARCH64;
-const uint32_t C_ARCH_MIPS = SCMP_ARCH_MIPS;
-const uint32_t C_ARCH_MIPS64 = SCMP_ARCH_MIPS64;
-const uint32_t C_ARCH_MIPS64N32 = SCMP_ARCH_MIPS64N32;
-const uint32_t C_ARCH_MIPSEL = SCMP_ARCH_MIPSEL;
-const uint32_t C_ARCH_MIPSEL64 = SCMP_ARCH_MIPSEL64;
-const uint32_t C_ARCH_MIPSEL64N32 = SCMP_ARCH_MIPSEL64N32;
-const uint32_t C_ARCH_PPC = SCMP_ARCH_PPC;
-const uint32_t C_ARCH_PPC64 = SCMP_ARCH_PPC64;
-const uint32_t C_ARCH_PPC64LE = SCMP_ARCH_PPC64LE;
-const uint32_t C_ARCH_S390 = SCMP_ARCH_S390;
-const uint32_t C_ARCH_S390X = SCMP_ARCH_S390X;
-
-#ifndef SCMP_ACT_LOG
-#define SCMP_ACT_LOG 0x7ffc0000U
-#endif
-
-const uint32_t C_ACT_KILL = SCMP_ACT_KILL;
-const uint32_t C_ACT_TRAP = SCMP_ACT_TRAP;
-const uint32_t C_ACT_ERRNO = SCMP_ACT_ERRNO(0);
-const uint32_t C_ACT_TRACE = SCMP_ACT_TRACE(0);
-const uint32_t C_ACT_LOG = SCMP_ACT_LOG;
-const uint32_t C_ACT_ALLOW = SCMP_ACT_ALLOW;
-
-// The libseccomp SCMP_FLTATR_CTL_LOG member of the scmp_filter_attr enum was
-// added in v2.4.0
-#if (SCMP_VER_MAJOR < 2) || \
-    (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4)
-#define SCMP_FLTATR_CTL_LOG _SCMP_FLTATR_MIN
-#endif
-
-const uint32_t C_ATTRIBUTE_DEFAULT = (uint32_t)SCMP_FLTATR_ACT_DEFAULT;
-const uint32_t C_ATTRIBUTE_BADARCH = (uint32_t)SCMP_FLTATR_ACT_BADARCH;
-const uint32_t C_ATTRIBUTE_NNP = (uint32_t)SCMP_FLTATR_CTL_NNP;
-const uint32_t C_ATTRIBUTE_TSYNC = (uint32_t)SCMP_FLTATR_CTL_TSYNC;
-const uint32_t C_ATTRIBUTE_LOG = (uint32_t)SCMP_FLTATR_CTL_LOG;
-
-const int C_CMP_NE = (int)SCMP_CMP_NE;
-const int C_CMP_LT = (int)SCMP_CMP_LT;
-const int C_CMP_LE = (int)SCMP_CMP_LE;
-const int C_CMP_EQ = (int)SCMP_CMP_EQ;
-const int C_CMP_GE = 
(int)SCMP_CMP_GE; -const int C_CMP_GT = (int)SCMP_CMP_GT; -const int C_CMP_MASKED_EQ = (int)SCMP_CMP_MASKED_EQ; - -const int C_VERSION_MAJOR = SCMP_VER_MAJOR; -const int C_VERSION_MINOR = SCMP_VER_MINOR; -const int C_VERSION_MICRO = SCMP_VER_MICRO; - -#if SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR >= 3 -unsigned int get_major_version() -{ - return seccomp_version()->major; -} - -unsigned int get_minor_version() -{ - return seccomp_version()->minor; -} - -unsigned int get_micro_version() -{ - return seccomp_version()->micro; -} -#else -unsigned int get_major_version() -{ - return (unsigned int)C_VERSION_MAJOR; -} - -unsigned int get_minor_version() -{ - return (unsigned int)C_VERSION_MINOR; -} - -unsigned int get_micro_version() -{ - return (unsigned int)C_VERSION_MICRO; -} -#endif - -// The libseccomp API level functions were added in v2.4.0 -#if (SCMP_VER_MAJOR < 2) || \ - (SCMP_VER_MAJOR == 2 && SCMP_VER_MINOR < 4) -const unsigned int seccomp_api_get(void) -{ - // libseccomp-golang requires libseccomp v2.2.0, at a minimum, which - // supported API level 2. However, the kernel may not support API level - // 2 constructs which are the seccomp() system call and the TSYNC - // filter flag. Return the "reserved" value of 0 here to indicate that - // proper API level support is not available in libseccomp. - return 0; -} - -int seccomp_api_set(unsigned int level) -{ - return -EOPNOTSUPP; -} -#endif - -typedef struct scmp_arg_cmp* scmp_cast_t; - -void* make_arg_cmp_array(unsigned int length) -{ - return calloc(length, sizeof(struct scmp_arg_cmp)); -} - -// Wrapper to add an scmp_arg_cmp struct to an existing arg_cmp array -void add_struct_arg_cmp( - struct scmp_arg_cmp* arr, - unsigned int pos, - unsigned int arg, - int compare, - uint64_t a, - uint64_t b - ) -{ - arr[pos].arg = arg; - arr[pos].op = compare; - arr[pos].datum_a = a; - arr[pos].datum_b = b; - - return; -} -*/ -import "C" - -// Nonexported types -type scmpFilterAttr uint32 - -// Nonexported constants - -const ( - filterAttrActDefault scmpFilterAttr = iota - filterAttrActBadArch scmpFilterAttr = iota - filterAttrNNP scmpFilterAttr = iota - filterAttrTsync scmpFilterAttr = iota - filterAttrLog scmpFilterAttr = iota -) - -const ( - // An error return from certain libseccomp functions - scmpError C.int = -1 - // Comparison boundaries to check for architecture validity - archStart ScmpArch = ArchNative - archEnd ScmpArch = ArchS390X - // Comparison boundaries to check for action validity - actionStart ScmpAction = ActKill - actionEnd ScmpAction = ActLog - // Comparison boundaries to check for comparison operator validity - compareOpStart ScmpCompareOp = CompareNotEqual - compareOpEnd ScmpCompareOp = CompareMaskedEqual -) - -var ( - // Error thrown on bad filter context - errBadFilter = fmt.Errorf("filter is invalid or uninitialized") - // Constants representing library major, minor, and micro versions - verMajor = uint(C.get_major_version()) - verMinor = uint(C.get_minor_version()) - verMicro = uint(C.get_micro_version()) -) - -// Nonexported functions - -// Check if library version is greater than or equal to the given one -func checkVersionAbove(major, minor, micro uint) bool { - return (verMajor > major) || - (verMajor == major && verMinor > minor) || - (verMajor == major && verMinor == minor && verMicro >= micro) -} - -// Ensure that the library is supported, i.e. >= 2.2.0. 
-func ensureSupportedVersion() error { - if !checkVersionAbove(2, 2, 0) { - return VersionError{} - } - return nil -} - -// Get the API level -func getApi() (uint, error) { - api := C.seccomp_api_get() - if api == 0 { - return 0, fmt.Errorf("API level operations are not supported") - } - - return uint(api), nil -} - -// Set the API level -func setApi(api uint) error { - if retCode := C.seccomp_api_set(C.uint(api)); retCode != 0 { - if syscall.Errno(-1*retCode) == syscall.EOPNOTSUPP { - return fmt.Errorf("API level operations are not supported") - } - - return fmt.Errorf("could not set API level: %v", retCode) - } - - return nil -} - -// Filter helpers - -// Filter finalizer - ensure that kernel context for filters is freed -func filterFinalizer(f *ScmpFilter) { - f.Release() -} - -// Get a raw filter attribute -func (f *ScmpFilter) getFilterAttr(attr scmpFilterAttr) (C.uint32_t, error) { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return 0x0, errBadFilter - } - - var attribute C.uint32_t - - retCode := C.seccomp_attr_get(f.filterCtx, attr.toNative(), &attribute) - if retCode != 0 { - return 0x0, syscall.Errno(-1 * retCode) - } - - return attribute, nil -} - -// Set a raw filter attribute -func (f *ScmpFilter) setFilterAttr(attr scmpFilterAttr, value C.uint32_t) error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - retCode := C.seccomp_attr_set(f.filterCtx, attr.toNative(), value) - if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// DOES NOT LOCK OR CHECK VALIDITY -// Assumes caller has already done this -// Wrapper for seccomp_rule_add_... functions -func (f *ScmpFilter) addRuleWrapper(call ScmpSyscall, action ScmpAction, exact bool, length C.uint, cond C.scmp_cast_t) error { - if length != 0 && cond == nil { - return fmt.Errorf("null conditions list, but length is nonzero") - } - - var retCode C.int - if exact { - retCode = C.seccomp_rule_add_exact_array(f.filterCtx, action.toNative(), C.int(call), length, cond) - } else { - retCode = C.seccomp_rule_add_array(f.filterCtx, action.toNative(), C.int(call), length, cond) - } - - if syscall.Errno(-1*retCode) == syscall.EFAULT { - return fmt.Errorf("unrecognized syscall %#x", int32(call)) - } else if syscall.Errno(-1*retCode) == syscall.EPERM { - return fmt.Errorf("requested action matches default action of filter") - } else if syscall.Errno(-1*retCode) == syscall.EINVAL { - return fmt.Errorf("two checks on same syscall argument") - } else if retCode != 0 { - return syscall.Errno(-1 * retCode) - } - - return nil -} - -// Generic add function for filter rules -func (f *ScmpFilter) addRuleGeneric(call ScmpSyscall, action ScmpAction, exact bool, conds []ScmpCondition) error { - f.lock.Lock() - defer f.lock.Unlock() - - if !f.valid { - return errBadFilter - } - - if len(conds) == 0 { - if err := f.addRuleWrapper(call, action, exact, 0, nil); err != nil { - return err - } - } else { - // We don't support conditional filtering in library version v2.1 - if !checkVersionAbove(2, 2, 1) { - return VersionError{ - message: "conditional filtering is not supported", - minimum: "2.2.1", - } - } - - argsArr := C.make_arg_cmp_array(C.uint(len(conds))) - if argsArr == nil { - return fmt.Errorf("error allocating memory for conditions") - } - defer C.free(argsArr) - - for i, cond := range conds { - C.add_struct_arg_cmp(C.scmp_cast_t(argsArr), C.uint(i), - C.uint(cond.Argument), cond.Op.toNative(), - C.uint64_t(cond.Operand1), C.uint64_t(cond.Operand2)) - } - - if err := 
f.addRuleWrapper(call, action, exact, C.uint(len(conds)), C.scmp_cast_t(argsArr)); err != nil { - return err - } - } - - return nil -} - -// Generic Helpers - -// Helper - Sanitize Arch token input -func sanitizeArch(in ScmpArch) error { - if in < archStart || in > archEnd { - return fmt.Errorf("unrecognized architecture %#x", uint(in)) - } - - if in.toNative() == C.C_ARCH_BAD { - return fmt.Errorf("architecture %v is not supported on this version of the library", in) - } - - return nil -} - -func sanitizeAction(in ScmpAction) error { - inTmp := in & 0x0000FFFF - if inTmp < actionStart || inTmp > actionEnd { - return fmt.Errorf("unrecognized action %#x", uint(inTmp)) - } - - if inTmp != ActTrace && inTmp != ActErrno && (in&0xFFFF0000) != 0 { - return fmt.Errorf("highest 16 bits must be zeroed except for Trace and Errno") - } - - return nil -} - -func sanitizeCompareOp(in ScmpCompareOp) error { - if in < compareOpStart || in > compareOpEnd { - return fmt.Errorf("unrecognized comparison operator %#x", uint(in)) - } - - return nil -} - -func archFromNative(a C.uint32_t) (ScmpArch, error) { - switch a { - case C.C_ARCH_X86: - return ArchX86, nil - case C.C_ARCH_X86_64: - return ArchAMD64, nil - case C.C_ARCH_X32: - return ArchX32, nil - case C.C_ARCH_ARM: - return ArchARM, nil - case C.C_ARCH_NATIVE: - return ArchNative, nil - case C.C_ARCH_AARCH64: - return ArchARM64, nil - case C.C_ARCH_MIPS: - return ArchMIPS, nil - case C.C_ARCH_MIPS64: - return ArchMIPS64, nil - case C.C_ARCH_MIPS64N32: - return ArchMIPS64N32, nil - case C.C_ARCH_MIPSEL: - return ArchMIPSEL, nil - case C.C_ARCH_MIPSEL64: - return ArchMIPSEL64, nil - case C.C_ARCH_MIPSEL64N32: - return ArchMIPSEL64N32, nil - case C.C_ARCH_PPC: - return ArchPPC, nil - case C.C_ARCH_PPC64: - return ArchPPC64, nil - case C.C_ARCH_PPC64LE: - return ArchPPC64LE, nil - case C.C_ARCH_S390: - return ArchS390, nil - case C.C_ARCH_S390X: - return ArchS390X, nil - default: - return 0x0, fmt.Errorf("unrecognized architecture %#x", uint32(a)) - } -} - -// Only use with sanitized arches, no error handling -func (a ScmpArch) toNative() C.uint32_t { - switch a { - case ArchX86: - return C.C_ARCH_X86 - case ArchAMD64: - return C.C_ARCH_X86_64 - case ArchX32: - return C.C_ARCH_X32 - case ArchARM: - return C.C_ARCH_ARM - case ArchARM64: - return C.C_ARCH_AARCH64 - case ArchMIPS: - return C.C_ARCH_MIPS - case ArchMIPS64: - return C.C_ARCH_MIPS64 - case ArchMIPS64N32: - return C.C_ARCH_MIPS64N32 - case ArchMIPSEL: - return C.C_ARCH_MIPSEL - case ArchMIPSEL64: - return C.C_ARCH_MIPSEL64 - case ArchMIPSEL64N32: - return C.C_ARCH_MIPSEL64N32 - case ArchPPC: - return C.C_ARCH_PPC - case ArchPPC64: - return C.C_ARCH_PPC64 - case ArchPPC64LE: - return C.C_ARCH_PPC64LE - case ArchS390: - return C.C_ARCH_S390 - case ArchS390X: - return C.C_ARCH_S390X - case ArchNative: - return C.C_ARCH_NATIVE - default: - return 0x0 - } -} - -// Only use with sanitized ops, no error handling -func (a ScmpCompareOp) toNative() C.int { - switch a { - case CompareNotEqual: - return C.C_CMP_NE - case CompareLess: - return C.C_CMP_LT - case CompareLessOrEqual: - return C.C_CMP_LE - case CompareEqual: - return C.C_CMP_EQ - case CompareGreaterEqual: - return C.C_CMP_GE - case CompareGreater: - return C.C_CMP_GT - case CompareMaskedEqual: - return C.C_CMP_MASKED_EQ - default: - return 0x0 - } -} - -func actionFromNative(a C.uint32_t) (ScmpAction, error) { - aTmp := a & 0xFFFF - switch a & 0xFFFF0000 { - case C.C_ACT_KILL: - return ActKill, nil - case C.C_ACT_TRAP: - return ActTrap, nil - 
case C.C_ACT_ERRNO: - return ActErrno.SetReturnCode(int16(aTmp)), nil - case C.C_ACT_TRACE: - return ActTrace.SetReturnCode(int16(aTmp)), nil - case C.C_ACT_LOG: - return ActLog, nil - case C.C_ACT_ALLOW: - return ActAllow, nil - default: - return 0x0, fmt.Errorf("unrecognized action %#x", uint32(a)) - } -} - -// Only use with sanitized actions, no error handling -func (a ScmpAction) toNative() C.uint32_t { - switch a & 0xFFFF { - case ActKill: - return C.C_ACT_KILL - case ActTrap: - return C.C_ACT_TRAP - case ActErrno: - return C.C_ACT_ERRNO | (C.uint32_t(a) >> 16) - case ActTrace: - return C.C_ACT_TRACE | (C.uint32_t(a) >> 16) - case ActLog: - return C.C_ACT_LOG - case ActAllow: - return C.C_ACT_ALLOW - default: - return 0x0 - } -} - -// Internal only, assumes safe attribute -func (a scmpFilterAttr) toNative() uint32 { - switch a { - case filterAttrActDefault: - return uint32(C.C_ATTRIBUTE_DEFAULT) - case filterAttrActBadArch: - return uint32(C.C_ATTRIBUTE_BADARCH) - case filterAttrNNP: - return uint32(C.C_ATTRIBUTE_NNP) - case filterAttrTsync: - return uint32(C.C_ATTRIBUTE_TSYNC) - case filterAttrLog: - return uint32(C.C_ATTRIBUTE_LOG) - default: - return 0x0 - } -} diff --git a/src/runtime/vendor/github.com/syndtr/gocapability/LICENSE b/src/runtime/vendor/github.com/syndtr/gocapability/LICENSE deleted file mode 100644 index 80dd96de77..0000000000 --- a/src/runtime/vendor/github.com/syndtr/gocapability/LICENSE +++ /dev/null @@ -1,24 +0,0 @@ -Copyright 2013 Suryandaru Triandana -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - - * Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability.go b/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability.go deleted file mode 100644 index 61a90775e5..0000000000 --- a/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability.go +++ /dev/null @@ -1,133 +0,0 @@ -// Copyright (c) 2013, Suryandaru Triandana -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// Package capability provides utilities for manipulating POSIX capabilities. -package capability - -type Capabilities interface { - // Get check whether a capability present in the given - // capabilities set. 
The 'which' value should be one of EFFECTIVE, - // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. - Get(which CapType, what Cap) bool - - // Empty check whether all capability bits of the given capabilities - // set are zero. The 'which' value should be one of EFFECTIVE, - // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. - Empty(which CapType) bool - - // Full check whether all capability bits of the given capabilities - // set are one. The 'which' value should be one of EFFECTIVE, - // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. - Full(which CapType) bool - - // Set sets capabilities of the given capabilities sets. The - // 'which' value should be one or combination (OR'ed) of EFFECTIVE, - // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. - Set(which CapType, caps ...Cap) - - // Unset unsets capabilities of the given capabilities sets. The - // 'which' value should be one or combination (OR'ed) of EFFECTIVE, - // PERMITTED, INHERITABLE, BOUNDING or AMBIENT. - Unset(which CapType, caps ...Cap) - - // Fill sets all bits of the given capabilities kind to one. The - // 'kind' value should be one or combination (OR'ed) of CAPS, - // BOUNDS or AMBS. - Fill(kind CapType) - - // Clear sets all bits of the given capabilities kind to zero. The - // 'kind' value should be one or combination (OR'ed) of CAPS, - // BOUNDS or AMBS. - Clear(kind CapType) - - // String return current capabilities state of the given capabilities - // set as string. The 'which' value should be one of EFFECTIVE, - // PERMITTED, INHERITABLE BOUNDING or AMBIENT - StringCap(which CapType) string - - // String return current capabilities state as string. - String() string - - // Load load actual capabilities value. This will overwrite all - // outstanding changes. - Load() error - - // Apply apply the capabilities settings, so all changes will take - // effect. - Apply(kind CapType) error -} - -// NewPid initializes a new Capabilities object for given pid when -// it is nonzero, or for the current process if pid is 0. -// -// Deprecated: Replace with NewPid2. For example, replace: -// -// c, err := NewPid(0) -// if err != nil { -// return err -// } -// -// with: -// -// c, err := NewPid2(0) -// if err != nil { -// return err -// } -// err = c.Load() -// if err != nil { -// return err -// } -func NewPid(pid int) (Capabilities, error) { - c, err := newPid(pid) - if err != nil { - return c, err - } - err = c.Load() - return c, err -} - -// NewPid2 initializes a new Capabilities object for given pid when -// it is nonzero, or for the current process if pid is 0. This -// does not load the process's current capabilities; to do that you -// must call Load explicitly. -func NewPid2(pid int) (Capabilities, error) { - return newPid(pid) -} - -// NewFile initializes a new Capabilities object for given file path. -// -// Deprecated: Replace with NewFile2. For example, replace: -// -// c, err := NewFile(path) -// if err != nil { -// return err -// } -// -// with: -// -// c, err := NewFile2(path) -// if err != nil { -// return err -// } -// err = c.Load() -// if err != nil { -// return err -// } -func NewFile(path string) (Capabilities, error) { - c, err := newFile(path) - if err != nil { - return c, err - } - err = c.Load() - return c, err -} - -// NewFile2 creates a new initialized Capabilities object for given -// file path. This does not load the process's current capabilities; -// to do that you must call Load explicitly. 
-func NewFile2(path string) (Capabilities, error) { - return newFile(path) -} diff --git a/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_linux.go b/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_linux.go deleted file mode 100644 index 1567dc8104..0000000000 --- a/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_linux.go +++ /dev/null @@ -1,642 +0,0 @@ -// Copyright (c) 2013, Suryandaru Triandana -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -package capability - -import ( - "bufio" - "errors" - "fmt" - "io" - "os" - "strings" - "syscall" -) - -var errUnknownVers = errors.New("unknown capability version") - -const ( - linuxCapVer1 = 0x19980330 - linuxCapVer2 = 0x20071026 - linuxCapVer3 = 0x20080522 -) - -var ( - capVers uint32 - capLastCap Cap -) - -func init() { - var hdr capHeader - capget(&hdr, nil) - capVers = hdr.version - - if initLastCap() == nil { - CAP_LAST_CAP = capLastCap - if capLastCap > 31 { - capUpperMask = (uint32(1) << (uint(capLastCap) - 31)) - 1 - } else { - capUpperMask = 0 - } - } -} - -func initLastCap() error { - if capLastCap != 0 { - return nil - } - - f, err := os.Open("/proc/sys/kernel/cap_last_cap") - if err != nil { - return err - } - defer f.Close() - - var b []byte = make([]byte, 11) - _, err = f.Read(b) - if err != nil { - return err - } - - fmt.Sscanf(string(b), "%d", &capLastCap) - - return nil -} - -func mkStringCap(c Capabilities, which CapType) (ret string) { - for i, first := Cap(0), true; i <= CAP_LAST_CAP; i++ { - if !c.Get(which, i) { - continue - } - if first { - first = false - } else { - ret += ", " - } - ret += i.String() - } - return -} - -func mkString(c Capabilities, max CapType) (ret string) { - ret = "{" - for i := CapType(1); i <= max; i <<= 1 { - ret += " " + i.String() + "=\"" - if c.Empty(i) { - ret += "empty" - } else if c.Full(i) { - ret += "full" - } else { - ret += c.StringCap(i) - } - ret += "\"" - } - ret += " }" - return -} - -func newPid(pid int) (c Capabilities, err error) { - switch capVers { - case linuxCapVer1: - p := new(capsV1) - p.hdr.version = capVers - p.hdr.pid = int32(pid) - c = p - case linuxCapVer2, linuxCapVer3: - p := new(capsV3) - p.hdr.version = capVers - p.hdr.pid = int32(pid) - c = p - default: - err = errUnknownVers - return - } - return -} - -type capsV1 struct { - hdr capHeader - data capData -} - -func (c *capsV1) Get(which CapType, what Cap) bool { - if what > 32 { - return false - } - - switch which { - case EFFECTIVE: - return (1< 32 { - continue - } - - if which&EFFECTIVE != 0 { - c.data.effective |= 1 << uint(what) - } - if which&PERMITTED != 0 { - c.data.permitted |= 1 << uint(what) - } - if which&INHERITABLE != 0 { - c.data.inheritable |= 1 << uint(what) - } - } -} - -func (c *capsV1) Unset(which CapType, caps ...Cap) { - for _, what := range caps { - if what > 32 { - continue - } - - if which&EFFECTIVE != 0 { - c.data.effective &= ^(1 << uint(what)) - } - if which&PERMITTED != 0 { - c.data.permitted &= ^(1 << uint(what)) - } - if which&INHERITABLE != 0 { - c.data.inheritable &= ^(1 << uint(what)) - } - } -} - -func (c *capsV1) Fill(kind CapType) { - if kind&CAPS == CAPS { - c.data.effective = 0x7fffffff - c.data.permitted = 0x7fffffff - c.data.inheritable = 0 - } -} - -func (c *capsV1) Clear(kind CapType) { - if kind&CAPS == CAPS { - c.data.effective = 0 - c.data.permitted = 0 - c.data.inheritable = 0 - } -} - -func (c *capsV1) 
StringCap(which CapType) (ret string) { - return mkStringCap(c, which) -} - -func (c *capsV1) String() (ret string) { - return mkString(c, BOUNDING) -} - -func (c *capsV1) Load() (err error) { - return capget(&c.hdr, &c.data) -} - -func (c *capsV1) Apply(kind CapType) error { - if kind&CAPS == CAPS { - return capset(&c.hdr, &c.data) - } - return nil -} - -type capsV3 struct { - hdr capHeader - data [2]capData - bounds [2]uint32 - ambient [2]uint32 -} - -func (c *capsV3) Get(which CapType, what Cap) bool { - var i uint - if what > 31 { - i = uint(what) >> 5 - what %= 32 - } - - switch which { - case EFFECTIVE: - return (1< 31 { - i = uint(what) >> 5 - what %= 32 - } - - if which&EFFECTIVE != 0 { - c.data[i].effective |= 1 << uint(what) - } - if which&PERMITTED != 0 { - c.data[i].permitted |= 1 << uint(what) - } - if which&INHERITABLE != 0 { - c.data[i].inheritable |= 1 << uint(what) - } - if which&BOUNDING != 0 { - c.bounds[i] |= 1 << uint(what) - } - if which&AMBIENT != 0 { - c.ambient[i] |= 1 << uint(what) - } - } -} - -func (c *capsV3) Unset(which CapType, caps ...Cap) { - for _, what := range caps { - var i uint - if what > 31 { - i = uint(what) >> 5 - what %= 32 - } - - if which&EFFECTIVE != 0 { - c.data[i].effective &= ^(1 << uint(what)) - } - if which&PERMITTED != 0 { - c.data[i].permitted &= ^(1 << uint(what)) - } - if which&INHERITABLE != 0 { - c.data[i].inheritable &= ^(1 << uint(what)) - } - if which&BOUNDING != 0 { - c.bounds[i] &= ^(1 << uint(what)) - } - if which&AMBIENT != 0 { - c.ambient[i] &= ^(1 << uint(what)) - } - } -} - -func (c *capsV3) Fill(kind CapType) { - if kind&CAPS == CAPS { - c.data[0].effective = 0xffffffff - c.data[0].permitted = 0xffffffff - c.data[0].inheritable = 0 - c.data[1].effective = 0xffffffff - c.data[1].permitted = 0xffffffff - c.data[1].inheritable = 0 - } - - if kind&BOUNDS == BOUNDS { - c.bounds[0] = 0xffffffff - c.bounds[1] = 0xffffffff - } - if kind&AMBS == AMBS { - c.ambient[0] = 0xffffffff - c.ambient[1] = 0xffffffff - } -} - -func (c *capsV3) Clear(kind CapType) { - if kind&CAPS == CAPS { - c.data[0].effective = 0 - c.data[0].permitted = 0 - c.data[0].inheritable = 0 - c.data[1].effective = 0 - c.data[1].permitted = 0 - c.data[1].inheritable = 0 - } - - if kind&BOUNDS == BOUNDS { - c.bounds[0] = 0 - c.bounds[1] = 0 - } - if kind&AMBS == AMBS { - c.ambient[0] = 0 - c.ambient[1] = 0 - } -} - -func (c *capsV3) StringCap(which CapType) (ret string) { - return mkStringCap(c, which) -} - -func (c *capsV3) String() (ret string) { - return mkString(c, BOUNDING) -} - -func (c *capsV3) Load() (err error) { - err = capget(&c.hdr, &c.data[0]) - if err != nil { - return - } - - var status_path string - - if c.hdr.pid == 0 { - status_path = fmt.Sprintf("/proc/self/status") - } else { - status_path = fmt.Sprintf("/proc/%d/status", c.hdr.pid) - } - - f, err := os.Open(status_path) - if err != nil { - return - } - b := bufio.NewReader(f) - for { - line, e := b.ReadString('\n') - if e != nil { - if e != io.EOF { - err = e - } - break - } - if strings.HasPrefix(line, "CapB") { - fmt.Sscanf(line[4:], "nd: %08x%08x", &c.bounds[1], &c.bounds[0]) - continue - } - if strings.HasPrefix(line, "CapA") { - fmt.Sscanf(line[4:], "mb: %08x%08x", &c.ambient[1], &c.ambient[0]) - continue - } - } - f.Close() - - return -} - -func (c *capsV3) Apply(kind CapType) (err error) { - if kind&BOUNDS == BOUNDS { - var data [2]capData - err = capget(&c.hdr, &data[0]) - if err != nil { - return - } - if (1< 31 { - if c.data.version == 1 { - return false - } - i = uint(what) >> 5 - 
what %= 32 - } - - switch which { - case EFFECTIVE: - return (1< 31 { - if c.data.version == 1 { - continue - } - i = uint(what) >> 5 - what %= 32 - } - - if which&EFFECTIVE != 0 { - c.data.effective[i] |= 1 << uint(what) - } - if which&PERMITTED != 0 { - c.data.data[i].permitted |= 1 << uint(what) - } - if which&INHERITABLE != 0 { - c.data.data[i].inheritable |= 1 << uint(what) - } - } -} - -func (c *capsFile) Unset(which CapType, caps ...Cap) { - for _, what := range caps { - var i uint - if what > 31 { - if c.data.version == 1 { - continue - } - i = uint(what) >> 5 - what %= 32 - } - - if which&EFFECTIVE != 0 { - c.data.effective[i] &= ^(1 << uint(what)) - } - if which&PERMITTED != 0 { - c.data.data[i].permitted &= ^(1 << uint(what)) - } - if which&INHERITABLE != 0 { - c.data.data[i].inheritable &= ^(1 << uint(what)) - } - } -} - -func (c *capsFile) Fill(kind CapType) { - if kind&CAPS == CAPS { - c.data.effective[0] = 0xffffffff - c.data.data[0].permitted = 0xffffffff - c.data.data[0].inheritable = 0 - if c.data.version == 2 { - c.data.effective[1] = 0xffffffff - c.data.data[1].permitted = 0xffffffff - c.data.data[1].inheritable = 0 - } - } -} - -func (c *capsFile) Clear(kind CapType) { - if kind&CAPS == CAPS { - c.data.effective[0] = 0 - c.data.data[0].permitted = 0 - c.data.data[0].inheritable = 0 - if c.data.version == 2 { - c.data.effective[1] = 0 - c.data.data[1].permitted = 0 - c.data.data[1].inheritable = 0 - } - } -} - -func (c *capsFile) StringCap(which CapType) (ret string) { - return mkStringCap(c, which) -} - -func (c *capsFile) String() (ret string) { - return mkString(c, INHERITABLE) -} - -func (c *capsFile) Load() (err error) { - return getVfsCap(c.path, &c.data) -} - -func (c *capsFile) Apply(kind CapType) (err error) { - if kind&CAPS == CAPS { - return setVfsCap(c.path, &c.data) - } - return -} diff --git a/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_noop.go b/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_noop.go deleted file mode 100644 index 9bb3070c5e..0000000000 --- a/src/runtime/vendor/github.com/syndtr/gocapability/capability/capability_noop.go +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright (c) 2013, Suryandaru Triandana -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// +build !linux - -package capability - -import "errors" - -func newPid(pid int) (Capabilities, error) { - return nil, errors.New("not supported") -} - -func newFile(path string) (Capabilities, error) { - return nil, errors.New("not supported") -} diff --git a/src/runtime/vendor/github.com/syndtr/gocapability/capability/enum.go b/src/runtime/vendor/github.com/syndtr/gocapability/capability/enum.go deleted file mode 100644 index ad10785314..0000000000 --- a/src/runtime/vendor/github.com/syndtr/gocapability/capability/enum.go +++ /dev/null @@ -1,309 +0,0 @@ -// Copyright (c) 2013, Suryandaru Triandana -// All rights reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -package capability - -type CapType uint - -func (c CapType) String() string { - switch c { - case EFFECTIVE: - return "effective" - case PERMITTED: - return "permitted" - case INHERITABLE: - return "inheritable" - case BOUNDING: - return "bounding" - case CAPS: - return "caps" - case AMBIENT: - return "ambient" - } - return "unknown" -} - -const ( - EFFECTIVE CapType = 1 << iota - PERMITTED - INHERITABLE - BOUNDING - AMBIENT - - CAPS = EFFECTIVE | PERMITTED | INHERITABLE - BOUNDS = BOUNDING - AMBS = AMBIENT -) - -//go:generate go run enumgen/gen.go -type Cap int - -// POSIX-draft defined capabilities and Linux extensions. -// -// Defined in https://github.com/torvalds/linux/blob/master/include/uapi/linux/capability.h -const ( - // In a system with the [_POSIX_CHOWN_RESTRICTED] option defined, this - // overrides the restriction of changing file ownership and group - // ownership. - CAP_CHOWN = Cap(0) - - // Override all DAC access, including ACL execute access if - // [_POSIX_ACL] is defined. Excluding DAC access covered by - // CAP_LINUX_IMMUTABLE. - CAP_DAC_OVERRIDE = Cap(1) - - // Overrides all DAC restrictions regarding read and search on files - // and directories, including ACL restrictions if [_POSIX_ACL] is - // defined. Excluding DAC access covered by CAP_LINUX_IMMUTABLE. - CAP_DAC_READ_SEARCH = Cap(2) - - // Overrides all restrictions about allowed operations on files, where - // file owner ID must be equal to the user ID, except where CAP_FSETID - // is applicable. It doesn't override MAC and DAC restrictions. - CAP_FOWNER = Cap(3) - - // Overrides the following restrictions that the effective user ID - // shall match the file owner ID when setting the S_ISUID and S_ISGID - // bits on that file; that the effective group ID (or one of the - // supplementary group IDs) shall match the file owner ID when setting - // the S_ISGID bit on that file; that the S_ISUID and S_ISGID bits are - // cleared on successful return from chown(2) (not implemented). - CAP_FSETID = Cap(4) - - // Overrides the restriction that the real or effective user ID of a - // process sending a signal must match the real or effective user ID - // of the process receiving the signal. - CAP_KILL = Cap(5) - - // Allows setgid(2) manipulation - // Allows setgroups(2) - // Allows forged gids on socket credentials passing. - CAP_SETGID = Cap(6) - - // Allows set*uid(2) manipulation (including fsuid). - // Allows forged pids on socket credentials passing. 
- CAP_SETUID = Cap(7) - - // Linux-specific capabilities - - // Without VFS support for capabilities: - // Transfer any capability in your permitted set to any pid, - // remove any capability in your permitted set from any pid - // With VFS support for capabilities (neither of above, but) - // Add any capability from current's capability bounding set - // to the current process' inheritable set - // Allow taking bits out of capability bounding set - // Allow modification of the securebits for a process - CAP_SETPCAP = Cap(8) - - // Allow modification of S_IMMUTABLE and S_APPEND file attributes - CAP_LINUX_IMMUTABLE = Cap(9) - - // Allows binding to TCP/UDP sockets below 1024 - // Allows binding to ATM VCIs below 32 - CAP_NET_BIND_SERVICE = Cap(10) - - // Allow broadcasting, listen to multicast - CAP_NET_BROADCAST = Cap(11) - - // Allow interface configuration - // Allow administration of IP firewall, masquerading and accounting - // Allow setting debug option on sockets - // Allow modification of routing tables - // Allow setting arbitrary process / process group ownership on - // sockets - // Allow binding to any address for transparent proxying (also via NET_RAW) - // Allow setting TOS (type of service) - // Allow setting promiscuous mode - // Allow clearing driver statistics - // Allow multicasting - // Allow read/write of device-specific registers - // Allow activation of ATM control sockets - CAP_NET_ADMIN = Cap(12) - - // Allow use of RAW sockets - // Allow use of PACKET sockets - // Allow binding to any address for transparent proxying (also via NET_ADMIN) - CAP_NET_RAW = Cap(13) - - // Allow locking of shared memory segments - // Allow mlock and mlockall (which doesn't really have anything to do - // with IPC) - CAP_IPC_LOCK = Cap(14) - - // Override IPC ownership checks - CAP_IPC_OWNER = Cap(15) - - // Insert and remove kernel modules - modify kernel without limit - CAP_SYS_MODULE = Cap(16) - - // Allow ioperm/iopl access - // Allow sending USB messages to any device via /proc/bus/usb - CAP_SYS_RAWIO = Cap(17) - - // Allow use of chroot() - CAP_SYS_CHROOT = Cap(18) - - // Allow ptrace() of any process - CAP_SYS_PTRACE = Cap(19) - - // Allow configuration of process accounting - CAP_SYS_PACCT = Cap(20) - - // Allow configuration of the secure attention key - // Allow administration of the random device - // Allow examination and configuration of disk quotas - // Allow setting the domainname - // Allow setting the hostname - // Allow calling bdflush() - // Allow mount() and umount(), setting up new smb connection - // Allow some autofs root ioctls - // Allow nfsservctl - // Allow VM86_REQUEST_IRQ - // Allow to read/write pci config on alpha - // Allow irix_prctl on mips (setstacksize) - // Allow flushing all cache on m68k (sys_cacheflush) - // Allow removing semaphores - // Used instead of CAP_CHOWN to "chown" IPC message queues, semaphores - // and shared memory - // Allow locking/unlocking of shared memory segment - // Allow turning swap on/off - // Allow forged pids on socket credentials passing - // Allow setting readahead and flushing buffers on block devices - // Allow setting geometry in floppy driver - // Allow turning DMA on/off in xd driver - // Allow administration of md devices (mostly the above, but some - // extra ioctls) - // Allow tuning the ide driver - // Allow access to the nvram device - // Allow administration of apm_bios, serial and bttv (TV) device - // Allow manufacturer commands in isdn CAPI support driver - // Allow reading non-standardized portions of pci 
configuration space - // Allow DDI debug ioctl on sbpcd driver - // Allow setting up serial ports - // Allow sending raw qic-117 commands - // Allow enabling/disabling tagged queuing on SCSI controllers and sending - // arbitrary SCSI commands - // Allow setting encryption key on loopback filesystem - // Allow setting zone reclaim policy - // Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility - CAP_SYS_ADMIN = Cap(21) - - // Allow use of reboot() - CAP_SYS_BOOT = Cap(22) - - // Allow raising priority and setting priority on other (different - // UID) processes - // Allow use of FIFO and round-robin (realtime) scheduling on own - // processes and setting the scheduling algorithm used by another - // process. - // Allow setting cpu affinity on other processes - CAP_SYS_NICE = Cap(23) - - // Override resource limits. Set resource limits. - // Override quota limits. - // Override reserved space on ext2 filesystem - // Modify data journaling mode on ext3 filesystem (uses journaling - // resources) - // NOTE: ext2 honors fsuid when checking for resource overrides, so - // you can override using fsuid too - // Override size restrictions on IPC message queues - // Allow more than 64hz interrupts from the real-time clock - // Override max number of consoles on console allocation - // Override max number of keymaps - // Control memory reclaim behavior - CAP_SYS_RESOURCE = Cap(24) - - // Allow manipulation of system clock - // Allow irix_stime on mips - // Allow setting the real-time clock - CAP_SYS_TIME = Cap(25) - - // Allow configuration of tty devices - // Allow vhangup() of tty - CAP_SYS_TTY_CONFIG = Cap(26) - - // Allow the privileged aspects of mknod() - CAP_MKNOD = Cap(27) - - // Allow taking of leases on files - CAP_LEASE = Cap(28) - - CAP_AUDIT_WRITE = Cap(29) - CAP_AUDIT_CONTROL = Cap(30) - CAP_SETFCAP = Cap(31) - - // Override MAC access. - // The base kernel enforces no MAC policy. - // An LSM may enforce a MAC policy, and if it does and it chooses - // to implement capability based overrides of that policy, this is - // the capability it should use to do so. - CAP_MAC_OVERRIDE = Cap(32) - - // Allow MAC configuration or state changes. - // The base kernel requires no MAC configuration. - // An LSM may enforce a MAC policy, and if it does and it chooses - // to implement capability based checks on modifications to that - // policy or the data required to maintain it, this is the - // capability it should use to do so. 
- CAP_MAC_ADMIN = Cap(33) - - // Allow configuring the kernel's syslog (printk behaviour) - CAP_SYSLOG = Cap(34) - - // Allow triggering something that will wake the system - CAP_WAKE_ALARM = Cap(35) - - // Allow preventing system suspends - CAP_BLOCK_SUSPEND = Cap(36) - - // Allow reading the audit log via multicast netlink socket - CAP_AUDIT_READ = Cap(37) - - // Allow system performance and observability privileged operations - // using perf_events, i915_perf and other kernel subsystems - CAP_PERFMON = Cap(38) - - // CAP_BPF allows the following BPF operations: - // - Creating all types of BPF maps - // - Advanced verifier features - // - Indirect variable access - // - Bounded loops - // - BPF to BPF function calls - // - Scalar precision tracking - // - Larger complexity limits - // - Dead code elimination - // - And potentially other features - // - Loading BPF Type Format (BTF) data - // - Retrieve xlated and JITed code of BPF programs - // - Use bpf_spin_lock() helper - // - // CAP_PERFMON relaxes the verifier checks further: - // - BPF progs can use of pointer-to-integer conversions - // - speculation attack hardening measures are bypassed - // - bpf_probe_read to read arbitrary kernel memory is allowed - // - bpf_trace_printk to print kernel memory is allowed - // - // CAP_SYS_ADMIN is required to use bpf_probe_write_user. - // - // CAP_SYS_ADMIN is required to iterate system wide loaded - // programs, maps, links, BTFs and convert their IDs to file descriptors. - // - // CAP_PERFMON and CAP_BPF are required to load tracing programs. - // CAP_NET_ADMIN and CAP_BPF are required to load networking programs. - CAP_BPF = Cap(39) - - // Allow checkpoint/restore related operations. - // Introduced in kernel 5.9 - CAP_CHECKPOINT_RESTORE = Cap(40) -) - -var ( - // Highest valid capability of the running kernel. 
- CAP_LAST_CAP = Cap(63) - - capUpperMask = ^uint32(0) -) diff --git a/src/runtime/vendor/github.com/syndtr/gocapability/capability/enum_gen.go b/src/runtime/vendor/github.com/syndtr/gocapability/capability/enum_gen.go deleted file mode 100644 index 2ff9bf4d88..0000000000 --- a/src/runtime/vendor/github.com/syndtr/gocapability/capability/enum_gen.go +++ /dev/null @@ -1,138 +0,0 @@ -// generated file; DO NOT EDIT - use go generate in directory with source - -package capability - -func (c Cap) String() string { - switch c { - case CAP_CHOWN: - return "chown" - case CAP_DAC_OVERRIDE: - return "dac_override" - case CAP_DAC_READ_SEARCH: - return "dac_read_search" - case CAP_FOWNER: - return "fowner" - case CAP_FSETID: - return "fsetid" - case CAP_KILL: - return "kill" - case CAP_SETGID: - return "setgid" - case CAP_SETUID: - return "setuid" - case CAP_SETPCAP: - return "setpcap" - case CAP_LINUX_IMMUTABLE: - return "linux_immutable" - case CAP_NET_BIND_SERVICE: - return "net_bind_service" - case CAP_NET_BROADCAST: - return "net_broadcast" - case CAP_NET_ADMIN: - return "net_admin" - case CAP_NET_RAW: - return "net_raw" - case CAP_IPC_LOCK: - return "ipc_lock" - case CAP_IPC_OWNER: - return "ipc_owner" - case CAP_SYS_MODULE: - return "sys_module" - case CAP_SYS_RAWIO: - return "sys_rawio" - case CAP_SYS_CHROOT: - return "sys_chroot" - case CAP_SYS_PTRACE: - return "sys_ptrace" - case CAP_SYS_PACCT: - return "sys_pacct" - case CAP_SYS_ADMIN: - return "sys_admin" - case CAP_SYS_BOOT: - return "sys_boot" - case CAP_SYS_NICE: - return "sys_nice" - case CAP_SYS_RESOURCE: - return "sys_resource" - case CAP_SYS_TIME: - return "sys_time" - case CAP_SYS_TTY_CONFIG: - return "sys_tty_config" - case CAP_MKNOD: - return "mknod" - case CAP_LEASE: - return "lease" - case CAP_AUDIT_WRITE: - return "audit_write" - case CAP_AUDIT_CONTROL: - return "audit_control" - case CAP_SETFCAP: - return "setfcap" - case CAP_MAC_OVERRIDE: - return "mac_override" - case CAP_MAC_ADMIN: - return "mac_admin" - case CAP_SYSLOG: - return "syslog" - case CAP_WAKE_ALARM: - return "wake_alarm" - case CAP_BLOCK_SUSPEND: - return "block_suspend" - case CAP_AUDIT_READ: - return "audit_read" - case CAP_PERFMON: - return "perfmon" - case CAP_BPF: - return "bpf" - case CAP_CHECKPOINT_RESTORE: - return "checkpoint_restore" - } - return "unknown" -} - -// List returns list of all supported capabilities -func List() []Cap { - return []Cap{ - CAP_CHOWN, - CAP_DAC_OVERRIDE, - CAP_DAC_READ_SEARCH, - CAP_FOWNER, - CAP_FSETID, - CAP_KILL, - CAP_SETGID, - CAP_SETUID, - CAP_SETPCAP, - CAP_LINUX_IMMUTABLE, - CAP_NET_BIND_SERVICE, - CAP_NET_BROADCAST, - CAP_NET_ADMIN, - CAP_NET_RAW, - CAP_IPC_LOCK, - CAP_IPC_OWNER, - CAP_SYS_MODULE, - CAP_SYS_RAWIO, - CAP_SYS_CHROOT, - CAP_SYS_PTRACE, - CAP_SYS_PACCT, - CAP_SYS_ADMIN, - CAP_SYS_BOOT, - CAP_SYS_NICE, - CAP_SYS_RESOURCE, - CAP_SYS_TIME, - CAP_SYS_TTY_CONFIG, - CAP_MKNOD, - CAP_LEASE, - CAP_AUDIT_WRITE, - CAP_AUDIT_CONTROL, - CAP_SETFCAP, - CAP_MAC_OVERRIDE, - CAP_MAC_ADMIN, - CAP_SYSLOG, - CAP_WAKE_ALARM, - CAP_BLOCK_SUSPEND, - CAP_AUDIT_READ, - CAP_PERFMON, - CAP_BPF, - CAP_CHECKPOINT_RESTORE, - } -} diff --git a/src/runtime/vendor/github.com/syndtr/gocapability/capability/syscall_linux.go b/src/runtime/vendor/github.com/syndtr/gocapability/capability/syscall_linux.go deleted file mode 100644 index 3d2bf6927f..0000000000 --- a/src/runtime/vendor/github.com/syndtr/gocapability/capability/syscall_linux.go +++ /dev/null @@ -1,154 +0,0 @@ -// Copyright (c) 2013, Suryandaru Triandana -// All rights 
reserved. -// -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -package capability - -import ( - "syscall" - "unsafe" -) - -type capHeader struct { - version uint32 - pid int32 -} - -type capData struct { - effective uint32 - permitted uint32 - inheritable uint32 -} - -func capget(hdr *capHeader, data *capData) (err error) { - _, _, e1 := syscall.Syscall(syscall.SYS_CAPGET, uintptr(unsafe.Pointer(hdr)), uintptr(unsafe.Pointer(data)), 0) - if e1 != 0 { - err = e1 - } - return -} - -func capset(hdr *capHeader, data *capData) (err error) { - _, _, e1 := syscall.Syscall(syscall.SYS_CAPSET, uintptr(unsafe.Pointer(hdr)), uintptr(unsafe.Pointer(data)), 0) - if e1 != 0 { - err = e1 - } - return -} - -// not yet in syscall -const ( - pr_CAP_AMBIENT = 47 - pr_CAP_AMBIENT_IS_SET = uintptr(1) - pr_CAP_AMBIENT_RAISE = uintptr(2) - pr_CAP_AMBIENT_LOWER = uintptr(3) - pr_CAP_AMBIENT_CLEAR_ALL = uintptr(4) -) - -func prctl(option int, arg2, arg3, arg4, arg5 uintptr) (err error) { - _, _, e1 := syscall.Syscall6(syscall.SYS_PRCTL, uintptr(option), arg2, arg3, arg4, arg5, 0) - if e1 != 0 { - err = e1 - } - return -} - -const ( - vfsXattrName = "security.capability" - - vfsCapVerMask = 0xff000000 - vfsCapVer1 = 0x01000000 - vfsCapVer2 = 0x02000000 - - vfsCapFlagMask = ^vfsCapVerMask - vfsCapFlageffective = 0x000001 - - vfscapDataSizeV1 = 4 * (1 + 2*1) - vfscapDataSizeV2 = 4 * (1 + 2*2) -) - -type vfscapData struct { - magic uint32 - data [2]struct { - permitted uint32 - inheritable uint32 - } - effective [2]uint32 - version int8 -} - -var ( - _vfsXattrName *byte -) - -func init() { - _vfsXattrName, _ = syscall.BytePtrFromString(vfsXattrName) -} - -func getVfsCap(path string, dest *vfscapData) (err error) { - var _p0 *byte - _p0, err = syscall.BytePtrFromString(path) - if err != nil { - return - } - r0, _, e1 := syscall.Syscall6(syscall.SYS_GETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(dest)), vfscapDataSizeV2, 0, 0) - if e1 != 0 { - if e1 == syscall.ENODATA { - dest.version = 2 - return - } - err = e1 - } - switch dest.magic & vfsCapVerMask { - case vfsCapVer1: - dest.version = 1 - if r0 != vfscapDataSizeV1 { - return syscall.EINVAL - } - dest.data[1].permitted = 0 - dest.data[1].inheritable = 0 - case vfsCapVer2: - dest.version = 2 - if r0 != vfscapDataSizeV2 { - return syscall.EINVAL - } - default: - return syscall.EINVAL - } - if dest.magic&vfsCapFlageffective != 0 { - dest.effective[0] = dest.data[0].permitted | dest.data[0].inheritable - dest.effective[1] = dest.data[1].permitted | dest.data[1].inheritable - } else { - dest.effective[0] = 0 - dest.effective[1] = 0 - } - return -} - -func setVfsCap(path string, data *vfscapData) (err error) { - var _p0 *byte - _p0, err = syscall.BytePtrFromString(path) - if err != nil { - return - } - var size uintptr - if data.version == 1 { - data.magic = vfsCapVer1 - size = vfscapDataSizeV1 - } else if data.version == 2 { - data.magic = vfsCapVer2 - if data.effective[0] != 0 || data.effective[1] != 0 { - data.magic |= vfsCapFlageffective - } - size = vfscapDataSizeV2 - } else { - return syscall.EINVAL - } - _, _, e1 := syscall.Syscall6(syscall.SYS_SETXATTR, uintptr(unsafe.Pointer(_p0)), uintptr(unsafe.Pointer(_vfsXattrName)), uintptr(unsafe.Pointer(data)), size, 0, 0) - if e1 != 0 { - err = e1 - } - return -} diff --git a/src/runtime/vendor/golang.org/x/net/bpf/asm.go b/src/runtime/vendor/golang.org/x/net/bpf/asm.go deleted file mode 100644 index 
15e21b1812..0000000000 --- a/src/runtime/vendor/golang.org/x/net/bpf/asm.go +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bpf - -import "fmt" - -// Assemble converts insts into raw instructions suitable for loading -// into a BPF virtual machine. -// -// Currently, no optimization is attempted, the assembled program flow -// is exactly as provided. -func Assemble(insts []Instruction) ([]RawInstruction, error) { - ret := make([]RawInstruction, len(insts)) - var err error - for i, inst := range insts { - ret[i], err = inst.Assemble() - if err != nil { - return nil, fmt.Errorf("assembling instruction %d: %s", i+1, err) - } - } - return ret, nil -} - -// Disassemble attempts to parse raw back into -// Instructions. Unrecognized RawInstructions are assumed to be an -// extension not implemented by this package, and are passed through -// unchanged to the output. The allDecoded value reports whether insts -// contains no RawInstructions. -func Disassemble(raw []RawInstruction) (insts []Instruction, allDecoded bool) { - insts = make([]Instruction, len(raw)) - allDecoded = true - for i, r := range raw { - insts[i] = r.Disassemble() - if _, ok := insts[i].(RawInstruction); ok { - allDecoded = false - } - } - return insts, allDecoded -} diff --git a/src/runtime/vendor/golang.org/x/net/bpf/constants.go b/src/runtime/vendor/golang.org/x/net/bpf/constants.go deleted file mode 100644 index 12f3ee835a..0000000000 --- a/src/runtime/vendor/golang.org/x/net/bpf/constants.go +++ /dev/null @@ -1,222 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bpf - -// A Register is a register of the BPF virtual machine. -type Register uint16 - -const ( - // RegA is the accumulator register. RegA is always the - // destination register of ALU operations. - RegA Register = iota - // RegX is the indirection register, used by LoadIndirect - // operations. - RegX -) - -// An ALUOp is an arithmetic or logic operation. -type ALUOp uint16 - -// ALU binary operation types. -const ( - ALUOpAdd ALUOp = iota << 4 - ALUOpSub - ALUOpMul - ALUOpDiv - ALUOpOr - ALUOpAnd - ALUOpShiftLeft - ALUOpShiftRight - aluOpNeg // Not exported because it's the only unary ALU operation, and gets its own instruction type. - ALUOpMod - ALUOpXor -) - -// A JumpTest is a comparison operator used in conditional jumps. -type JumpTest uint16 - -// Supported operators for conditional jumps. -// K can be RegX for JumpIfX -const ( - // K == A - JumpEqual JumpTest = iota - // K != A - JumpNotEqual - // K > A - JumpGreaterThan - // K < A - JumpLessThan - // K >= A - JumpGreaterOrEqual - // K <= A - JumpLessOrEqual - // K & A != 0 - JumpBitsSet - // K & A == 0 - JumpBitsNotSet -) - -// An Extension is a function call provided by the kernel that -// performs advanced operations that are expensive or impossible -// within the BPF virtual machine. -// -// Extensions are only implemented by the Linux kernel. -// -// TODO: should we prune this list? Some of these extensions seem -// either broken or near-impossible to use correctly, whereas other -// (len, random, ifindex) are quite useful. -type Extension int - -// Extension functions available in the Linux kernel. 
-const ( - // extOffset is the negative maximum number of instructions used - // to load instructions by overloading the K argument. - extOffset = -0x1000 - // ExtLen returns the length of the packet. - ExtLen Extension = 1 - // ExtProto returns the packet's L3 protocol type. - ExtProto Extension = 0 - // ExtType returns the packet's type (skb->pkt_type in the kernel) - // - // TODO: better documentation. How nice an API do we want to - // provide for these esoteric extensions? - ExtType Extension = 4 - // ExtPayloadOffset returns the offset of the packet payload, or - // the first protocol header that the kernel does not know how to - // parse. - ExtPayloadOffset Extension = 52 - // ExtInterfaceIndex returns the index of the interface on which - // the packet was received. - ExtInterfaceIndex Extension = 8 - // ExtNetlinkAttr returns the netlink attribute of type X at - // offset A. - ExtNetlinkAttr Extension = 12 - // ExtNetlinkAttrNested returns the nested netlink attribute of - // type X at offset A. - ExtNetlinkAttrNested Extension = 16 - // ExtMark returns the packet's mark value. - ExtMark Extension = 20 - // ExtQueue returns the packet's assigned hardware queue. - ExtQueue Extension = 24 - // ExtLinkLayerType returns the packet's hardware address type - // (e.g. Ethernet, Infiniband). - ExtLinkLayerType Extension = 28 - // ExtRXHash returns the packets receive hash. - // - // TODO: figure out what this rxhash actually is. - ExtRXHash Extension = 32 - // ExtCPUID returns the ID of the CPU processing the current - // packet. - ExtCPUID Extension = 36 - // ExtVLANTag returns the packet's VLAN tag. - ExtVLANTag Extension = 44 - // ExtVLANTagPresent returns non-zero if the packet has a VLAN - // tag. - // - // TODO: I think this might be a lie: it reads bit 0x1000 of the - // VLAN header, which changed meaning in recent revisions of the - // spec - this extension may now return meaningless information. - ExtVLANTagPresent Extension = 48 - // ExtVLANProto returns 0x8100 if the frame has a VLAN header, - // 0x88a8 if the frame has a "Q-in-Q" double VLAN header, or some - // other value if no VLAN information is present. - ExtVLANProto Extension = 60 - // ExtRand returns a uniformly random uint32. - ExtRand Extension = 56 -) - -// The following gives names to various bit patterns used in opcode construction. 
- -const ( - opMaskCls uint16 = 0x7 - // opClsLoad masks - opMaskLoadDest = 0x01 - opMaskLoadWidth = 0x18 - opMaskLoadMode = 0xe0 - // opClsALU & opClsJump - opMaskOperand = 0x08 - opMaskOperator = 0xf0 -) - -const ( - // +---------------+-----------------+---+---+---+ - // | AddrMode (3b) | LoadWidth (2b) | 0 | 0 | 0 | - // +---------------+-----------------+---+---+---+ - opClsLoadA uint16 = iota - // +---------------+-----------------+---+---+---+ - // | AddrMode (3b) | LoadWidth (2b) | 0 | 0 | 1 | - // +---------------+-----------------+---+---+---+ - opClsLoadX - // +---+---+---+---+---+---+---+---+ - // | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | - // +---+---+---+---+---+---+---+---+ - opClsStoreA - // +---+---+---+---+---+---+---+---+ - // | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | - // +---+---+---+---+---+---+---+---+ - opClsStoreX - // +---------------+-----------------+---+---+---+ - // | Operator (4b) | OperandSrc (1b) | 1 | 0 | 0 | - // +---------------+-----------------+---+---+---+ - opClsALU - // +-----------------------------+---+---+---+---+ - // | TestOperator (4b) | 0 | 1 | 0 | 1 | - // +-----------------------------+---+---+---+---+ - opClsJump - // +---+-------------------------+---+---+---+---+ - // | 0 | 0 | 0 | RetSrc (1b) | 0 | 1 | 1 | 0 | - // +---+-------------------------+---+---+---+---+ - opClsReturn - // +---+-------------------------+---+---+---+---+ - // | 0 | 0 | 0 | TXAorTAX (1b) | 0 | 1 | 1 | 1 | - // +---+-------------------------+---+---+---+---+ - opClsMisc -) - -const ( - opAddrModeImmediate uint16 = iota << 5 - opAddrModeAbsolute - opAddrModeIndirect - opAddrModeScratch - opAddrModePacketLen // actually an extension, not an addressing mode. - opAddrModeMemShift -) - -const ( - opLoadWidth4 uint16 = iota << 3 - opLoadWidth2 - opLoadWidth1 -) - -// Operand for ALU and Jump instructions -type opOperand uint16 - -// Supported operand sources. -const ( - opOperandConstant opOperand = iota << 3 - opOperandX -) - -// An jumpOp is a conditional jump condition. -type jumpOp uint16 - -// Supported jump conditions. -const ( - opJumpAlways jumpOp = iota << 4 - opJumpEqual - opJumpGT - opJumpGE - opJumpSet -) - -const ( - opRetSrcConstant uint16 = iota << 4 - opRetSrcA -) - -const ( - opMiscTAX = 0x00 - opMiscTXA = 0x80 -) diff --git a/src/runtime/vendor/golang.org/x/net/bpf/doc.go b/src/runtime/vendor/golang.org/x/net/bpf/doc.go deleted file mode 100644 index ae62feb534..0000000000 --- a/src/runtime/vendor/golang.org/x/net/bpf/doc.go +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -/* - -Package bpf implements marshaling and unmarshaling of programs for the -Berkeley Packet Filter virtual machine, and provides a Go implementation -of the virtual machine. - -BPF's main use is to specify a packet filter for network taps, so that -the kernel doesn't have to expensively copy every packet it sees to -userspace. However, it's been repurposed to other areas where running -user code in-kernel is needed. For example, Linux's seccomp uses BPF -to apply security policies to system calls. For simplicity, this -documentation refers only to packets, but other uses of BPF have their -own data payloads. - -BPF programs run in a restricted virtual machine. It has almost no -access to kernel functions, and while conditional branches are -allowed, they can only jump forwards, to guarantee that there are no -infinite loops. 
- -The virtual machine - -The BPF VM is an accumulator machine. Its main register, called -register A, is an implicit source and destination in all arithmetic -and logic operations. The machine also has 16 scratch registers for -temporary storage, and an indirection register (register X) for -indirect memory access. All registers are 32 bits wide. - -Each run of a BPF program is given one packet, which is placed in the -VM's read-only "main memory". LoadAbsolute and LoadIndirect -instructions can fetch up to 32 bits at a time into register A for -examination. - -The goal of a BPF program is to produce and return a verdict (uint32), -which tells the kernel what to do with the packet. In the context of -packet filtering, the returned value is the number of bytes of the -packet to forward to userspace, or 0 to ignore the packet. Other -contexts like seccomp define their own return values. - -In order to simplify programs, attempts to read past the end of the -packet terminate the program execution with a verdict of 0 (ignore -packet). This means that the vast majority of BPF programs don't need -to do any explicit bounds checking. - -In addition to the bytes of the packet, some BPF programs have access -to extensions, which are essentially calls to kernel utility -functions. Currently, the only extensions supported by this package -are the Linux packet filter extensions. - -Examples - -This packet filter selects all ARP packets. - - bpf.Assemble([]bpf.Instruction{ - // Load "EtherType" field from the ethernet header. - bpf.LoadAbsolute{Off: 12, Size: 2}, - // Skip over the next instruction if EtherType is not ARP. - bpf.JumpIf{Cond: bpf.JumpNotEqual, Val: 0x0806, SkipTrue: 1}, - // Verdict is "send up to 4k of the packet to userspace." - bpf.RetConstant{Val: 4096}, - // Verdict is "ignore packet." - bpf.RetConstant{Val: 0}, - }) - -This packet filter captures a random 1% sample of traffic. - - bpf.Assemble([]bpf.Instruction{ - // Get a 32-bit random number from the Linux kernel. - bpf.LoadExtension{Num: bpf.ExtRand}, - // 1% dice roll? - bpf.JumpIf{Cond: bpf.JumpLessThan, Val: 2^32/100, SkipFalse: 1}, - // Capture. - bpf.RetConstant{Val: 4096}, - // Ignore. - bpf.RetConstant{Val: 0}, - }) - -*/ -package bpf // import "golang.org/x/net/bpf" diff --git a/src/runtime/vendor/golang.org/x/net/bpf/instructions.go b/src/runtime/vendor/golang.org/x/net/bpf/instructions.go deleted file mode 100644 index 3cffcaa014..0000000000 --- a/src/runtime/vendor/golang.org/x/net/bpf/instructions.go +++ /dev/null @@ -1,726 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bpf - -import "fmt" - -// An Instruction is one instruction executed by the BPF virtual -// machine. -type Instruction interface { - // Assemble assembles the Instruction into a RawInstruction. - Assemble() (RawInstruction, error) -} - -// A RawInstruction is a raw BPF virtual machine instruction. -type RawInstruction struct { - // Operation to execute. - Op uint16 - // For conditional jump instructions, the number of instructions - // to skip if the condition is true/false. - Jt uint8 - Jf uint8 - // Constant parameter. The meaning depends on the Op. - K uint32 -} - -// Assemble implements the Instruction Assemble method. -func (ri RawInstruction) Assemble() (RawInstruction, error) { return ri, nil } - -// Disassemble parses ri into an Instruction and returns it. 
If ri is -// not recognized by this package, ri itself is returned. -func (ri RawInstruction) Disassemble() Instruction { - switch ri.Op & opMaskCls { - case opClsLoadA, opClsLoadX: - reg := Register(ri.Op & opMaskLoadDest) - sz := 0 - switch ri.Op & opMaskLoadWidth { - case opLoadWidth4: - sz = 4 - case opLoadWidth2: - sz = 2 - case opLoadWidth1: - sz = 1 - default: - return ri - } - switch ri.Op & opMaskLoadMode { - case opAddrModeImmediate: - if sz != 4 { - return ri - } - return LoadConstant{Dst: reg, Val: ri.K} - case opAddrModeScratch: - if sz != 4 || ri.K > 15 { - return ri - } - return LoadScratch{Dst: reg, N: int(ri.K)} - case opAddrModeAbsolute: - if ri.K > extOffset+0xffffffff { - return LoadExtension{Num: Extension(-extOffset + ri.K)} - } - return LoadAbsolute{Size: sz, Off: ri.K} - case opAddrModeIndirect: - return LoadIndirect{Size: sz, Off: ri.K} - case opAddrModePacketLen: - if sz != 4 { - return ri - } - return LoadExtension{Num: ExtLen} - case opAddrModeMemShift: - return LoadMemShift{Off: ri.K} - default: - return ri - } - - case opClsStoreA: - if ri.Op != opClsStoreA || ri.K > 15 { - return ri - } - return StoreScratch{Src: RegA, N: int(ri.K)} - - case opClsStoreX: - if ri.Op != opClsStoreX || ri.K > 15 { - return ri - } - return StoreScratch{Src: RegX, N: int(ri.K)} - - case opClsALU: - switch op := ALUOp(ri.Op & opMaskOperator); op { - case ALUOpAdd, ALUOpSub, ALUOpMul, ALUOpDiv, ALUOpOr, ALUOpAnd, ALUOpShiftLeft, ALUOpShiftRight, ALUOpMod, ALUOpXor: - switch operand := opOperand(ri.Op & opMaskOperand); operand { - case opOperandX: - return ALUOpX{Op: op} - case opOperandConstant: - return ALUOpConstant{Op: op, Val: ri.K} - default: - return ri - } - case aluOpNeg: - return NegateA{} - default: - return ri - } - - case opClsJump: - switch op := jumpOp(ri.Op & opMaskOperator); op { - case opJumpAlways: - return Jump{Skip: ri.K} - case opJumpEqual, opJumpGT, opJumpGE, opJumpSet: - cond, skipTrue, skipFalse := jumpOpToTest(op, ri.Jt, ri.Jf) - switch operand := opOperand(ri.Op & opMaskOperand); operand { - case opOperandX: - return JumpIfX{Cond: cond, SkipTrue: skipTrue, SkipFalse: skipFalse} - case opOperandConstant: - return JumpIf{Cond: cond, Val: ri.K, SkipTrue: skipTrue, SkipFalse: skipFalse} - default: - return ri - } - default: - return ri - } - - case opClsReturn: - switch ri.Op { - case opClsReturn | opRetSrcA: - return RetA{} - case opClsReturn | opRetSrcConstant: - return RetConstant{Val: ri.K} - default: - return ri - } - - case opClsMisc: - switch ri.Op { - case opClsMisc | opMiscTAX: - return TAX{} - case opClsMisc | opMiscTXA: - return TXA{} - default: - return ri - } - - default: - panic("unreachable") // switch is exhaustive on the bit pattern - } -} - -func jumpOpToTest(op jumpOp, skipTrue uint8, skipFalse uint8) (JumpTest, uint8, uint8) { - var test JumpTest - - // Decode "fake" jump conditions that don't appear in machine code - // Ensures the Assemble -> Disassemble stage recreates the same instructions - // See https://github.com/golang/go/issues/18470 - if skipTrue == 0 { - switch op { - case opJumpEqual: - test = JumpNotEqual - case opJumpGT: - test = JumpLessOrEqual - case opJumpGE: - test = JumpLessThan - case opJumpSet: - test = JumpBitsNotSet - } - - return test, skipFalse, 0 - } - - switch op { - case opJumpEqual: - test = JumpEqual - case opJumpGT: - test = JumpGreaterThan - case opJumpGE: - test = JumpGreaterOrEqual - case opJumpSet: - test = JumpBitsSet - } - - return test, skipTrue, skipFalse -} - -// LoadConstant loads Val into register 
Dst. -type LoadConstant struct { - Dst Register - Val uint32 -} - -// Assemble implements the Instruction Assemble method. -func (a LoadConstant) Assemble() (RawInstruction, error) { - return assembleLoad(a.Dst, 4, opAddrModeImmediate, a.Val) -} - -// String returns the instruction in assembler notation. -func (a LoadConstant) String() string { - switch a.Dst { - case RegA: - return fmt.Sprintf("ld #%d", a.Val) - case RegX: - return fmt.Sprintf("ldx #%d", a.Val) - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// LoadScratch loads scratch[N] into register Dst. -type LoadScratch struct { - Dst Register - N int // 0-15 -} - -// Assemble implements the Instruction Assemble method. -func (a LoadScratch) Assemble() (RawInstruction, error) { - if a.N < 0 || a.N > 15 { - return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N) - } - return assembleLoad(a.Dst, 4, opAddrModeScratch, uint32(a.N)) -} - -// String returns the instruction in assembler notation. -func (a LoadScratch) String() string { - switch a.Dst { - case RegA: - return fmt.Sprintf("ld M[%d]", a.N) - case RegX: - return fmt.Sprintf("ldx M[%d]", a.N) - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// LoadAbsolute loads packet[Off:Off+Size] as an integer value into -// register A. -type LoadAbsolute struct { - Off uint32 - Size int // 1, 2 or 4 -} - -// Assemble implements the Instruction Assemble method. -func (a LoadAbsolute) Assemble() (RawInstruction, error) { - return assembleLoad(RegA, a.Size, opAddrModeAbsolute, a.Off) -} - -// String returns the instruction in assembler notation. -func (a LoadAbsolute) String() string { - switch a.Size { - case 1: // byte - return fmt.Sprintf("ldb [%d]", a.Off) - case 2: // half word - return fmt.Sprintf("ldh [%d]", a.Off) - case 4: // word - if a.Off > extOffset+0xffffffff { - return LoadExtension{Num: Extension(a.Off + 0x1000)}.String() - } - return fmt.Sprintf("ld [%d]", a.Off) - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// LoadIndirect loads packet[X+Off:X+Off+Size] as an integer value -// into register A. -type LoadIndirect struct { - Off uint32 - Size int // 1, 2 or 4 -} - -// Assemble implements the Instruction Assemble method. -func (a LoadIndirect) Assemble() (RawInstruction, error) { - return assembleLoad(RegA, a.Size, opAddrModeIndirect, a.Off) -} - -// String returns the instruction in assembler notation. -func (a LoadIndirect) String() string { - switch a.Size { - case 1: // byte - return fmt.Sprintf("ldb [x + %d]", a.Off) - case 2: // half word - return fmt.Sprintf("ldh [x + %d]", a.Off) - case 4: // word - return fmt.Sprintf("ld [x + %d]", a.Off) - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// LoadMemShift multiplies the first 4 bits of the byte at packet[Off] -// by 4 and stores the result in register X. -// -// This instruction is mainly useful to load into X the length of an -// IPv4 packet header in a single instruction, rather than have to do -// the arithmetic on the header's first byte by hand. -type LoadMemShift struct { - Off uint32 -} - -// Assemble implements the Instruction Assemble method. -func (a LoadMemShift) Assemble() (RawInstruction, error) { - return assembleLoad(RegX, 1, opAddrModeMemShift, a.Off) -} - -// String returns the instruction in assembler notation. -func (a LoadMemShift) String() string { - return fmt.Sprintf("ldx 4*([%d]&0xf)", a.Off) -} - -// LoadExtension invokes a linux-specific extension and stores the -// result in register A. 
-type LoadExtension struct { - Num Extension -} - -// Assemble implements the Instruction Assemble method. -func (a LoadExtension) Assemble() (RawInstruction, error) { - if a.Num == ExtLen { - return assembleLoad(RegA, 4, opAddrModePacketLen, 0) - } - return assembleLoad(RegA, 4, opAddrModeAbsolute, uint32(extOffset+a.Num)) -} - -// String returns the instruction in assembler notation. -func (a LoadExtension) String() string { - switch a.Num { - case ExtLen: - return "ld #len" - case ExtProto: - return "ld #proto" - case ExtType: - return "ld #type" - case ExtPayloadOffset: - return "ld #poff" - case ExtInterfaceIndex: - return "ld #ifidx" - case ExtNetlinkAttr: - return "ld #nla" - case ExtNetlinkAttrNested: - return "ld #nlan" - case ExtMark: - return "ld #mark" - case ExtQueue: - return "ld #queue" - case ExtLinkLayerType: - return "ld #hatype" - case ExtRXHash: - return "ld #rxhash" - case ExtCPUID: - return "ld #cpu" - case ExtVLANTag: - return "ld #vlan_tci" - case ExtVLANTagPresent: - return "ld #vlan_avail" - case ExtVLANProto: - return "ld #vlan_tpid" - case ExtRand: - return "ld #rand" - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// StoreScratch stores register Src into scratch[N]. -type StoreScratch struct { - Src Register - N int // 0-15 -} - -// Assemble implements the Instruction Assemble method. -func (a StoreScratch) Assemble() (RawInstruction, error) { - if a.N < 0 || a.N > 15 { - return RawInstruction{}, fmt.Errorf("invalid scratch slot %d", a.N) - } - var op uint16 - switch a.Src { - case RegA: - op = opClsStoreA - case RegX: - op = opClsStoreX - default: - return RawInstruction{}, fmt.Errorf("invalid source register %v", a.Src) - } - - return RawInstruction{ - Op: op, - K: uint32(a.N), - }, nil -} - -// String returns the instruction in assembler notation. -func (a StoreScratch) String() string { - switch a.Src { - case RegA: - return fmt.Sprintf("st M[%d]", a.N) - case RegX: - return fmt.Sprintf("stx M[%d]", a.N) - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// ALUOpConstant executes A = A Val. -type ALUOpConstant struct { - Op ALUOp - Val uint32 -} - -// Assemble implements the Instruction Assemble method. -func (a ALUOpConstant) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsALU | uint16(opOperandConstant) | uint16(a.Op), - K: a.Val, - }, nil -} - -// String returns the instruction in assembler notation. -func (a ALUOpConstant) String() string { - switch a.Op { - case ALUOpAdd: - return fmt.Sprintf("add #%d", a.Val) - case ALUOpSub: - return fmt.Sprintf("sub #%d", a.Val) - case ALUOpMul: - return fmt.Sprintf("mul #%d", a.Val) - case ALUOpDiv: - return fmt.Sprintf("div #%d", a.Val) - case ALUOpMod: - return fmt.Sprintf("mod #%d", a.Val) - case ALUOpAnd: - return fmt.Sprintf("and #%d", a.Val) - case ALUOpOr: - return fmt.Sprintf("or #%d", a.Val) - case ALUOpXor: - return fmt.Sprintf("xor #%d", a.Val) - case ALUOpShiftLeft: - return fmt.Sprintf("lsh #%d", a.Val) - case ALUOpShiftRight: - return fmt.Sprintf("rsh #%d", a.Val) - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// ALUOpX executes A = A X -type ALUOpX struct { - Op ALUOp -} - -// Assemble implements the Instruction Assemble method. -func (a ALUOpX) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsALU | uint16(opOperandX) | uint16(a.Op), - }, nil -} - -// String returns the instruction in assembler notation. 
-func (a ALUOpX) String() string { - switch a.Op { - case ALUOpAdd: - return "add x" - case ALUOpSub: - return "sub x" - case ALUOpMul: - return "mul x" - case ALUOpDiv: - return "div x" - case ALUOpMod: - return "mod x" - case ALUOpAnd: - return "and x" - case ALUOpOr: - return "or x" - case ALUOpXor: - return "xor x" - case ALUOpShiftLeft: - return "lsh x" - case ALUOpShiftRight: - return "rsh x" - default: - return fmt.Sprintf("unknown instruction: %#v", a) - } -} - -// NegateA executes A = -A. -type NegateA struct{} - -// Assemble implements the Instruction Assemble method. -func (a NegateA) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsALU | uint16(aluOpNeg), - }, nil -} - -// String returns the instruction in assembler notation. -func (a NegateA) String() string { - return fmt.Sprintf("neg") -} - -// Jump skips the following Skip instructions in the program. -type Jump struct { - Skip uint32 -} - -// Assemble implements the Instruction Assemble method. -func (a Jump) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsJump | uint16(opJumpAlways), - K: a.Skip, - }, nil -} - -// String returns the instruction in assembler notation. -func (a Jump) String() string { - return fmt.Sprintf("ja %d", a.Skip) -} - -// JumpIf skips the following Skip instructions in the program if A -// Val is true. -type JumpIf struct { - Cond JumpTest - Val uint32 - SkipTrue uint8 - SkipFalse uint8 -} - -// Assemble implements the Instruction Assemble method. -func (a JumpIf) Assemble() (RawInstruction, error) { - return jumpToRaw(a.Cond, opOperandConstant, a.Val, a.SkipTrue, a.SkipFalse) -} - -// String returns the instruction in assembler notation. -func (a JumpIf) String() string { - return jumpToString(a.Cond, fmt.Sprintf("#%d", a.Val), a.SkipTrue, a.SkipFalse) -} - -// JumpIfX skips the following Skip instructions in the program if A -// X is true. -type JumpIfX struct { - Cond JumpTest - SkipTrue uint8 - SkipFalse uint8 -} - -// Assemble implements the Instruction Assemble method. -func (a JumpIfX) Assemble() (RawInstruction, error) { - return jumpToRaw(a.Cond, opOperandX, 0, a.SkipTrue, a.SkipFalse) -} - -// String returns the instruction in assembler notation. 
-func (a JumpIfX) String() string { - return jumpToString(a.Cond, "x", a.SkipTrue, a.SkipFalse) -} - -// jumpToRaw assembles a jump instruction into a RawInstruction -func jumpToRaw(test JumpTest, operand opOperand, k uint32, skipTrue, skipFalse uint8) (RawInstruction, error) { - var ( - cond jumpOp - flip bool - ) - switch test { - case JumpEqual: - cond = opJumpEqual - case JumpNotEqual: - cond, flip = opJumpEqual, true - case JumpGreaterThan: - cond = opJumpGT - case JumpLessThan: - cond, flip = opJumpGE, true - case JumpGreaterOrEqual: - cond = opJumpGE - case JumpLessOrEqual: - cond, flip = opJumpGT, true - case JumpBitsSet: - cond = opJumpSet - case JumpBitsNotSet: - cond, flip = opJumpSet, true - default: - return RawInstruction{}, fmt.Errorf("unknown JumpTest %v", test) - } - jt, jf := skipTrue, skipFalse - if flip { - jt, jf = jf, jt - } - return RawInstruction{ - Op: opClsJump | uint16(cond) | uint16(operand), - Jt: jt, - Jf: jf, - K: k, - }, nil -} - -// jumpToString converts a jump instruction to assembler notation -func jumpToString(cond JumpTest, operand string, skipTrue, skipFalse uint8) string { - switch cond { - // K == A - case JumpEqual: - return conditionalJump(operand, skipTrue, skipFalse, "jeq", "jneq") - // K != A - case JumpNotEqual: - return fmt.Sprintf("jneq %s,%d", operand, skipTrue) - // K > A - case JumpGreaterThan: - return conditionalJump(operand, skipTrue, skipFalse, "jgt", "jle") - // K < A - case JumpLessThan: - return fmt.Sprintf("jlt %s,%d", operand, skipTrue) - // K >= A - case JumpGreaterOrEqual: - return conditionalJump(operand, skipTrue, skipFalse, "jge", "jlt") - // K <= A - case JumpLessOrEqual: - return fmt.Sprintf("jle %s,%d", operand, skipTrue) - // K & A != 0 - case JumpBitsSet: - if skipFalse > 0 { - return fmt.Sprintf("jset %s,%d,%d", operand, skipTrue, skipFalse) - } - return fmt.Sprintf("jset %s,%d", operand, skipTrue) - // K & A == 0, there is no assembler instruction for JumpBitNotSet, use JumpBitSet and invert skips - case JumpBitsNotSet: - return jumpToString(JumpBitsSet, operand, skipFalse, skipTrue) - default: - return fmt.Sprintf("unknown JumpTest %#v", cond) - } -} - -func conditionalJump(operand string, skipTrue, skipFalse uint8, positiveJump, negativeJump string) string { - if skipTrue > 0 { - if skipFalse > 0 { - return fmt.Sprintf("%s %s,%d,%d", positiveJump, operand, skipTrue, skipFalse) - } - return fmt.Sprintf("%s %s,%d", positiveJump, operand, skipTrue) - } - return fmt.Sprintf("%s %s,%d", negativeJump, operand, skipFalse) -} - -// RetA exits the BPF program, returning the value of register A. -type RetA struct{} - -// Assemble implements the Instruction Assemble method. -func (a RetA) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsReturn | opRetSrcA, - }, nil -} - -// String returns the instruction in assembler notation. -func (a RetA) String() string { - return fmt.Sprintf("ret a") -} - -// RetConstant exits the BPF program, returning a constant value. -type RetConstant struct { - Val uint32 -} - -// Assemble implements the Instruction Assemble method. -func (a RetConstant) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsReturn | opRetSrcConstant, - K: a.Val, - }, nil -} - -// String returns the instruction in assembler notation. -func (a RetConstant) String() string { - return fmt.Sprintf("ret #%d", a.Val) -} - -// TXA copies the value of register X to register A. -type TXA struct{} - -// Assemble implements the Instruction Assemble method. 
-func (a TXA) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsMisc | opMiscTXA, - }, nil -} - -// String returns the instruction in assembler notation. -func (a TXA) String() string { - return fmt.Sprintf("txa") -} - -// TAX copies the value of register A to register X. -type TAX struct{} - -// Assemble implements the Instruction Assemble method. -func (a TAX) Assemble() (RawInstruction, error) { - return RawInstruction{ - Op: opClsMisc | opMiscTAX, - }, nil -} - -// String returns the instruction in assembler notation. -func (a TAX) String() string { - return fmt.Sprintf("tax") -} - -func assembleLoad(dst Register, loadSize int, mode uint16, k uint32) (RawInstruction, error) { - var ( - cls uint16 - sz uint16 - ) - switch dst { - case RegA: - cls = opClsLoadA - case RegX: - cls = opClsLoadX - default: - return RawInstruction{}, fmt.Errorf("invalid target register %v", dst) - } - switch loadSize { - case 1: - sz = opLoadWidth1 - case 2: - sz = opLoadWidth2 - case 4: - sz = opLoadWidth4 - default: - return RawInstruction{}, fmt.Errorf("invalid load byte length %d", sz) - } - return RawInstruction{ - Op: cls | sz | mode, - K: k, - }, nil -} diff --git a/src/runtime/vendor/golang.org/x/net/bpf/setter.go b/src/runtime/vendor/golang.org/x/net/bpf/setter.go deleted file mode 100644 index 43e35f0ac2..0000000000 --- a/src/runtime/vendor/golang.org/x/net/bpf/setter.go +++ /dev/null @@ -1,10 +0,0 @@ -// Copyright 2017 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bpf - -// A Setter is a type which can attach a compiled BPF filter to itself. -type Setter interface { - SetBPF(filter []RawInstruction) error -} diff --git a/src/runtime/vendor/golang.org/x/net/bpf/vm.go b/src/runtime/vendor/golang.org/x/net/bpf/vm.go deleted file mode 100644 index 73f57f1f72..0000000000 --- a/src/runtime/vendor/golang.org/x/net/bpf/vm.go +++ /dev/null @@ -1,150 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -package bpf - -import ( - "errors" - "fmt" -) - -// A VM is an emulated BPF virtual machine. -type VM struct { - filter []Instruction -} - -// NewVM returns a new VM using the input BPF program. 
-func NewVM(filter []Instruction) (*VM, error) { - if len(filter) == 0 { - return nil, errors.New("one or more Instructions must be specified") - } - - for i, ins := range filter { - check := len(filter) - (i + 1) - switch ins := ins.(type) { - // Check for out-of-bounds jumps in instructions - case Jump: - if check <= int(ins.Skip) { - return nil, fmt.Errorf("cannot jump %d instructions; jumping past program bounds", ins.Skip) - } - case JumpIf: - if check <= int(ins.SkipTrue) { - return nil, fmt.Errorf("cannot jump %d instructions in true case; jumping past program bounds", ins.SkipTrue) - } - if check <= int(ins.SkipFalse) { - return nil, fmt.Errorf("cannot jump %d instructions in false case; jumping past program bounds", ins.SkipFalse) - } - case JumpIfX: - if check <= int(ins.SkipTrue) { - return nil, fmt.Errorf("cannot jump %d instructions in true case; jumping past program bounds", ins.SkipTrue) - } - if check <= int(ins.SkipFalse) { - return nil, fmt.Errorf("cannot jump %d instructions in false case; jumping past program bounds", ins.SkipFalse) - } - // Check for division or modulus by zero - case ALUOpConstant: - if ins.Val != 0 { - break - } - - switch ins.Op { - case ALUOpDiv, ALUOpMod: - return nil, errors.New("cannot divide by zero using ALUOpConstant") - } - // Check for unknown extensions - case LoadExtension: - switch ins.Num { - case ExtLen: - default: - return nil, fmt.Errorf("extension %d not implemented", ins.Num) - } - } - } - - // Make sure last instruction is a return instruction - switch filter[len(filter)-1].(type) { - case RetA, RetConstant: - default: - return nil, errors.New("BPF program must end with RetA or RetConstant") - } - - // Though our VM works using disassembled instructions, we - // attempt to assemble the input filter anyway to ensure it is compatible - // with an operating system VM. - _, err := Assemble(filter) - - return &VM{ - filter: filter, - }, err -} - -// Run runs the VM's BPF program against the input bytes. -// Run returns the number of bytes accepted by the BPF program, and any errors -// which occurred while processing the program. 
-func (v *VM) Run(in []byte) (int, error) { - var ( - // Registers of the virtual machine - regA uint32 - regX uint32 - regScratch [16]uint32 - - // OK is true if the program should continue processing the next - // instruction, or false if not, causing the loop to break - ok = true - ) - - // TODO(mdlayher): implement: - // - NegateA: - // - would require a change from uint32 registers to int32 - // registers - - // TODO(mdlayher): add interop tests that check signedness of ALU - // operations against kernel implementation, and make sure Go - // implementation matches behavior - - for i := 0; i < len(v.filter) && ok; i++ { - ins := v.filter[i] - - switch ins := ins.(type) { - case ALUOpConstant: - regA = aluOpConstant(ins, regA) - case ALUOpX: - regA, ok = aluOpX(ins, regA, regX) - case Jump: - i += int(ins.Skip) - case JumpIf: - jump := jumpIf(ins, regA) - i += jump - case JumpIfX: - jump := jumpIfX(ins, regA, regX) - i += jump - case LoadAbsolute: - regA, ok = loadAbsolute(ins, in) - case LoadConstant: - regA, regX = loadConstant(ins, regA, regX) - case LoadExtension: - regA = loadExtension(ins, in) - case LoadIndirect: - regA, ok = loadIndirect(ins, in, regX) - case LoadMemShift: - regX, ok = loadMemShift(ins, in) - case LoadScratch: - regA, regX = loadScratch(ins, regScratch, regA, regX) - case RetA: - return int(regA), nil - case RetConstant: - return int(ins.Val), nil - case StoreScratch: - regScratch = storeScratch(ins, regScratch, regA, regX) - case TAX: - regX = regA - case TXA: - regA = regX - default: - return 0, fmt.Errorf("unknown Instruction at index %d: %T", i, ins) - } - } - - return 0, nil -} diff --git a/src/runtime/vendor/golang.org/x/net/bpf/vm_instructions.go b/src/runtime/vendor/golang.org/x/net/bpf/vm_instructions.go deleted file mode 100644 index cf8947c332..0000000000 --- a/src/runtime/vendor/golang.org/x/net/bpf/vm_instructions.go +++ /dev/null @@ -1,182 +0,0 @@ -// Copyright 2016 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. 
- -package bpf - -import ( - "encoding/binary" - "fmt" -) - -func aluOpConstant(ins ALUOpConstant, regA uint32) uint32 { - return aluOpCommon(ins.Op, regA, ins.Val) -} - -func aluOpX(ins ALUOpX, regA uint32, regX uint32) (uint32, bool) { - // Guard against division or modulus by zero by terminating - // the program, as the OS BPF VM does - if regX == 0 { - switch ins.Op { - case ALUOpDiv, ALUOpMod: - return 0, false - } - } - - return aluOpCommon(ins.Op, regA, regX), true -} - -func aluOpCommon(op ALUOp, regA uint32, value uint32) uint32 { - switch op { - case ALUOpAdd: - return regA + value - case ALUOpSub: - return regA - value - case ALUOpMul: - return regA * value - case ALUOpDiv: - // Division by zero not permitted by NewVM and aluOpX checks - return regA / value - case ALUOpOr: - return regA | value - case ALUOpAnd: - return regA & value - case ALUOpShiftLeft: - return regA << value - case ALUOpShiftRight: - return regA >> value - case ALUOpMod: - // Modulus by zero not permitted by NewVM and aluOpX checks - return regA % value - case ALUOpXor: - return regA ^ value - default: - return regA - } -} - -func jumpIf(ins JumpIf, regA uint32) int { - return jumpIfCommon(ins.Cond, ins.SkipTrue, ins.SkipFalse, regA, ins.Val) -} - -func jumpIfX(ins JumpIfX, regA uint32, regX uint32) int { - return jumpIfCommon(ins.Cond, ins.SkipTrue, ins.SkipFalse, regA, regX) -} - -func jumpIfCommon(cond JumpTest, skipTrue, skipFalse uint8, regA uint32, value uint32) int { - var ok bool - - switch cond { - case JumpEqual: - ok = regA == value - case JumpNotEqual: - ok = regA != value - case JumpGreaterThan: - ok = regA > value - case JumpLessThan: - ok = regA < value - case JumpGreaterOrEqual: - ok = regA >= value - case JumpLessOrEqual: - ok = regA <= value - case JumpBitsSet: - ok = (regA & value) != 0 - case JumpBitsNotSet: - ok = (regA & value) == 0 - } - - if ok { - return int(skipTrue) - } - - return int(skipFalse) -} - -func loadAbsolute(ins LoadAbsolute, in []byte) (uint32, bool) { - offset := int(ins.Off) - size := int(ins.Size) - - return loadCommon(in, offset, size) -} - -func loadConstant(ins LoadConstant, regA uint32, regX uint32) (uint32, uint32) { - switch ins.Dst { - case RegA: - regA = ins.Val - case RegX: - regX = ins.Val - } - - return regA, regX -} - -func loadExtension(ins LoadExtension, in []byte) uint32 { - switch ins.Num { - case ExtLen: - return uint32(len(in)) - default: - panic(fmt.Sprintf("unimplemented extension: %d", ins.Num)) - } -} - -func loadIndirect(ins LoadIndirect, in []byte, regX uint32) (uint32, bool) { - offset := int(ins.Off) + int(regX) - size := int(ins.Size) - - return loadCommon(in, offset, size) -} - -func loadMemShift(ins LoadMemShift, in []byte) (uint32, bool) { - offset := int(ins.Off) - - // Size of LoadMemShift is always 1 byte - if !inBounds(len(in), offset, 1) { - return 0, false - } - - // Mask off high 4 bits and multiply low 4 bits by 4 - return uint32(in[offset]&0x0f) * 4, true -} - -func inBounds(inLen int, offset int, size int) bool { - return offset+size <= inLen -} - -func loadCommon(in []byte, offset int, size int) (uint32, bool) { - if !inBounds(len(in), offset, size) { - return 0, false - } - - switch size { - case 1: - return uint32(in[offset]), true - case 2: - return uint32(binary.BigEndian.Uint16(in[offset : offset+size])), true - case 4: - return uint32(binary.BigEndian.Uint32(in[offset : offset+size])), true - default: - panic(fmt.Sprintf("invalid load size: %d", size)) - } -} - -func loadScratch(ins LoadScratch, regScratch [16]uint32, 
regA uint32, regX uint32) (uint32, uint32) { - switch ins.Dst { - case RegA: - regA = regScratch[ins.N] - case RegX: - regX = regScratch[ins.N] - } - - return regA, regX -} - -func storeScratch(ins StoreScratch, regScratch [16]uint32, regA uint32, regX uint32) [16]uint32 { - switch ins.Src { - case RegA: - regScratch[ins.N] = regA - case RegX: - regScratch[ins.N] = regX - } - - return regScratch -} diff --git a/src/runtime/vendor/modules.txt b/src/runtime/vendor/modules.txt index 17bcc59935..f41c35f96e 100644 --- a/src/runtime/vendor/modules.txt +++ b/src/runtime/vendor/modules.txt @@ -47,16 +47,6 @@ github.com/blang/semver github.com/blang/semver/v4 # github.com/cespare/xxhash/v2 v2.1.1 github.com/cespare/xxhash/v2 -# github.com/checkpoint-restore/go-criu/v5 v5.0.0 -github.com/checkpoint-restore/go-criu/v5 -github.com/checkpoint-restore/go-criu/v5/rpc -# github.com/cilium/ebpf v0.6.2 -github.com/cilium/ebpf -github.com/cilium/ebpf/asm -github.com/cilium/ebpf/internal -github.com/cilium/ebpf/internal/btf -github.com/cilium/ebpf/internal/unix -github.com/cilium/ebpf/link # github.com/containerd/cgroups v1.0.1 ## explicit github.com/containerd/cgroups @@ -121,8 +111,6 @@ github.com/cpuguy83/go-md2man/v2/md2man # github.com/cri-o/cri-o v1.0.0-rc2.0.20170928185954-3394b3b2d6af ## explicit github.com/cri-o/cri-o/pkg/annotations -# github.com/cyphar/filepath-securejoin v0.2.2 -github.com/cyphar/filepath-securejoin # github.com/davecgh/go-spew v1.1.1 github.com/davecgh/go-spew/spew # github.com/docker/go-units v0.4.0 @@ -208,38 +196,14 @@ github.com/mdlayher/vsock github.com/mitchellh/mapstructure # github.com/moby/sys/mountinfo v0.4.1 github.com/moby/sys/mountinfo -# github.com/mrunalp/fileutils v0.5.0 -github.com/mrunalp/fileutils # github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/go-digest # github.com/opencontainers/runc v1.0.1 => github.com/opencontainers/runc v1.0.1 ## explicit -github.com/opencontainers/runc/libcontainer -github.com/opencontainers/runc/libcontainer/apparmor -github.com/opencontainers/runc/libcontainer/capabilities -github.com/opencontainers/runc/libcontainer/cgroups -github.com/opencontainers/runc/libcontainer/cgroups/devices -github.com/opencontainers/runc/libcontainer/cgroups/ebpf -github.com/opencontainers/runc/libcontainer/cgroups/ebpf/devicefilter -github.com/opencontainers/runc/libcontainer/cgroups/fs -github.com/opencontainers/runc/libcontainer/cgroups/fs2 -github.com/opencontainers/runc/libcontainer/cgroups/fscommon -github.com/opencontainers/runc/libcontainer/cgroups/systemd github.com/opencontainers/runc/libcontainer/configs -github.com/opencontainers/runc/libcontainer/configs/validate github.com/opencontainers/runc/libcontainer/devices -github.com/opencontainers/runc/libcontainer/intelrdt -github.com/opencontainers/runc/libcontainer/keys -github.com/opencontainers/runc/libcontainer/logs -github.com/opencontainers/runc/libcontainer/seccomp -github.com/opencontainers/runc/libcontainer/seccomp/patchbpf -github.com/opencontainers/runc/libcontainer/specconv -github.com/opencontainers/runc/libcontainer/stacktrace -github.com/opencontainers/runc/libcontainer/system github.com/opencontainers/runc/libcontainer/user github.com/opencontainers/runc/libcontainer/userns -github.com/opencontainers/runc/libcontainer/utils -github.com/opencontainers/runc/types # github.com/opencontainers/runtime-spec v1.0.3-0.20210326190908-1c3f411f0417 ## explicit github.com/opencontainers/runtime-spec/specs-go @@ -275,8 +239,6 @@ github.com/russross/blackfriday/v2 # 
github.com/safchain/ethtool v0.0.0-20190326074333-42ed695e3de8 ## explicit github.com/safchain/ethtool -# github.com/seccomp/libseccomp-golang v0.9.1 -github.com/seccomp/libseccomp-golang # github.com/shurcooL/sanitized_anchor_name v1.0.0 github.com/shurcooL/sanitized_anchor_name # github.com/sirupsen/logrus v1.8.1 @@ -288,8 +250,6 @@ github.com/sirupsen/logrus/hooks/syslog # github.com/stretchr/testify v1.6.1 ## explicit github.com/stretchr/testify/assert -# github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 -github.com/syndtr/gocapability/capability # github.com/urfave/cli v1.22.2 ## explicit github.com/urfave/cli @@ -339,7 +299,6 @@ go.opentelemetry.io/otel/sdk/resource go.opentelemetry.io/otel/sdk/trace # golang.org/x/net v0.0.0-20210226172049-e18ecbb05110 ## explicit -golang.org/x/net/bpf golang.org/x/net/context golang.org/x/net/context/ctxhttp golang.org/x/net/http/httpguts