From 52c66d20dc353fdf5d367276dbe4b2d325156c15 Mon Sep 17 00:00:00 2001 From: lifupan Date: Tue, 2 Apr 2019 10:55:20 +0800 Subject: [PATCH] shimv2: plug in the block-backed rootfs directly instead of mounting it When the container's rootfs is backed by block storage, such as devmapper, shimv2 will not mount it on the host; instead it inserts it into the hypervisor as a block device directly. If kata's config sets "disable_block_device_use" to true, it will mount the rootfs onto the host as before. Fixes: #1158 Signed-off-by: lifupan --- containerd-shim-v2/create.go | 182 ++++++++++++++++++++++++---------- containerd-shim-v2/delete.go | 8 +- containerd-shim-v2/service.go | 37 +++---- 3 files changed, 148 insertions(+), 79 deletions(-) diff --git a/containerd-shim-v2/create.go b/containerd-shim-v2/create.go index 65e301135a..b5fe278b42 100644 --- a/containerd-shim-v2/create.go +++ b/containerd-shim-v2/create.go @@ -13,29 +13,34 @@ import ( "github.com/containerd/typeurl" vc "github.com/kata-containers/runtime/virtcontainers" "github.com/kata-containers/runtime/virtcontainers/pkg/oci" + "github.com/pkg/errors" "os" + "path/filepath" taskAPI "github.com/containerd/containerd/runtime/v2/task" "github.com/kata-containers/runtime/pkg/katautils" "github.com/opencontainers/runtime-spec/specs-go" + containerd_types "github.com/containerd/containerd/api/types" + "github.com/containerd/containerd/mount" + "github.com/sirupsen/logrus" // only register the proto type _ "github.com/containerd/containerd/runtime/linux/runctypes" crioption "github.com/containerd/cri-containerd/pkg/api/runtimeoptions/v1" ) func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest, netns string) (*container, error) { - - detach := !r.Terminal - - // Checks the MUST and MUST NOT from OCI runtime specification - bundlePath, err := validBundle(r.ID, r.Bundle) - if err != nil { - return nil, err + rootFs := vc.RootFs{Mounted: s.mount} + if len(r.Rootfs) == 1 { + m := r.Rootfs[0] + rootFs.Source = m.Source + 
rootFs.Type = m.Type + rootFs.Options = m.Options } - ociSpec, err := oci.ParseConfigJSON(bundlePath) + detach := !r.Terminal + ociSpec, bundlePath, err := loadSpec(r, netns) if err != nil { return nil, err } @@ -45,13 +50,86 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest, netns return nil, err } - // Todo: - // Since there is a bug in kata for sharedPidNs, here to - // remove the pidns to disable the sharePidNs temporarily, - // once kata fixed this issue, we can remove this line. - // For the bug, please see: - // https://github.com/kata-containers/runtime/issues/930 - removeNamespace(&ociSpec, specs.PIDNamespace) + disableOutput := noNeedForOutput(detach, ociSpec.Process.Terminal) + rootfs := filepath.Join(r.Bundle, "rootfs") + + switch containerType { + case vc.PodSandbox: + if s.sandbox != nil { + return nil, fmt.Errorf("cannot create another sandbox in sandbox: %s", s.sandbox.ID()) + } + + _, err := loadRuntimeConfig(s, r) + if err != nil { + return nil, err + } + + defer func() { + if err != nil && s.mount { + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { + logrus.WithError(err2).Warn("failed to cleanup rootfs mount") + } + } + }() + + s.mount = true + if err = checkAndMount(s, r); err != nil { + return nil, err + } + + rootFs.Mounted = s.mount + + katautils.HandleFactory(ctx, vci, s.config) + sandbox, _, err := katautils.CreateSandbox(ctx, vci, *ociSpec, *s.config, rootFs, r.ID, bundlePath, "", disableOutput, false, true) + if err != nil { + return nil, err + } + s.sandbox = sandbox + + case vc.PodContainer: + if s.sandbox == nil { + return nil, fmt.Errorf("BUG: Cannot start the container, since the sandbox hasn't been created") + } + + if s.mount { + defer func() { + if err != nil { + if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { + logrus.WithError(err2).Warn("failed to cleanup rootfs mount") + } + } + }() + + if err = doMount(r.Rootfs, rootfs); err != nil { + return nil, err + } + } + + _, err = 
katautils.CreateContainer(ctx, vci, s.sandbox, *ociSpec, rootFs, r.ID, bundlePath, "", disableOutput, true) + if err != nil { + return nil, err + } + } + + container, err := newContainer(s, r, containerType, ociSpec) + if err != nil { + return nil, err + } + + return container, nil +} + +func loadSpec(r *taskAPI.CreateTaskRequest, netns string) (*oci.CompatOCISpec, string, error) { + // Checks the MUST and MUST NOT from OCI runtime specification + bundlePath, err := validBundle(r.ID, r.Bundle) + if err != nil { + return nil, "", err + } + + ociSpec, err := oci.ParseConfigJSON(bundlePath) + if err != nil { + return nil, "", err + } //set the network namespace path //this set will be applied to sandbox's @@ -70,43 +148,15 @@ func create(ctx context.Context, s *service, r *taskAPI.CreateTaskRequest, netns } } - disableOutput := noNeedForOutput(detach, ociSpec.Process.Terminal) + // Todo: + // Since there is a bug in kata for sharedPidNs, here to + // remove the pidns to disable the sharePidNs temporarily, + // once kata fixed this issue, we can remove this line. 
+ // For the bug, please see: + // https://github.com/kata-containers/runtime/issues/930 + removeNamespace(&ociSpec, specs.PIDNamespace) - switch containerType { - case vc.PodSandbox: - if s.sandbox != nil { - return nil, fmt.Errorf("cannot create another sandbox in sandbox: %s", s.sandbox.ID()) - } - - _, err := loadRuntimeConfig(s, r) - if err != nil { - return nil, err - } - - katautils.HandleFactory(ctx, vci, s.config) - sandbox, _, err := katautils.CreateSandbox(ctx, vci, ociSpec, *s.config, r.ID, bundlePath, "", disableOutput, false, true) - if err != nil { - return nil, err - } - s.sandbox = sandbox - - case vc.PodContainer: - if s.sandbox == nil { - return nil, fmt.Errorf("BUG: Cannot start the container, since the sandbox hasn't been created") - } - - _, err = katautils.CreateContainer(ctx, vci, s.sandbox, ociSpec, r.ID, bundlePath, "", disableOutput, true) - if err != nil { - return nil, err - } - } - - container, err := newContainer(s, r, containerType, &ociSpec) - if err != nil { - return nil, err - } - - return container, nil + return &ociSpec, bundlePath, nil } func loadRuntimeConfig(s *service, r *taskAPI.CreateTaskRequest) (*oci.RuntimeConfig, error) { @@ -142,3 +192,33 @@ func loadRuntimeConfig(s *service, r *taskAPI.CreateTaskRequest) (*oci.RuntimeCo return &runtimeConfig, nil } + +func checkAndMount(s *service, r *taskAPI.CreateTaskRequest) error { + if len(r.Rootfs) == 1 { + m := r.Rootfs[0] + + if katautils.IsBlockDevice(m.Source) && !s.config.HypervisorConfig.DisableBlockDeviceUse { + s.mount = false + return nil + } + } + rootfs := filepath.Join(r.Bundle, "rootfs") + if err := doMount(r.Rootfs, rootfs); err != nil { + return err + } + return nil +} + +func doMount(mounts []*containerd_types.Mount, rootfs string) error { + for _, rm := range mounts { + m := &mount.Mount{ + Type: rm.Type, + Source: rm.Source, + Options: rm.Options, + } + if err := m.Mount(rootfs); err != nil { + return errors.Wrapf(err, "failed to mount rootfs component %v", m) 
+ } + } + return nil +} diff --git a/containerd-shim-v2/delete.go b/containerd-shim-v2/delete.go index 936133b5c9..462749215e 100644 --- a/containerd-shim-v2/delete.go +++ b/containerd-shim-v2/delete.go @@ -39,9 +39,11 @@ func deleteContainer(ctx context.Context, s *service, c *container) error { return err } - rootfs := path.Join(c.bundle, "rootfs") - if err := mount.UnmountAll(rootfs, 0); err != nil { - logrus.WithError(err).Warn("failed to cleanup rootfs mount") + if s.mount { + rootfs := path.Join(c.bundle, "rootfs") + if err := mount.UnmountAll(rootfs, 0); err != nil { + logrus.WithError(err).Warn("failed to cleanup rootfs mount") + } } delete(s.containers, c.id) diff --git a/containerd-shim-v2/service.go b/containerd-shim-v2/service.go index 7ed3971ae3..e00a6d135f 100644 --- a/containerd-shim-v2/service.go +++ b/containerd-shim-v2/service.go @@ -10,7 +10,6 @@ import ( "io/ioutil" "os" sysexec "os/exec" - "path/filepath" "sync" "syscall" "time" @@ -18,7 +17,6 @@ import ( eventstypes "github.com/containerd/containerd/api/events" "github.com/containerd/containerd/errdefs" "github.com/containerd/containerd/events" - "github.com/containerd/containerd/mount" "github.com/containerd/containerd/namespaces" cdruntime "github.com/containerd/containerd/runtime" cdshim "github.com/containerd/containerd/runtime/v2/shim" @@ -72,6 +70,7 @@ func New(ctx context.Context, id string, publisher events.Publisher) (cdshim.Shi containers: make(map[string]*container), events: make(chan interface{}, chSize), ec: make(chan exit, bufferSize), + mount: false, } go s.processExits() @@ -99,6 +98,10 @@ type service struct { // pid directly. pid uint32 + // if the container's rootfs is block device backed, kata shimv2 + // will not do the rootfs mount. 
+ mount bool + context context.Context sandbox vc.VCSandbox containers map[string]*container @@ -310,39 +313,23 @@ func (s *service) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * s.mu.Lock() defer s.mu.Unlock() + var c *container + var netns string + //the network namespace created by cni plugin - netns, err := namespaces.NamespaceRequired(ctx) + netns, err = namespaces.NamespaceRequired(ctx) if err != nil { return nil, errors.Wrap(err, "create namespace") } - rootfs := filepath.Join(r.Bundle, "rootfs") - defer func() { - if err != nil { - if err2 := mount.UnmountAll(rootfs, 0); err2 != nil { - logrus.WithError(err2).Warn("failed to cleanup rootfs mount") - } - } - }() - for _, rm := range r.Rootfs { - m := &mount.Mount{ - Type: rm.Type, - Source: rm.Source, - Options: rm.Options, - } - if err := m.Mount(rootfs); err != nil { - return nil, errors.Wrapf(err, "failed to mount rootfs component %v", m) - } - } - - container, err := create(ctx, s, r, netns) + c, err = create(ctx, s, r, netns) if err != nil { return nil, err } - container.status = task.StatusCreated + c.status = task.StatusCreated - s.containers[r.ID] = container + s.containers[r.ID] = c s.send(&eventstypes.TaskCreate{ ContainerID: r.ID,