diff --git a/.github/workflows/run-kata-coco-tests.yaml b/.github/workflows/run-kata-coco-tests.yaml index e71e59fd8c..3109b9ea8c 100644 --- a/.github/workflows/run-kata-coco-tests.yaml +++ b/.github/workflows/run-kata-coco-tests.yaml @@ -123,6 +123,7 @@ jobs: run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs - name: Delete CSI driver + if: always() timeout-minutes: 5 run: bash tests/integration/kubernetes/gha-run.sh delete-csi-driver @@ -208,6 +209,7 @@ jobs: run: bash tests/integration/kubernetes/gha-run.sh delete-coco-kbs - name: Delete CSI driver + if: always() timeout-minutes: 5 run: bash tests/integration/kubernetes/gha-run.sh delete-csi-driver diff --git a/.gitignore b/.gitignore index 94b91954a9..b8cffced7d 100644 --- a/.gitignore +++ b/.gitignore @@ -18,3 +18,7 @@ src/tools/log-parser/kata-log-parser tools/packaging/static-build/agent/install_libseccomp.sh .envrc .direnv + +# Generated by genpolicy and stored in the working directory, so only +# the basename is ignored. 
+layers-cache.json diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index c3d86726e7..a1dd3092b5 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -990,6 +990,12 @@ dependencies = [ "parking_lot_core", ] +[[package]] +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + [[package]] name = "deranged" version = "0.4.0" @@ -3485,6 +3491,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843c3d97f07e3b5ac0955d53ad0af4c91fe4a4f8525843ece5bf014f27829b73" dependencies = [ "anyhow", + "data-encoding", "lazy_static", "rand", "regex", diff --git a/src/agent/policy/Cargo.toml b/src/agent/policy/Cargo.toml index d31773ab78..3562008b91 100644 --- a/src/agent/policy/Cargo.toml +++ b/src/agent/policy/Cargo.toml @@ -18,6 +18,8 @@ serde_json.workspace = true # Agent Policy regorus = { version = "0.2.8", default-features = false, features = [ "arc", + "base64", + "base64url", "regex", "std", ] } diff --git a/src/agent/src/storage/block_handler.rs b/src/agent/src/storage/block_handler.rs index 251a4dfff7..4ad24bdd13 100644 --- a/src/agent/src/storage/block_handler.rs +++ b/src/agent/src/storage/block_handler.rs @@ -5,7 +5,7 @@ // use std::fs; -use std::os::unix::fs::PermissionsExt; +use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::path::Path; use std::str::FromStr; use std::sync::Arc; @@ -16,6 +16,7 @@ use kata_types::device::{ DRIVER_SCSI_TYPE, }; use kata_types::mount::StorageDevice; +use nix::sys::stat::{major, minor}; use protocols::agent::Storage; use tracing::instrument; @@ -28,8 +29,8 @@ use crate::device::block_device_handler::{ }; use crate::device::nvdimm_device_handler::wait_for_pmem_device; use crate::device::scsi_device_handler::get_scsi_device_name; -use crate::pci; use crate::storage::{common_storage_handler, new_device, StorageContext, StorageHandler}; +use 
crate::{confidential_data_hub, pci, AGENT_CONFIG}; #[derive(Debug)] pub struct VirtioBlkMmioHandler {} @@ -73,6 +74,8 @@ impl StorageHandler for VirtioBlkPciHandler { mut storage: Storage, ctx: &mut StorageContext, ) -> Result> { + let dev_num; + // If hot-plugged, get the device node path based on the PCI path // otherwise use the virt path provided in Storage Source if storage.source.starts_with("/dev") { @@ -82,14 +85,46 @@ impl StorageHandler for VirtioBlkPciHandler { if mode & libc::S_IFBLK == 0 { return Err(anyhow!("Invalid device {}", &storage.source)); } + let dev_id = metadata.rdev(); + dev_num = format!("{}:{}", major(dev_id), minor(dev_id)); } else { let pcipath = pci::Path::from_str(&storage.source)?; let dev_path = get_virtio_blk_pci_device_name(ctx.sandbox, &pcipath).await?; storage.source = dev_path; + let metadata = fs::metadata(&storage.source) + .context(format!("get metadata on file {:?}", &storage.source))?; + let dev_id = metadata.rdev(); + dev_num = format!("{}:{}", major(dev_id), minor(dev_id)); } - let path = common_storage_handler(ctx.logger, &storage)?; - new_device(path) + let confidential = storage + .driver_options + .contains(&"confidential=true".to_string()); + let ephemeral = storage + .driver_options + .contains(&"ephemeral=true".to_string()); + + if confidential && ephemeral { + let integrity = AGENT_CONFIG.secure_storage_integrity.to_string(); + let options = std::collections::HashMap::from([ + ("deviceId".to_string(), dev_num), + ("encryptType".to_string(), "LUKS".to_string()), + ("dataIntegrity".to_string(), integrity), + ]); + + confidential_data_hub::secure_mount( + "BlockDevice", + &options, + vec![], + &storage.mount_point, + ) + .await?; + + new_device(storage.mount_point) + } else { + let path = common_storage_handler(ctx.logger, &storage)?; + new_device(path) + } } } diff --git a/src/runtime/Makefile b/src/runtime/Makefile index 940338c8e6..43ea422b2d 100644 --- a/src/runtime/Makefile +++ b/src/runtime/Makefile @@ -214,8 
+214,8 @@ DEFMEMSLOTS := 10 DEFMAXMEMSZ := 0 #Default number of bridges DEFBRIDGES := 1 -DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\"] -DEFENABLEANNOTATIONSTEE := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\"] +DEFENABLEANNOTATIONS := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"block_device_driver\"] +DEFENABLEANNOTATIONSTEE := [\"enable_iommu\", \"virtio_fs_extra_args\", \"kernel_params\", \"default_vcpus\", \"default_memory\", \"block_device_driver\"] DEFDISABLEGUESTSECCOMP := true DEFDISABLEGUESTEMPTYDIR := false #Default experimental features enabled diff --git a/src/runtime/pkg/direct-volume/utils.go b/src/runtime/pkg/direct-volume/utils.go index 9e13a4d227..6daef00065 100644 --- a/src/runtime/pkg/direct-volume/utils.go +++ b/src/runtime/pkg/direct-volume/utils.go @@ -17,6 +17,8 @@ import ( const ( mountInfoFileName = "mountInfo.json" + ConfidentialMetadataKey = "confidential" + EphemeralMetadataKey = "ephemeral" FSGroupMetadataKey = "fsGroup" FSGroupChangePolicyMetadataKey = "fsGroupChangePolicy" ) diff --git a/src/runtime/virtcontainers/container.go b/src/runtime/virtcontainers/container.go index b554df4cfa..30b272ea94 100644 --- a/src/runtime/virtcontainers/container.go +++ b/src/runtime/virtcontainers/container.go @@ -644,6 +644,20 @@ func (c *Container) createBlockDevices(ctx context.Context) error { for key, value := range mntInfo.Metadata { switch key { + case volume.ConfidentialMetadataKey: + confidential, err := strconv.ParseBool(value) + if err != nil { + c.Logger().Errorf("invalid value %q for metadata key %q, expected boolean string", value, key) + continue + } + c.mounts[i].Confidential = confidential + case volume.EphemeralMetadataKey: + ephemeral, err := strconv.ParseBool(value) + if err != nil { + c.Logger().Errorf("invalid value %q for metadata key %q, expected boolean string", value, key) + continue + } + 
c.mounts[i].Ephemeral = ephemeral case volume.FSGroupMetadataKey: gid, err := strconv.Atoi(value) if err != nil { diff --git a/src/runtime/virtcontainers/kata_agent.go b/src/runtime/virtcontainers/kata_agent.go index fef95c4737..eb32a258f1 100644 --- a/src/runtime/virtcontainers/kata_agent.go +++ b/src/runtime/virtcontainers/kata_agent.go @@ -1793,6 +1793,13 @@ func (k *kataAgent) handleDeviceBlockVolume(c *Container, m Mount, device api.De } } + if m.Confidential { + vol.DriverOptions = append(vol.DriverOptions, fmt.Sprintf("%s=true", volume.ConfidentialMetadataKey)) + } + if m.Ephemeral { + vol.DriverOptions = append(vol.DriverOptions, fmt.Sprintf("%s=true", volume.EphemeralMetadataKey)) + } + return vol, nil } diff --git a/src/runtime/virtcontainers/mount.go b/src/runtime/virtcontainers/mount.go index e9f44dffd7..f3c44a725b 100644 --- a/src/runtime/virtcontainers/mount.go +++ b/src/runtime/virtcontainers/mount.go @@ -273,6 +273,13 @@ type Mount struct { // FSGroupChangePolicy specifies the policy that will be used when applying // group id ownership change for a volume. FSGroupChangePolicy volume.FSGroupChangePolicy + + // Confidential specifies whether to encrypt the underlying storage. 
+ Confidential bool + + // Ephemeral specifies whether the underlying storage is ephemeral: + // https://kubernetes.io/docs/concepts/storage/ephemeral-volumes/ + Ephemeral bool } func isSymlink(path string) bool { diff --git a/src/tools/csi-kata-directvolume/.gitignore b/src/tools/csi-kata-directvolume/.gitignore index e660fd93d3..08f7deafae 100644 --- a/src/tools/csi-kata-directvolume/.gitignore +++ b/src/tools/csi-kata-directvolume/.gitignore @@ -1 +1,2 @@ bin/ +deploy/kata-directvolume/kata-directvol-rbac.yaml diff --git a/src/tools/csi-kata-directvolume/README.md b/src/tools/csi-kata-directvolume/README.md index 7c9eccb040..206f950c02 100644 --- a/src/tools/csi-kata-directvolume/README.md +++ b/src/tools/csi-kata-directvolume/README.md @@ -30,50 +30,9 @@ cd tools/csi-kata-directvolume/ && make ## Building the Container Image -If you want to build the container image yourself, you can do so with the following command from a specified path. -Here, we just use `buildah/podman` as an example: +If you want to build the container image yourself, you can do so with the following command: ```shell -$ tree -L 2 buildah-directv/ -buildah-directv/ -├── bin -│   └── directvolplugin -└── Dockerfile - -$ buildah bud -t kata-directvolume:v1.0.19 -STEP 1/7: FROM alpine -STEP 2/7: LABEL maintainers="Kata Containers Authors" -STEP 3/7: LABEL description="Kata DirectVolume Driver" -STEP 4/7: ARG binary=./bin/directvolplugin -STEP 5/7: RUN apk add util-linux coreutils e2fsprogs xfsprogs xfsprogs-extra btrfs-progs && apk update && apk upgrade -fetch https://dl-cdn.alpinelinux.org/alpine/v3.19/main/x86_64/APKINDEX.tar.gz -fetch https://dl-cdn.alpinelinux.org/alpine/v3.19/community/x86_64/APKINDEX.tar.gz -(1/66) Installing libblkid (2.39.3-r0) -... 
-(66/66) Installing xfsprogs-extra (6.5.0-r0) -Executing busybox-1.36.1-r15.trigger -OK: 64 MiB in 81 packages -fetch https://dl-cdn.alpinelinux.org/alpine/v3.19/main/x86_64/APKINDEX.tar.gz -fetch https://dl-cdn.alpinelinux.org/alpine/v3.19/community/x86_64/APKINDEX.tar.gz -v3.19.0-19-ga0ddaee500e [https://dl-cdn.alpinelinux.org/alpine/v3.19/main] -v3.19.0-18-gec62a609516 [https://dl-cdn.alpinelinux.org/alpine/v3.19/community] -OK: 22983 distinct packages available -OK: 64 MiB in 81 packages -STEP 6/7: COPY ${binary} /kata-directvol-plugin -STEP 7/7: ENTRYPOINT ["/kata-directvol-plugin"] -COMMIT kata-directvolume:v1.0.19 -Getting image source signatures -Copying blob 5af4f8f59b76 skipped: already exists -Copying blob a55645705de3 done -Copying config 244001cc51 done -Writing manifest to image destination -Storing signatures ---> 244001cc51d -Successfully tagged localhost/kata-directvolume:v1.0.19 -244001cc51d77302c4ed5e1a0ec347d12d85dec4576ea1313f700f66e2a7d36d -$ podman save localhost/kata-directvolume:v1.0.19 -o kata-directvolume-v1.0.19.tar -$ ctr -n k8s.io image import kata-directvolume-v1.0.19.tar -unpacking localhost/kata-directvolume:v1.0.19 (sha256:1bdc33ff7f9cee92e74cbf77a9d79d00dce6dbb9ba19b9811f683e1a087f8fbf)...done -$ crictl images |grep 1.0.19 -localhost/kata-directvolume v1.0.19 244001cc51d77 83.8MB +$ cd src/tools/csi-kata-directvolume +$ docker build -t localhost/kata-directvolume:v1.0.18 . 
``` diff --git a/src/tools/csi-kata-directvolume/deploy/deploy.sh b/src/tools/csi-kata-directvolume/deploy/deploy.sh old mode 100644 new mode 100755 diff --git a/src/tools/csi-kata-directvolume/docs/deploy-csi-kata-directvol.md b/src/tools/csi-kata-directvolume/docs/deploy-csi-kata-directvol.md index c57eda4480..ccfea48a57 100644 --- a/src/tools/csi-kata-directvolume/docs/deploy-csi-kata-directvol.md +++ b/src/tools/csi-kata-directvolume/docs/deploy-csi-kata-directvol.md @@ -17,87 +17,58 @@ The easiest way to deploy the `Direct Volume CSI driver` is to run the `deploy.s the cluster as shown below for Kubernetes 1.28.2. ```shell -sudo deploy/deploy.sh -``` - -You'll get an output similar to the following, indicating the application of `RBAC rules` and the successful deployment of `csi-provisioner`, `node-driver-registrar`, `kata directvolume csi driver`(`csi-kata-directvol-plugin`), liveness-probe. Please note that the following output is specific to Kubernetes 1.28.2. - -```shell +$ ./deploy/deploy.sh Creating Namespace kata-directvolume ... -kubectl apply -f /tmp/tmp.kN43BWUGQ5/kata-directvol-ns.yaml +kubectl apply -f /tmp/tmp.lAAPNQ1aI2/kata-directvol-ns.yaml namespace/kata-directvolume created Namespace kata-directvolume created Done ! Applying RBAC rules ... 
-curl https://raw.githubusercontent.com/kubernetes-csi/external-provisioner/v3.6.0/deploy/kubernetes/rbac.yaml --output /tmp/tmp.kN43BWUGQ5/rbac.yaml --silent --location -kubectl apply -f ./kata-directvolume/kata-directvol-rbac.yaml +curl https://raw.githubusercontent.com/kubernetes-csi/external-provisioner/v3.6.0/deploy/kubernetes/rbac.yaml --output /tmp/tmp.lAAPNQ1aI2/rbac.yaml --silent --location +kubectl apply -f ./deploy/kata-directvolume/kata-directvol-rbac.yaml serviceaccount/csi-provisioner created clusterrole.rbac.authorization.k8s.io/external-provisioner-runner created clusterrolebinding.rbac.authorization.k8s.io/csi-provisioner-role created role.rbac.authorization.k8s.io/external-provisioner-cfg created rolebinding.rbac.authorization.k8s.io/csi-provisioner-role-cfg created - -$ ./directvol-deploy.sh +Applying RBAC rules Done! deploying kata directvolume components - ./kata-directvolume/csi-directvol-driverinfo.yaml + ./deploy/kata-directvolume/csi-directvol-driverinfo.yaml csidriver.storage.k8s.io/directvolume.csi.katacontainers.io created - ./kata-directvolume/csi-directvol-plugin.yaml + ./deploy/kata-directvolume/csi-directvol-plugin.yaml kata-directvolume plugin using image: registry.k8s.io/sig-storage/csi-provisioner:v3.6.0 kata-directvolume plugin using image: registry.k8s.io/sig-storage/csi-node-driver-registrar:v2.9.0 -kata-directvolume plugin using image: localhost/kata-directvolume:v1.0.52 +kata-directvolume plugin using image: localhost/kata-directvolume:v1.0.19 kata-directvolume plugin using image: registry.k8s.io/sig-storage/livenessprobe:v2.8.0 daemonset.apps/csi-kata-directvol-plugin created - ./kata-directvolume/kata-directvol-ns.yaml -namespace/kata-directvolume unchanged - ./kata-directvolume/kata-directvol-rbac.yaml -serviceaccount/csi-provisioner unchanged -clusterrole.rbac.authorization.k8s.io/external-provisioner-runner configured -clusterrolebinding.rbac.authorization.k8s.io/csi-provisioner-role unchanged 
-role.rbac.authorization.k8s.io/external-provisioner-cfg unchanged -rolebinding.rbac.authorization.k8s.io/csi-provisioner-role-cfg unchanged -NAMESPACE NAME READY STATUS RESTARTS AGE -default pod/kata-driectvol-01 1/1 Running 0 3h57m -kata-directvolume pod/csi-kata-directvol-plugin-92smp 4/4 Running 0 4s -kube-flannel pod/kube-flannel-ds-vq796 1/1 Running 1 (67d ago) 67d -kube-system pod/coredns-66f779496c-9bmp2 1/1 Running 3 (67d ago) 67d -kube-system pod/coredns-66f779496c-qlq6d 1/1 Running 1 (67d ago) 67d -kube-system pod/etcd-tnt001 1/1 Running 19 (67d ago) 67d -kube-system pod/kube-apiserver-tnt001 1/1 Running 5 (67d ago) 67d -kube-system pod/kube-controller-manager-tnt001 1/1 Running 8 (67d ago) 67d -kube-system pod/kube-proxy-p9t6t 1/1 Running 6 (67d ago) 67d -kube-system pod/kube-scheduler-tnt001 1/1 Running 8 (67d ago) 67d +NAMESPACE NAME READY STATUS RESTARTS AGE +kata-directvolume pod/csi-kata-directvol-plugin-9vvhc 4/4 Running 0 3s +[...TRUNCATED...] -NAMESPACE NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE -kata-directvolume daemonset.apps/csi-kata-directvol-plugin 1 1 1 1 1 4s -kube-flannel daemonset.apps/kube-flannel-ds 1 1 1 1 1 67d -kube-system daemonset.apps/kube-proxy 1 1 1 1 1 kubernetes.io/os=linux 67d +NAMESPACE NAME DESIRED CURRENT READY UP-TO-DATE AVAILABLE NODE SELECTOR AGE +kata-directvolume daemonset.apps/csi-kata-directvol-plugin 1 1 1 1 1 3s +[...TRUNCATED...] 
``` ## How to Run a Kata Pod and Validate it -First, ensure all expected pods are running properly, including `csi-provisioner`, `node-driver-registrar`, `kata-directvolume` `csi driver(csi-kata-directvol-plugin)`, liveness-probe: +First, ensure all expected containers are running properly: ```shell -$ kubectl get po -A -NAMESPACE NAME READY STATUS RESTARTS AGE -default csi-kata-directvol-plugin-dlphw 4/4 Running 0 68m -kube-flannel kube-flannel-ds-vq796 1/1 Running 1 (52d ago) 52d -kube-system coredns-66f779496c-9bmp2 1/1 Running 3 (52d ago) 52d -kube-system coredns-66f779496c-qlq6d 1/1 Running 1 (52d ago) 52d -kube-system etcd-node001 1/1 Running 19 (52d ago) 52d -kube-system kube-apiserver-node001 1/1 Running 5 (52d ago) 52d -kube-system kube-controller-manager-node001 1/1 Running 8 (52d ago) 52d -kube-system kube-proxy-p9t6t 1/1 Running 6 (52d ago) 52d -kube-system kube-scheduler-node001 1/1 Running 8 (52d ago) 52d +$ kubectl get po -n kata-directvolume +NAME READY STATUS RESTARTS AGE +csi-kata-directvol-plugin-9vvhc 4/4 Running 0 6m14s ``` -From the root directory, deploy the application pods including a storage class, a `PVC`, and a pod which uses direct block device based volume. 
The details can be seen in `/examples/pod-with-directvol/*.yaml`: +Deploy the application pods including a storage class, a `PVC`, and a +pod which uses direct block device based volume: ```shell -kubectl apply -f ${BASE_DIR}/csi-storageclass.yaml -kubectl apply -f ${BASE_DIR}/csi-pvc.yaml -kubectl apply -f ${BASE_DIR}/csi-app.yaml +$ cd src/tools/csi-kata-directvolume/examples/pod-with-directvol +$ kubectl apply -f csi-storageclass.yaml +$ kubectl apply -f csi-pvc.yaml +$ kubectl apply -f csi-app.yaml ``` Let's validate the components are deployed: diff --git a/src/tools/csi-kata-directvolume/pkg/directvolume/controllerserver.go b/src/tools/csi-kata-directvolume/pkg/directvolume/controllerserver.go index 7ee6be4d71..381de9f201 100644 --- a/src/tools/csi-kata-directvolume/pkg/directvolume/controllerserver.go +++ b/src/tools/csi-kata-directvolume/pkg/directvolume/controllerserver.go @@ -49,6 +49,7 @@ func (dv *directVolume) CreateVolume(ctx context.Context, req *csi.CreateVolumeR volumeCtx := make(map[string]string) volumeCtx[utils.IsDirectVolume] = "False" + volumeCtx[utils.KataContainersDirectLoop] = "False" for key, value := range req.GetParameters() { switch strings.ToLower(key) { @@ -56,12 +57,20 @@ func (dv *directVolume) CreateVolume(ctx context.Context, req *csi.CreateVolumeR if value == utils.DirectVolumeTypeName { volumeCtx[utils.IsDirectVolume] = "True" } + volumeCtx[utils.KataContainersDirectVolumeType] = value case utils.KataContainersDirectFsType: volumeCtx[utils.KataContainersDirectFsType] = value + case utils.KataContainersDirectLoop: + volumeCtx[utils.KataContainersDirectLoop] = value + case utils.KataContainersDirectCoCoEphemeral: + volumeCtx[utils.KataContainersDirectCoCoEphemeral] = value default: - continue + klog.Warningf("unknown parameter: %s", key) } } + if isLoopDevice(volumeCtx) { + volumeCtx[utils.IsDirectVolume] = "True" + } contentSrc := req.GetVolumeContentSource() diff --git 
a/src/tools/csi-kata-directvolume/pkg/directvolume/nodeserver.go b/src/tools/csi-kata-directvolume/pkg/directvolume/nodeserver.go index 9847c38722..d7e88f0dcb 100644 --- a/src/tools/csi-kata-directvolume/pkg/directvolume/nodeserver.go +++ b/src/tools/csi-kata-directvolume/pkg/directvolume/nodeserver.go @@ -10,8 +10,10 @@ package directvolume import ( "fmt" "os" + "os/exec" "path/filepath" "strconv" + "strings" "kata-containers/csi-kata-directvolume/pkg/utils" @@ -68,8 +70,12 @@ func (dv *directVolume) NodePublishVolume(ctx context.Context, req *csi.NodePubl attrib := req.GetVolumeContext() devicePath := dv.config.VolumeDevices[volumeID] - klog.Infof("target %v\nfstype %v\ndevice %v\nreadonly %v\nvolumeID %v\n", - targetPath, fsType, devicePath, readOnly, volumeID) + klog.Infoln("target", targetPath) + klog.Infoln("volType", volType) + klog.Infoln("fstype", fsType) + klog.Infoln("device", devicePath) + klog.Infoln("readonly", readOnly) + klog.Infoln("volumeID", volumeID) options := []string{"bind"} if readOnly { @@ -93,13 +99,25 @@ func (dv *directVolume) NodePublishVolume(ctx context.Context, req *csi.NodePubl return nil, status.Error(codes.Aborted, errMsg) } + var guestOptions []string + if isLoopDevice(attrib) { + guestOptions = []string{} + } else { + guestOptions = options + } + + if isCoCoEphemeralVolume(attrib) { + attrib["confidential"] = "true" + attrib["ephemeral"] = "true" + } + // kata-containers DirectVolume add mountInfo := utils.MountInfo{ VolumeType: volType, Device: devicePath, FsType: fsType, Metadata: attrib, - Options: options, + Options: guestOptions, } if err := utils.AddDirectVolume(targetPath, mountInfo); err != nil { klog.Errorf("add direct volume with source %s and mountInfo %v failed", targetPath, mountInfo) @@ -196,8 +214,31 @@ func (dv *directVolume) NodeUnpublishVolume(ctx context.Context, req *csi.NodeUn return &csi.NodeUnpublishVolumeResponse{}, nil } +func parseBool(s string) bool { + if b, err := strconv.ParseBool(s); err != nil { + 
return false + } else { + return b + } +} + func isDirectVolume(VolumeCtx map[string]string) bool { - return VolumeCtx[utils.IsDirectVolume] == "True" + return parseBool(VolumeCtx[utils.IsDirectVolume]) +} + +func isLoopDevice(VolumeCtx map[string]string) bool { + return parseBool(VolumeCtx[utils.KataContainersDirectLoop]) +} + +func isCoCoEphemeralVolume(VolumeCtx map[string]string) bool { + return parseBool(VolumeCtx[utils.KataContainersDirectCoCoEphemeral]) +} + +// getDeviceSymlinkPath returns the path of the symlink that is used to +// point to the loop device from inside the specified stagingTargetPath +// directory. +func getDeviceSymlinkPath(stagingTargetPath string) string { + return filepath.Join(stagingTargetPath, "device") } func (dv *directVolume) NodeStageVolume(ctx context.Context, req *csi.NodeStageVolumeRequest) (*csi.NodeStageVolumeResponse, error) { @@ -224,14 +265,14 @@ func (dv *directVolume) NodeStageVolume(ctx context.Context, req *csi.NodeStageV defer dv.mutex.Unlock() capacityInBytes := req.VolumeContext[utils.CapabilityInBytes] - devicePath, err := utils.CreateDirectBlockDevice(volumeID, capacityInBytes, dv.config.StoragePath) + imagePath, err := utils.CreateDirectBlockDevice(volumeID, capacityInBytes, dv.config.StoragePath) if err != nil { errMsg := status.Errorf(codes.Internal, "setup storage for volume '%s' failed", volumeID) return &csi.NodeStageVolumeResponse{}, errMsg } // /full_path_on_host/VolumeId/ - deviceUpperPath := filepath.Dir(*devicePath) + imageUpperPath := filepath.Dir(*imagePath) if canMnt, err := utils.CanDoBindmount(dv.config.safeMounter, stagingTargetPath); err != nil { return nil, err } else if !canMnt { @@ -240,8 +281,8 @@ func (dv *directVolume) NodeStageVolume(ctx context.Context, req *csi.NodeStageV } options := []string{"bind"} - if err := dv.config.safeMounter.DoBindmount(deviceUpperPath, stagingTargetPath, "", options); err != nil { - klog.Errorf("safe mounter: %v do bind mount %v failed, with error: %v", 
deviceUpperPath, stagingTargetPath, err.Error()) + if err := dv.config.safeMounter.DoBindmount(imageUpperPath, stagingTargetPath, "", options); err != nil { + klog.Errorf("safe mounter: %v do bind mount %v failed, with error: %v", imageUpperPath, stagingTargetPath, err.Error()) return nil, err } @@ -251,11 +292,33 @@ func (dv *directVolume) NodeStageVolume(ctx context.Context, req *csi.NodeStageV fsType = utils.DefaultFsType } - if err := dv.config.safeMounter.SafeFormatWithFstype(*devicePath, fsType, options); err != nil { + if err := dv.config.safeMounter.SafeFormatWithFstype(*imagePath, fsType, options); err != nil { return nil, err } - dv.config.VolumeDevices[volumeID] = *devicePath + if isLoopDevice(req.VolumeContext) { + deviceLink := getDeviceSymlinkPath(stagingTargetPath) + + losetupOut, err := exec.Command("losetup", "-f", "--show", *imagePath).Output() + if err != nil { + var stderr []byte + if exitErr, isExitError := err.(*exec.ExitError); isExitError { + stderr = exitErr.Stderr + } + errMsg := status.Errorf(codes.Internal, "failed to set up loop device from %s: %v: %s", *imagePath, err, stderr) + return &csi.NodeStageVolumeResponse{}, errMsg + } + + devicePath := strings.TrimSuffix(string(losetupOut), "\n") + + if err := os.Symlink(devicePath, deviceLink); err != nil { + return nil, status.Errorf(codes.Internal, "failed to create symlink at %s: %v", deviceLink, err) + } + + dv.config.VolumeDevices[volumeID] = devicePath + } else { + dv.config.VolumeDevices[volumeID] = *imagePath + } klog.Infof("directvolume: volume %s has been staged.", stagingTargetPath) @@ -305,6 +368,24 @@ func (dv *directVolume) NodeUnstageVolume(ctx context.Context, req *csi.NodeUnst dv.mutex.Lock() defer dv.mutex.Unlock() + deviceLink := getDeviceSymlinkPath(stagingTargetPath) + + if _, err := os.Stat(deviceLink); err != nil { + if !os.IsNotExist(err) { + return nil, status.Errorf(codes.Internal, "failed to stat file %s: %v", deviceLink, err) + } + // Else this volume didn't use a 
loop device, so do nothing. + } else { + // We have to resolve the symlink first because losetup won't follow it. + canonicalDevice, err := filepath.EvalSymlinks(deviceLink) + if err != nil { + return nil, status.Errorf(codes.Internal, "failed to resolve device symlink %s: %v", deviceLink, err) + } + if err := exec.Command("losetup", "-d", canonicalDevice).Run(); err != nil { + return nil, status.Errorf(codes.Internal, "failed to detach loop device %s: %v", deviceLink, err) + } + } + // Unmount only if the target path is really a mount point. if isMnt, err := dv.config.safeMounter.IsMountPoint(stagingTargetPath); err != nil { return nil, status.Error(codes.Internal, fmt.Sprintf("check staging target path: %v", err)) diff --git a/src/tools/csi-kata-directvolume/pkg/utils/utils.go b/src/tools/csi-kata-directvolume/pkg/utils/utils.go index cdd80147a4..fea4219101 100644 --- a/src/tools/csi-kata-directvolume/pkg/utils/utils.go +++ b/src/tools/csi-kata-directvolume/pkg/utils/utils.go @@ -24,10 +24,12 @@ import ( ) const ( - KataContainersDirectVolumeType = "katacontainers.direct.volume/volumetype" - KataContainersDirectFsType = "katacontainers.direct.volume/fstype" - DirectVolumeTypeName = "directvol" - IsDirectVolume = "is_directvolume" + KataContainersDirectVolumeType = "katacontainers.direct.volume/volumetype" + KataContainersDirectFsType = "katacontainers.direct.volume/fstype" + KataContainersDirectLoop = "katacontainers.direct.volume/loop" + KataContainersDirectCoCoEphemeral = "katacontainers.direct.volume/cocoephemeral" + DirectVolumeTypeName = "directvol" + IsDirectVolume = "is_directvolume" ) const ( diff --git a/src/tools/genpolicy/Cargo.lock b/src/tools/genpolicy/Cargo.lock index bde129a10c..1cb98bda33 100644 --- a/src/tools/genpolicy/Cargo.lock +++ b/src/tools/genpolicy/Cargo.lock @@ -518,6 +518,12 @@ dependencies = [ "syn 2.0.104", ] +[[package]] +name = "data-encoding" +version = "2.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "2a2330da5de22e8a3cb63252ce2abb30116bf5265e89c0e01bc17015ce30a476" + [[package]] name = "derive-new" version = "0.5.9" @@ -2159,6 +2165,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "843c3d97f07e3b5ac0955d53ad0af4c91fe4a4f8525843ece5bf014f27829b73" dependencies = [ "anyhow", + "data-encoding", "lazy_static", "rand", "regex", diff --git a/src/tools/genpolicy/genpolicy-settings.json b/src/tools/genpolicy/genpolicy-settings.json index 75a4463cc1..a543cd4441 100644 --- a/src/tools/genpolicy/genpolicy-settings.json +++ b/src/tools/genpolicy/genpolicy-settings.json @@ -228,6 +228,7 @@ }, "common": { "cpath": "/run/kata-containers/shared/containers", + "spath": "/run/kata-containers/sandbox/storage", "root_path": "/run/kata-containers/$(bundle-id)/rootfs", "sfprefix": "^$(cpath)/$(bundle-id)-[a-z0-9]{16}-", "ip_p": "[0-9]{1,5}", @@ -293,7 +294,18 @@ "CAP_BPF", "CAP_CHECKPOINT_RESTORE" ], - "image_layer_verification" : "none" + "image_layer_verification" : "none", + "storage_classes": { + "csi-kata-directvolume-sc": { + "driver": "blk", + "driver_options": [ + "confidential=true", + "ephemeral=true" + ], + "fs_type": "ext4", + "options": [] + } + } }, "kata_config": { "oci_version": "1.1.0", diff --git a/src/tools/genpolicy/rules.rego b/src/tools/genpolicy/rules.rego index b402b1c5ec..bfce14494b 100644 --- a/src/tools/genpolicy/rules.rego +++ b/src/tools/genpolicy/rules.rego @@ -1027,6 +1027,27 @@ mount_source_allows(p_mount, i_mount, bundle_id, sandbox_id) if { print("mount_source_allows 2: true") } +mount_source_allows(p_mount, i_mount, bundle_id, sandbox_id) { + print("mount_source_allows 3: i_mount.source=", i_mount.source) + + i_source_parts = split(i_mount.source, "/") + b64_device_id = i_source_parts[count(i_source_parts) - 1] + + base64.is_valid(b64_device_id) + + source1 := p_mount.source + print("mount_source_allows 3: source1 =", source1) + + source2 := replace(source1, "$(spath)", policy_data.common.spath) + 
print("mount_source_allows 3: source2 =", source2) + + source3 := replace(source2, "$(b64_device_id)", b64_device_id) + print("mount_source_allows 3: source3 =", source3) + + source3 == i_mount.source + + print("mount_source_allows 3: true") +} ###################################################################### # Create container Storages @@ -1105,7 +1126,6 @@ allow_storage_source(p_storage, i_storage, bundle_id) if { allow_storage_options(p_storage, i_storage) if { print("allow_storage_options 1: start") - p_storage.driver != "blk" p_storage.driver != "overlayfs" p_storage.options == i_storage.options @@ -1154,6 +1174,24 @@ allow_mount_point(p_storage, i_storage, bundle_id, sandbox_id) if { print("allow_mount_point 3: true") } +# This rule is for storages shared via the direct volume assignment API. +allow_mount_point(p_storage, i_storage, bundle_id, sandbox_id, layer_ids) if { + p_storage.fstype == i_storage.fstype + + mount1 := p_storage.mount_point + print("allow_mount_point 6: mount1 =", mount1) + + mount2 := replace(mount1, "$(spath)", policy_data.common.spath) + print("allow_mount_point 6: mount2 =", mount2) + + device_id := i_storage.source + mount3 := replace(mount2, "$(b64_device_id)", base64url.encode(device_id)) + print("allow_mount_point 6: mount3 =", mount3) + + mount3 == i_storage.mount_point + + print("allow_mount_point 6: true") +} # ExecProcessRequest.process.Capabilities allow_exec_caps(i_caps) if { diff --git a/src/tools/genpolicy/src/mount_and_storage.rs b/src/tools/genpolicy/src/mount_and_storage.rs index bb93261f76..e26764335f 100644 --- a/src/tools/genpolicy/src/mount_and_storage.rs +++ b/src/tools/genpolicy/src/mount_and_storage.rs @@ -111,6 +111,21 @@ pub fn get_mount_and_storage( &yaml_volume ); + let options = { + let propagation = match &yaml_mount.mountPropagation { + Some(p) if p == "Bidirectional" => "rshared", + _ => "rprivate", + }; + + let access = if let Some(true) = yaml_mount.readOnly { + "ro" + } else { + "rw" + }; + + 
(propagation, access) + }; + if let Some(emptyDir) = &yaml_volume.emptyDir { let settings_volumes = &settings.volumes; let mut volume: Option<&settings::EmptyDirVolume> = None; @@ -127,15 +142,24 @@ pub fn get_mount_and_storage( get_empty_dir_mount_and_storage(settings, p_mounts, storages, yaml_mount, volume.unwrap()); } else if yaml_volume.persistentVolumeClaim.is_some() || yaml_volume.azureFile.is_some() { - get_shared_bind_mount(yaml_mount, p_mounts, "rprivate", "rw"); + get_shared_bind_mount(yaml_mount, p_mounts, ("rprivate", "rw")); } else if yaml_volume.hostPath.is_some() { - get_host_path_mount(yaml_mount, yaml_volume, p_mounts); + get_host_path_mount(yaml_mount, yaml_volume, p_mounts, options); } else if yaml_volume.configMap.is_some() || yaml_volume.secret.is_some() { get_config_map_mount_and_storage(settings, p_mounts, storages, yaml_mount); } else if yaml_volume.projected.is_some() { - get_shared_bind_mount(yaml_mount, p_mounts, "rprivate", "ro"); + get_shared_bind_mount(yaml_mount, p_mounts, ("rprivate", "ro")); } else if yaml_volume.downwardAPI.is_some() { get_downward_api_mount(yaml_mount, p_mounts); + } else if yaml_volume.ephemeral.is_some() { + get_ephemeral_mount( + settings, + yaml_mount, + yaml_volume, + p_mounts, + storages, + options, + ); } else { todo!("Unsupported volume type {:?}", yaml_volume); } @@ -201,25 +225,11 @@ fn get_host_path_mount( yaml_mount: &pod::VolumeMount, yaml_volume: &volume::Volume, p_mounts: &mut Vec, + mount_options: (&str, &str), ) { let host_path = yaml_volume.hostPath.as_ref().unwrap().path.clone(); let path = Path::new(&host_path); - let mut biderectional = false; - if let Some(mount_propagation) = &yaml_mount.mountPropagation { - if mount_propagation.eq("Bidirectional") { - debug!("get_host_path_mount: Bidirectional"); - biderectional = true; - } - } - - let access = match yaml_mount.readOnly { - Some(true) => { - debug!("setting read only access for host path mount"); - "ro" - } - _ => "rw", - }; // TODO: // // 
- When volume.hostPath.path: /dev/ttyS0 @@ -230,17 +240,11 @@ fn get_host_path_mount( // What is the reason for this source path difference in the Guest OS? if !path.starts_with("/dev/") && !path.starts_with("/sys/") { debug!("get_host_path_mount: calling get_shared_bind_mount"); - let propagation = if biderectional { "rshared" } else { "rprivate" }; - get_shared_bind_mount(yaml_mount, p_mounts, propagation, access); + get_shared_bind_mount(yaml_mount, p_mounts, mount_options); } else { let dest = yaml_mount.mountPath.clone(); let type_ = "bind".to_string(); - let mount_option = if biderectional { "rshared" } else { "rprivate" }; - let options = vec![ - "rbind".to_string(), - mount_option.to_string(), - access.to_string(), - ]; + let options = build_options_vec(mount_options); if let Some(policy_mount) = p_mounts.iter_mut().find(|m| m.destination.eq(&dest)) { debug!("get_host_path_mount: updating dest = {dest}, source = {host_path}"); @@ -298,8 +302,7 @@ fn get_config_map_mount_and_storage( fn get_shared_bind_mount( yaml_mount: &pod::VolumeMount, p_mounts: &mut Vec, - propagation: &str, - access: &str, + mount_options: (&str, &str), ) { let mount_path = if let Some(byte_index) = str::rfind(&yaml_mount.mountPath, '/') { str::from_utf8(&yaml_mount.mountPath.as_bytes()[byte_index + 1..]).unwrap() @@ -310,11 +313,7 @@ fn get_shared_bind_mount( let dest = yaml_mount.mountPath.clone(); let type_ = "bind".to_string(); - let options = vec![ - "rbind".to_string(), - propagation.to_string(), - access.to_string(), - ]; + let options = build_options_vec(mount_options); if let Some(policy_mount) = p_mounts.iter_mut().find(|m| m.destination.eq(&dest)) { debug!("get_shared_bind_mount: updating dest = {dest}, source = {source}"); @@ -364,6 +363,67 @@ fn get_downward_api_mount(yaml_mount: &pod::VolumeMount, p_mounts: &mut Vec, + storages: &mut Vec, + mount_options: (&str, &str), +) { + let storage_class = &yaml_volume + .ephemeral + .as_ref() + .unwrap() + .volumeClaimTemplate + 
.spec + .storageClassName + .as_ref(); + + if let Some(sc_config) = storage_class.and_then(|sc| settings.common.storage_classes.get(sc)) { + // Mounting a device into a container takes two steps: + // 1. In the guest: Mount the device from `Storage.source` on + // this path (i.e. `Storage.mount_point`). + // 2. In the container: Bind mount this path on the pod spec + // mount point (volumeMount). + let source = "$(spath)/$(b64_device_id)".to_string(); + + storages.push(agent::Storage { + driver: sc_config.driver.clone(), + driver_options: sc_config.driver_options.clone(), + fstype: sc_config.fs_type.clone(), + options: sc_config.options.clone(), + + source: "$(device_id)".to_string(), + mount_point: source.to_string(), + + fs_group: protobuf::MessageField::none(), + special_fields: ::protobuf::SpecialFields::new(), + }); + + let dest = yaml_mount.mountPath.clone(); + let type_ = "bind".to_string(); + let options = build_options_vec(mount_options); + + if let Some(policy_mount) = p_mounts.iter_mut().find(|m| m.destination == dest) { + debug!("get_ephemeral_mount: updating dest = {dest}, source = {source}"); + policy_mount.type_ = type_; + policy_mount.source = source; + policy_mount.options = options; + } else { + debug!("get_ephemeral_mount: adding dest = {dest}, source = {source}"); + p_mounts.push(policy::KataMount { + destination: dest, + type_, + source, + options, + }); + } + } else { + get_shared_bind_mount(yaml_mount, p_mounts, mount_options); + } +} + pub fn get_image_mount_and_storage( settings: &settings::Settings, p_mounts: &mut Vec, @@ -406,3 +466,12 @@ pub fn get_image_mount_and_storage( options: settings_image.options.clone(), }); } + +fn build_options_vec(mount_options: (&str, &str)) -> Vec { + let (propagation, access) = mount_options; + vec![ + "rbind".to_string(), + propagation.to_string(), + access.to_string(), + ] +} diff --git a/src/tools/genpolicy/src/persistent_volume_claim.rs b/src/tools/genpolicy/src/persistent_volume_claim.rs index 
3db25a490c..61d0ce3f08 100644 --- a/src/tools/genpolicy/src/persistent_volume_claim.rs +++ b/src/tools/genpolicy/src/persistent_volume_claim.rs @@ -21,19 +21,19 @@ pub struct PersistentVolumeClaim { kind: Option, pub metadata: obj_meta::ObjectMeta, - spec: PersistentVolumeClaimSpec, + pub spec: PersistentVolumeClaimSpec, } /// See Reference / Kubernetes API / Config and Storage Resources / PersistentVolumeClaim. #[derive(Clone, Debug, Default, Serialize, Deserialize)] -struct PersistentVolumeClaimSpec { +pub struct PersistentVolumeClaimSpec { resources: ResourceRequirements, #[serde(skip_serializing_if = "Option::is_none")] accessModes: Option>, #[serde(skip_serializing_if = "Option::is_none")] - storageClassName: Option, + pub storageClassName: Option, #[serde(skip_serializing_if = "Option::is_none")] volumeMode: Option, diff --git a/src/tools/genpolicy/src/policy.rs b/src/tools/genpolicy/src/policy.rs index d029acd0a2..040765738c 100644 --- a/src/tools/genpolicy/src/policy.rs +++ b/src/tools/genpolicy/src/policy.rs @@ -24,7 +24,7 @@ use protocols::agent; use serde::{Deserialize, Serialize}; use serde_yaml::Value; use std::boxed; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::fs::read_to_string; use std::io::Write; @@ -399,6 +399,9 @@ pub struct CommonData { /// Regex prefix for shared file paths - e.g., "^$(cpath)/$(bundle-id)-[a-z0-9]{16}-". pub sfprefix: String, + /// Path to the shared sandbox storage - e.g., "/run/kata-containers/sandbox/storage". + pub spath: String, + /// Regex for an IPv4 address. pub ipv4_a: String, @@ -416,6 +419,22 @@ pub struct CommonData { /// Default capabilities for a privileged container. pub privileged_caps: Vec, + + /// A mapping of storage classes to configurations, which determines + /// the behavior of storage classes. + pub storage_classes: HashMap, +} + +/// A subset of the `agent::Storage` gRPC object sent by the shim. 
This +/// prescribes the values for the fields of that object. This is +/// necessary with e.g. CSI drivers, as genpolicy cannot possibly infer +/// the content of the gRPC object on its own in such cases. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct StorageConfig { + pub driver: String, + pub driver_options: Vec, + pub fs_type: String, + pub options: Vec, } /// Configuration from "kubectl config". diff --git a/src/tools/genpolicy/src/volume.rs b/src/tools/genpolicy/src/volume.rs index 0bb908a81c..20a1fa39c8 100644 --- a/src/tools/genpolicy/src/volume.rs +++ b/src/tools/genpolicy/src/volume.rs @@ -6,7 +6,7 @@ // Allow K8s YAML field names. #![allow(non_snake_case)] -use crate::pod; +use crate::{obj_meta, persistent_volume_claim, pod}; use serde::{Deserialize, Serialize}; @@ -37,7 +37,11 @@ pub struct Volume { pub secret: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub downwardAPI: Option, // TODO: additional fields. + pub downwardAPI: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub ephemeral: Option, + // TODO: additional fields. } /// See Reference / Kubernetes API / Config and Storage Resources / Volume. @@ -129,3 +133,18 @@ pub struct DownwardAPIVolumeFile { #[serde(skip_serializing_if = "Option::is_none")] pub fieldRef: Option, } + +/// See Reference / Kubernetes API / Config and Storage Resources / Volume. +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct EphemeralVolumeSource { + pub volumeClaimTemplate: PersistentVolumeClaimTemplate, +} + +/// See Reference / Kubernetes API / Config and Storage Resources / Volume. 
+#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct PersistentVolumeClaimTemplate { + #[serde(skip_serializing_if = "Option::is_none")] + pub metadata: Option, + + pub spec: persistent_volume_claim::PersistentVolumeClaimSpec, +} diff --git a/tests/gha-run-k8s-common.sh b/tests/gha-run-k8s-common.sh index 9dc2a93550..0e827b22b6 100644 --- a/tests/gha-run-k8s-common.sh +++ b/tests/gha-run-k8s-common.sh @@ -9,6 +9,8 @@ source "${tests_dir}/common.bash" kubernetes_dir="${tests_dir}/integration/kubernetes" helm_chart_dir="${repo_root_dir}/tools/packaging/kata-deploy/helm-chart/kata-deploy" +AZ_REGION="${AZ_REGION:-eastus}" +AZ_NODEPOOL_TAGS="${AZ_NODEPOOL_TAGS:-}" GENPOLICY_PULL_METHOD="${GENPOLICY_PULL_METHOD:-oci-distribution}" GH_PR_NUMBER="${GH_PR_NUMBER:-}" HELM_DEFAULT_INSTALLATION="${HELM_DEFAULT_INSTALLATION:-false}" @@ -106,7 +108,7 @@ function create_cluster() { "GENPOLICY_PULL_METHOD=${GENPOLICY_PULL_METHOD:0:1}") az group create \ - -l eastus \ + -l "${AZ_REGION}" \ -n "${rg}" # Required by e.g. AKS App Routing for KBS installation. 
@@ -123,7 +125,8 @@ function create_cluster() { -s "$(_print_instance_type)" \ --node-count 1 \ --generate-ssh-keys \ - --tags "${tags[@]}" + --tags "${tags[@]}" \ + $([ -n "${AZ_NODEPOOL_TAGS}" ] && echo "--nodepool-tags ${AZ_NODEPOOL_TAGS}") } function install_bats() { diff --git a/tests/integration/kubernetes/gha-run.sh b/tests/integration/kubernetes/gha-run.sh index b02ee510fd..11a3a9fa07 100755 --- a/tests/integration/kubernetes/gha-run.sh +++ b/tests/integration/kubernetes/gha-run.sh @@ -19,6 +19,10 @@ source "${kubernetes_dir}/confidential_kbs.sh" tools_dir="${repo_root_dir}/tools" kata_tarball_dir="${2:-kata-artifacts}" +csi_dir="${repo_root_dir}/src/tools/csi-kata-directvolume" +csi_deploy_dir="${csi_dir}/deploy" +csi_storage_class="${csi_dir}/examples/pod-with-directvol/csi-storageclass.yaml" + export DOCKER_REGISTRY="${DOCKER_REGISTRY:-quay.io}" export DOCKER_REPO="${DOCKER_REPO:-kata-containers/kata-deploy-ci}" export DOCKER_TAG="${DOCKER_TAG:-kata-containers-latest}" @@ -559,6 +563,40 @@ function cleanup_nydus_snapshotter() { echo "::endgroup::" } +function deploy_csi_driver() { + echo "::group::deploy_csi_driver" + ensure_yq + + csi_image_selector="image: ghcr.io/kata-containers/csi-kata-directvolume:${GH_PR_NUMBER}" + csi_plugin="${csi_deploy_dir}/kata-directvolume/csi-directvol-plugin.yaml" + + # Deploy the driver pods. + sed -i "s|image: localhost/kata-directvolume:v1.0.18|${csi_image_selector}|" "${csi_plugin}" + grep -q "${csi_image_selector}" "${csi_plugin}" # Ensure the substitution took place. + bash "${csi_deploy_dir}/deploy.sh" + + # Deploy the storage class. 
+ yq -i ".parameters.\"katacontainers.direct.volume/volumetype\" = \"blk\"" "${csi_storage_class}" + yq -i ".parameters.\"katacontainers.direct.volume/loop\" = \"True\"" "${csi_storage_class}" + yq -i ".parameters.\"katacontainers.direct.volume/cocoephemeral\" = \"True\"" "${csi_storage_class}" + yq -i ".volumeBindingMode = \"WaitForFirstConsumer\"" "${csi_storage_class}" + kubectl apply -f "${csi_storage_class}" + + echo "::endgroup::" +} + +function delete_csi_driver() { + echo "::group::delete_csi_driver" + + # Delete the storage class. + kubectl delete --ignore-not-found -f "${csi_storage_class}" + + # Delete the driver pods. + kubectl delete --ignore-not-found -f "${csi_deploy_dir}/kata-directvolume/" + + echo "::endgroup::" +} + function main() { export KATA_HOST_OS="${KATA_HOST_OS:-}" export K8S_TEST_HOST_TYPE="${K8S_TEST_HOST_TYPE:-}" @@ -575,8 +613,8 @@ function main() { install-bats) install_bats ;; install-kata-tools) install_kata_tools ;; install-kbs-client) install_kbs_client ;; - get-cluster-credentials) get_cluster_credentials "" ;; - deploy-csi-driver) return 0 ;; + get-cluster-credentials) get_cluster_credentials ;; + deploy-csi-driver) deploy_csi_driver ;; deploy-kata) deploy_kata ;; deploy-kata-aks) deploy_kata "aks" ;; deploy-kata-kcli) deploy_kata "kcli" ;; @@ -607,7 +645,7 @@ function main() { cleanup-garm) cleanup "garm" ;; cleanup-zvsi) cleanup "zvsi" ;; cleanup-snapshotter) cleanup_snapshotter ;; - delete-csi-driver) return 0 ;; + delete-csi-driver) delete_csi_driver ;; delete-coco-kbs) delete_coco_kbs ;; delete-cluster) cleanup "aks" ;; delete-cluster-kcli) delete_cluster_kcli ;; diff --git a/tests/integration/kubernetes/k8s-trusted-ephemeral-data-storage.bats b/tests/integration/kubernetes/k8s-trusted-ephemeral-data-storage.bats new file mode 100644 index 0000000000..8867f5bdf9 --- /dev/null +++ b/tests/integration/kubernetes/k8s-trusted-ephemeral-data-storage.bats @@ -0,0 +1,110 @@ +#!/usr/bin/env bats +# Copyright (c) 2025 Microsoft 
Corporation +# SPDX-License-Identifier: Apache-2.0 + +load "${BATS_TEST_DIRNAME}/lib.sh" +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/confidential_common.sh" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +setup() { + is_confidential_runtime_class || skip "Test only supported for CoCo" + + setup_common + get_pod_config_dir + + pod_name="trusted-ephemeral-data-storage" + mountpoint="/mnt/temp-encrypted" + capacity_bytes="10000000" + + yaml_file="${pod_config_dir}/pod-trusted-ephemeral-data-storage.yaml" + policy_settings_dir="$(create_tmp_policy_settings_dir "${pod_config_dir}")" + + # Use virtio-blk to mount the host device. + set_metadata_annotation "${yaml_file}" \ + "io.katacontainers.config.hypervisor.block_device_driver" \ + "virtio-blk" + + # Enable dm-integrity. + set_metadata_annotation "${yaml_file}" \ + "io.katacontainers.config.hypervisor.kernel_params" \ + "agent.secure_storage_integrity=true" + + # The policy would only block container creation, so allow these + # requests to make writing tests easier. + allow_requests "${policy_settings_dir}" "ExecProcessRequest" "ReadStreamRequest" + auto_generate_policy "${policy_settings_dir}" "${yaml_file}" + + if exec_host "${node}" which apt-get; then + exec_host "${node}" apt-get install -y expect + elif exec_host "${node}" which tdnf; then + exec_host "${node}" tdnf install -y expect + fi + + copy_file_to_host "${pod_config_dir}/cryptsetup.exp" "${node}" "/tmp/cryptsetup.exp" +} + +@test "Trusted ephemeral data storage" { + kubectl apply -f "${yaml_file}" + kubectl wait --for=condition=Ready --timeout="${timeout}" pod "${pod_name}" + + # With long device names, df adds line breaks by default, so we pass -P to prevent that. 
+ df="$(kubectl exec "${pod_name}" -- df -PT "${mountpoint}" | tail -1)" + info "df output:" + info "${df}" + + dm_device="$(echo "${df}" | awk '{print $1}')" + fs_type="$(echo "${df}" | awk '{print $2}')" + available_bytes="$(echo "${df}" | awk '{print $5}')" + + # The output of the cryptsetup command will contain something like this: + # + # /dev/mapper/encrypted_disk_N6PxO is active and is in use. + # type: LUKS2 + # cipher: aes-xts-plain64 + # keysize: 768 bits + # key location: keyring + # integrity: hmac(sha256) + # integrity keysize: 256 bits + # device: /dev/vda + # sector size: 4096 + # offset: 0 sectors + # size: 2031880 sectors + # mode: read/write + pod_id=$(exec_host "${node}" crictl pods -q --name "^${pod_name}$") + crypt_status="$(exec_host "${node}" expect /tmp/cryptsetup.exp "${pod_id}" "${dm_device}")" + info "cryptsetup status output:" + info "${crypt_status}" + + # Check filesystem type and capacity. + + [[ "${fs_type}" == "ext4" ]] + # Allow FS and encryption metadata to take up to 15% of storage. + (( available_bytes >= capacity_bytes * 85 / 100 )) + + # Check encryption settings. + + grep -q "${dm_device} is active and is in use" <<< "${crypt_status}" + grep -Eq "type: +LUKS2" <<< "${crypt_status}" + grep -Eq "cipher: +aes-xts-plain64" <<< "${crypt_status}" + grep -Eq "integrity: +hmac\(sha256\)" <<< "${crypt_status}" + + # Check I/O. 
+ + kubectl exec "${pod_name}" -- sh -c "echo foo > "${mountpoint}/foo.txt"" + [[ "$(kubectl exec "${pod_name}" -- cat "${mountpoint}/foo.txt")" == "foo" ]] +} + +teardown() { + is_confidential_runtime_class || skip "Test only supported for CoCo" + + exec_host "${node}" rm -f /tmp/cryptsetup.exp + + if exec_host "${node}" which apt-get; then + exec_host "${node}" apt-get autoremove -y expect + elif exec_host "${node}" which tdnf; then + exec_host "${node}" tdnf remove -y expect + fi + + teardown_common "${node}" "${node_start_time:-}" +} diff --git a/tests/integration/kubernetes/lib.sh b/tests/integration/kubernetes/lib.sh index c726a0c14c..d7eb36f475 100644 --- a/tests/integration/kubernetes/lib.sh +++ b/tests/integration/kubernetes/lib.sh @@ -7,6 +7,7 @@ # This provides generic functions to use in the tests. # set -e +set -o pipefail # Necessary for exec_host() to return non-zero exits properly. wait_time=60 sleep_time=3 @@ -105,23 +106,15 @@ k8s_create_pod() { fi } -# Runs a command in the host filesystem. +# Creates a debugger pod if one doesn't already exist. # # Parameters: # $1 - the node name # -exec_host() { +create_debugger_pod() { local node="$1" - # Validate the node - if ! kubectl get node "${node}" > /dev/null 2>&1; then - die "A given node ${node} is not valid" - fi - # `kubectl debug` always returns 0, so we hack it to return the right exit code. - local command="${@:2}" - # Make 7 character hash from the node name local pod_name="custom-node-debugger-$(echo -n "$node" | sha1sum | cut -c1-7)" - # Run a debug pod # Check if there is an existing node debugger pod and reuse it # Otherwise, create a new one if ! kubectl get pod -n kube-system "${pod_name}" > /dev/null 2>&1; then @@ -136,6 +129,40 @@ exec_host() { fi fi + echo "${pod_name}" +} + +# Copies a file into the host filesystem. 
+# +# Parameters: +# $1 - source file path on the client +# $2 - node +# $3 - destination path on the node +# +copy_file_to_host() { + local source="$1" + local node="$2" + local destination="$3" + + debugger_pod="$(create_debugger_pod "${node}")" + kubectl cp -n kube-system "${source}" "${debugger_pod}:/host/${destination}" +} + +# Runs a command in the host filesystem. +# +# Parameters: +# $1 - the node name +# +exec_host() { + local node="$1" + # Validate the node + if ! kubectl get node "${node}" > /dev/null 2>&1; then + die "A given node ${node} is not valid" + fi + + local command="${@:2}" + local pod_name="$(create_debugger_pod "${node}")" + # Execute the command and capture the output # We're trailing the `\r` here due to: https://github.com/kata-containers/kata-containers/issues/8051 # tl;dr: When testing with CRI-O we're facing the following error: diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index 162bd4808a..5f69944a02 100755 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -89,6 +89,7 @@ else "k8s-sysctls.bats" \ "k8s-security-context.bats" \ "k8s-shared-volume.bats" \ + "k8s-trusted-ephemeral-data-storage.bats" \ "k8s-volume.bats" \ "k8s-nginx-connectivity.bats" \ ) diff --git a/tests/integration/kubernetes/runtimeclass_workloads/cryptsetup.exp b/tests/integration/kubernetes/runtimeclass_workloads/cryptsetup.exp new file mode 100644 index 0000000000..425eba66f7 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/cryptsetup.exp @@ -0,0 +1,12 @@ +# Copyright (c) 2025 Microsoft Corporation +# SPDX-License-Identifier: Apache-2.0 +set timeout 60 + +set POD_ID [lindex $argv 0] +set DM_DEVICE [lindex $argv 1] + +spawn /opt/kata/bin/kata-runtime exec $POD_ID +expect "# " +send "cryptsetup status $DM_DEVICE\n" +send "exit\n" +expect eof diff --git 
a/tests/integration/kubernetes/runtimeclass_workloads/pod-trusted-ephemeral-data-storage.yaml b/tests/integration/kubernetes/runtimeclass_workloads/pod-trusted-ephemeral-data-storage.yaml new file mode 100644 index 0000000000..80fd24e413 --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/pod-trusted-ephemeral-data-storage.yaml @@ -0,0 +1,26 @@ +--- +kind: Pod +apiVersion: v1 +metadata: + name: trusted-ephemeral-data-storage +spec: + runtimeClassName: kata + terminationGracePeriodSeconds: 0 + restartPolicy: Never + containers: + - image: quay.io/prometheus/busybox:latest + name: busybox + command: ["sleep", "infinity"] + volumeMounts: + - name: temp-encrypted + mountPath: /mnt/temp-encrypted + volumes: + - name: temp-encrypted + ephemeral: + volumeClaimTemplate: + spec: + accessModes: [ReadWriteOncePod] + storageClassName: csi-kata-directvolume-sc + resources: + requests: + storage: 10G diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index 473358a579..4ffccb47c0 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -248,6 +248,21 @@ add_requests_to_policy_settings() { done } +# Change Rego rules to allow one or more ttrpc requests from the Host to the Guest. +allow_requests() { + declare -r settings_dir="$1" + shift + declare -r requests=("$@") + + auto_generate_policy_enabled || return 0 + + for request in "${requests[@]}" + do + info "${settings_dir}/rules.rego: allowing ${request}" + sed -i "s/^default \(${request}\).\+/default \1 := true/" "${settings_dir}"/rules.rego + done +} + # Change genpolicy settings to allow executing on the Guest VM the commands # used by "kubectl cp" from the Host to the Guest. add_copy_from_host_to_policy_settings() {