From 17d0db9865fe815f9ea0ec5ac4105219fb05f14c Mon Sep 17 00:00:00 2001 From: Xynnn007 Date: Thu, 3 Apr 2025 12:43:45 +0800 Subject: [PATCH 1/3] agent: add initdata parse logic Kata-agent now will check if a device /dev/vd* with 'initdata' magic number exists. If it exists, kata-agent will try to read it. Bytes 9~16 are the length of the compressed initdata toml in little endian. Bytes starting from 17 are the compressed initdata. The initdata image device layout looks like 0 8 16 16+length ... EOF 'initdata' length gzip(initdata toml) paddings The initdata will be parsed and put as aa.toml, cdh.toml and policy.rego to /run/confidential-containers/initdata. When AgentPolicy is initialized, the default policy will be overwritten by that. When AA is to be launched, if initdata is once processed, the launch arg will include --initdata parameter. Also, if /run/confidential-containers/initdata/aa.toml exists, the launch args will include -c /run/confidential-containers/initdata/aa.toml. When CDH is to be launched, if initdata is once processed, the launch args will include -c /run/confidential-containers/initdata/cdh.toml Signed-off-by: Xynnn007 --- src/agent/Cargo.lock | 7 +- src/agent/Cargo.toml | 7 +- src/agent/src/initdata.rs | 191 ++++++++++++++++++++++++++++++++ src/agent/src/main.rs | 85 +++++++++++--- src/agent/testdata/initdata.img | Bin 0 -> 512 bytes 5 files changed, 270 insertions(+), 20 deletions(-) create mode 100644 src/agent/src/initdata.rs create mode 100644 src/agent/testdata/initdata.img diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock index c81667cf3c..4e533fe63d 100644 --- a/src/agent/Cargo.lock +++ b/src/agent/Cargo.lock @@ -263,9 +263,9 @@ dependencies = [ [[package]] name = "async-compression" -version = "0.4.20" +version = "0.4.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310c9bcae737a48ef5cdee3174184e6d548b292739ede61a1f955ef76a738861" +checksum = 
"59a194f9d963d8099596278594b3107448656ba73831c9d8c783e613ce86da64" dependencies = [ "flate2", "futures-core", @@ -3041,9 +3041,11 @@ name = "kata-agent" version = "0.1.0" dependencies = [ "anyhow", + "async-compression", "async-recursion 0.3.2", "async-std", "async-trait", + "base64 0.22.1", "capctl", "cdi", "cfg-if 1.0.0", @@ -3083,6 +3085,7 @@ dependencies = [ "serde", "serde_json", "serial_test", + "sha2", "slog", "slog-scope", "slog-stdlog", diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml index f8d1474bb3..f2bb54e12c 100644 --- a/src/agent/Cargo.toml +++ b/src/agent/Cargo.toml @@ -85,6 +85,11 @@ image-rs = { git = "https://github.com/confidential-containers/guest-components" cdi = { git = "https://github.com/cncf-tags/container-device-interface-rs", rev = "fba5677a8e7cc962fc6e495fcec98d7d765e332a" } kata-agent-policy = { path = "policy", optional = true } +# Initdata +base64 = "0.22" +sha2 = "0.10.8" +async-compression = { version = "0.4.22", features = ["tokio", "gzip"] } + [dev-dependencies] tempfile = "3.1.0" test-utils = { path = "../libs/test-utils" } @@ -103,7 +108,7 @@ lto = true default-pull = [] seccomp = ["rustjail/seccomp"] standard-oci-runtime = ["rustjail/standard-oci-runtime"] -agent-policy = [ "kata-agent-policy" ] +agent-policy = ["kata-agent-policy"] guest-pull = ["image-rs/kata-cc-rustls-tls"] [[bin]] diff --git a/src/agent/src/initdata.rs b/src/agent/src/initdata.rs new file mode 100644 index 0000000000..7afc899974 --- /dev/null +++ b/src/agent/src/initdata.rs @@ -0,0 +1,191 @@ +//! # Initdata Module +//! +//! This module will do the following things if a proper initdata device with initdata exists. +//! 1. Parse the initdata block device and extract the config files to [`INITDATA_PATH`]. +//! 2. Return the initdata and the policy (if any). 
+ +// Copyright (c) 2025 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::{os::unix::fs::FileTypeExt, path::Path}; + +use anyhow::{bail, Context, Result}; +use async_compression::tokio::bufread::GzipDecoder; +use base64::{engine::general_purpose::STANDARD, Engine}; +use const_format::concatcp; +use serde::Deserialize; +use sha2::{Digest, Sha256, Sha384, Sha512}; +use slog::Logger; +use tokio::io::{AsyncReadExt, AsyncSeekExt}; + +/// This is the target directory to store the extracted initdata. +pub const INITDATA_PATH: &str = "/run/confidential-containers/initdata"; + +/// The path of AA's config file +pub const AA_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/aa.toml"); + +/// The path of CDH's config file +pub const CDH_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/cdh.toml"); + +/// Magic number of initdata device +pub const INITDATA_MAGIC_NUMBER: &[u8] = b"initdata"; + +/// Now only initdata `0.1.0` is defined. +const INITDATA_VERSION: &str = "0.1.0"; + +/// Initdata defined in +/// +#[derive(Deserialize)] +pub struct Initdata { + version: String, + algorithm: String, + data: DefinedFields, +} + +/// Well-defined keys for initdata of kata/CoCo +#[derive(Deserialize, Default)] +#[serde(deny_unknown_fields)] +pub struct DefinedFields { + #[serde(rename = "aa.toml")] + aa_config: Option, + #[serde(rename = "cdh.toml")] + cdh_config: Option, + #[serde(rename = "policy.rego")] + policy: Option, +} + +async fn detect_initdata_device(logger: &Logger) -> Result> { + let dev_dir = Path::new("/dev"); + let mut read_dir = tokio::fs::read_dir(dev_dir).await?; + while let Some(entry) = read_dir.next_entry().await? 
{ + let filename = entry.file_name(); + let filename = filename.to_string_lossy(); + debug!(logger, "Initdata check device `{filename}`"); + if !filename.starts_with("vd") { + continue; + } + let path = entry.path(); + + debug!(logger, "Initdata find potential device: `{path:?}`"); + let metadata = std::fs::metadata(path.clone())?; + if !metadata.file_type().is_block_device() { + continue; + } + + let mut file = tokio::fs::File::open(&path).await?; + let mut magic = [0; 8]; + match file.read_exact(&mut magic).await { + Ok(_) => { + debug!( + logger, + "Initdata read device `{filename}` first 8 bytes: {magic:?}" + ); + if magic == INITDATA_MAGIC_NUMBER { + let path = path.as_path().to_string_lossy().to_string(); + debug!(logger, "Found initdata device {path}"); + return Ok(Some(path)); + } + } + Err(e) => debug!(logger, "Initdata read device `{filename}` failed: {e:?}"), + } + } + + Ok(None) +} + +pub async fn read_initdata(device_path: &str) -> Result> { + let initdata_devfile = tokio::fs::File::open(device_path).await?; + let mut buf_reader = tokio::io::BufReader::new(initdata_devfile); + // skip the magic number "initdata" + buf_reader.seek(std::io::SeekFrom::Start(8)).await?; + + let mut len_buf = [0u8; 8]; + buf_reader.read_exact(&mut len_buf).await?; + let length = u64::from_le_bytes(len_buf) as usize; + + let mut buf = vec![0; length]; + buf_reader.read_exact(&mut buf).await?; + let mut gzip_decoder = GzipDecoder::new(&buf[..]); + + let mut initdata = Vec::new(); + let _ = gzip_decoder.read_to_end(&mut initdata).await?; + Ok(initdata) +} + +pub struct InitdataReturnValue { + pub digest: Vec, + pub _policy: Option, +} + +pub async fn initialize_initdata(logger: &Logger) -> Result> { + let logger = logger.new(o!("subsystem" => "initdata")); + let Some(initdata_device) = detect_initdata_device(&logger).await? 
else { + info!( + logger, + "Initdata device not found, skip initdata initialization" + ); + return Ok(None); + }; + + tokio::fs::create_dir_all(INITDATA_PATH) + .await + .inspect_err(|e| error!(logger, "Failed to create initdata dir: {e:?}"))?; + + let initdata_content = read_initdata(&initdata_device) + .await + .inspect_err(|e| error!(logger, "Failed to read initdata: {e:?}"))?; + + let initdata: Initdata = + toml::from_slice(&initdata_content).context("parse initdata failed")?; + info!(logger, "Initdata version: {}", initdata.version); + + if initdata.version != INITDATA_VERSION { + bail!("Unsupported initdata version"); + } + + let digest = match &initdata.algorithm[..] { + "sha256" => Sha256::digest(&initdata_content).to_vec(), + "sha384" => Sha384::digest(&initdata_content).to_vec(), + "sha512" => Sha512::digest(&initdata_content).to_vec(), + others => bail!("Unsupported hash algorithm {others}"), + }; + + if let Some(config) = initdata.data.aa_config { + tokio::fs::write(AA_CONFIG_PATH, config) + .await + .context("write aa config failed")?; + info!(logger, "write AA config from initdata"); + } + + if let Some(config) = initdata.data.cdh_config { + tokio::fs::write(CDH_CONFIG_PATH, config) + .await + .context("write cdh config failed")?; + info!(logger, "write CDH config from initdata"); + } + + debug!(logger, "Initdata digest: {}", STANDARD.encode(&digest)); + + let res = InitdataReturnValue { + digest, + _policy: initdata.data.policy, + }; + + Ok(Some(res)) +} + +#[cfg(test)] +mod tests { + use crate::initdata::read_initdata; + + const INITDATA_IMG_PATH: &str = "testdata/initdata.img"; + const INITDATA_PLAINTEXT: &[u8] = b"some content"; + + #[tokio::test] + async fn parse_initdata() { + let initdata = read_initdata(INITDATA_IMG_PATH).await.unwrap(); + assert_eq!(initdata, INITDATA_PLAINTEXT); + } +} diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs index 2e7698706c..cc9fa53ad5 100644 --- a/src/agent/src/main.rs +++ b/src/agent/src/main.rs @@ 
-18,10 +18,12 @@ extern crate scopeguard; #[macro_use] extern crate slog; -use anyhow::{anyhow, Context, Result}; +use anyhow::{anyhow, bail, Context, Result}; +use base64::Engine; use cfg_if::cfg_if; use clap::{AppSettings, Parser}; use const_format::{concatcp, formatcp}; +use initdata::{InitdataReturnValue, AA_CONFIG_PATH, CDH_CONFIG_PATH}; use nix::fcntl::OFlag; use nix::sys::reboot::{reboot, RebootMode}; use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr}; @@ -33,7 +35,6 @@ use std::os::unix::fs::{self as unixfs, FileTypeExt}; use std::os::unix::io::AsRawFd; use std::path::Path; use std::process::exit; -use std::process::Command; use std::sync::Arc; use tracing::{instrument, span}; @@ -42,6 +43,7 @@ mod config; mod console; mod device; mod features; +mod initdata; mod linux_abi; mod metrics; mod mount; @@ -419,6 +421,8 @@ async fn start_sandbox( let (tx, rx) = tokio::sync::oneshot::channel(); sandbox.lock().await.sender = Some(tx); + let initdata_return_value = initdata::initialize_initdata(logger).await?; + let gc_procs = config.guest_components_procs; if !attestation_binaries_available(logger, &gc_procs) { warn!( @@ -426,7 +430,21 @@ async fn start_sandbox( "attestation binaries requested for launch not available" ); } else { - init_attestation_components(logger, config).await?; + init_attestation_components(logger, config, &initdata_return_value).await?; + } + + // if policy is given via initdata, use it + #[cfg(feature = "agent-policy")] + if let Some(initdata_return_value) = initdata_return_value { + if let Some(policy) = &initdata_return_value._policy { + info!(logger, "using policy from initdata"); + AGENT_POLICY + .lock() + .await + .set_policy(policy) + .await + .context("Failed to set policy from initdata")?; + } } let mut oma = None; @@ -472,19 +490,34 @@ fn attestation_binaries_available(logger: &Logger, procs: &GuestComponentsProcs) true } -async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) -> 
Result<()> { +async fn launch_guest_component_procs( + logger: &Logger, + config: &AgentConfig, + initdata_return_value: &Option, +) -> Result<()> { if config.guest_components_procs == GuestComponentsProcs::None { return Ok(()); } debug!(logger, "spawning attestation-agent process {}", AA_PATH); + let mut aa_args = vec!["--attestation_sock", AA_ATTESTATION_URI]; + let initdata_parameter; + if let Some(initdata_return_value) = initdata_return_value { + initdata_parameter = + base64::engine::general_purpose::STANDARD.encode(&initdata_return_value.digest); + aa_args.push("--initdata"); + aa_args.push(&initdata_parameter); + } + launch_process( logger, AA_PATH, - &vec!["--attestation_sock", AA_ATTESTATION_URI], + aa_args, + Some(AA_CONFIG_PATH), AA_ATTESTATION_SOCKET, DEFAULT_LAUNCH_PROCESS_TIMEOUT, ) + .await .map_err(|e| anyhow!("launch_process {} failed: {:?}", AA_PATH, e))?; // skip launch of confidential-data-hub and api-server-rest @@ -500,10 +533,12 @@ async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) -> launch_process( logger, CDH_PATH, - &vec![], + vec![], + Some(CDH_CONFIG_PATH), CDH_SOCKET, DEFAULT_LAUNCH_PROCESS_TIMEOUT, ) + .await .map_err(|e| anyhow!("launch_process {} failed: {:?}", CDH_PATH, e))?; // skip launch of api-server-rest @@ -519,10 +554,12 @@ async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) -> launch_process( logger, API_SERVER_PATH, - &vec!["--features", &features.to_string()], + vec!["--features", &features.to_string()], + None, "", 0, ) + .await .map_err(|e| anyhow!("launch_process {} failed: {:?}", API_SERVER_PATH, e))?; Ok(()) @@ -532,8 +569,12 @@ async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) -> // and the corresponding procs are enabled in the agent configuration. the process will be // launched in the background and the function will return immediately. // If the CDH is started, a CDH client will be instantiated and returned. 
-async fn init_attestation_components(logger: &Logger, config: &AgentConfig) -> Result<()> { - launch_guest_component_procs(logger, config).await?; +async fn init_attestation_components( + logger: &Logger, + config: &AgentConfig, + initdata_return_value: &Option, +) -> Result<()> { + launch_guest_component_procs(logger, config, initdata_return_value).await?; // If a CDH socket exists, initialize the CDH client and enable ocicrypt match tokio::fs::metadata(CDH_SOCKET).await { @@ -555,11 +596,11 @@ async fn init_attestation_components(logger: &Logger, config: &AgentConfig) -> R Ok(()) } -fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: i32) -> Result<()> { +async fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: i32) -> Result<()> { let p = Path::new(path); let mut attempts = 0; loop { - std::thread::sleep(std::time::Duration::from_secs(1)); + tokio::time::sleep(std::time::Duration::from_secs(1)).await; if p.exists() { return Ok(()); } @@ -576,22 +617,32 @@ fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: i32) -> Res Err(anyhow!("wait for {} to exist timeout.", path)) } -fn launch_process( +async fn launch_process( logger: &Logger, path: &str, - args: &Vec<&str>, + mut args: Vec<&str>, + config: Option<&str>, unix_socket_path: &str, timeout_secs: i32, ) -> Result<()> { if !Path::new(path).exists() { - return Err(anyhow!("path {} does not exist.", path)); + bail!("path {} does not exist.", path); } + + if let Some(config_path) = config { + if Path::new(config_path).exists() { + args.push("-c"); + args.push(config_path); + } + } + if !unix_socket_path.is_empty() && Path::new(unix_socket_path).exists() { - fs::remove_file(unix_socket_path)?; + tokio::fs::remove_file(unix_socket_path).await?; } - Command::new(path).args(args).spawn()?; + + tokio::process::Command::new(path).args(args).spawn()?; if !unix_socket_path.is_empty() && timeout_secs > 0 { - wait_for_path_to_exist(logger, unix_socket_path, timeout_secs)?; 
+ wait_for_path_to_exist(logger, unix_socket_path, timeout_secs).await?; } Ok(()) diff --git a/src/agent/testdata/initdata.img b/src/agent/testdata/initdata.img new file mode 100644 index 0000000000000000000000000000000000000000..3c5fe59181e0516aa6cfe4ce74c0832debb84242 GIT binary patch literal 512 zcmd1I%PdJrEJ;*hfB^Y!4hZAF*17Y(p&sYXYkQv6W@KSt`2XMDP~Mpbs&14T9w7h# D9wQCE literal 0 HcmV?d00001 From 91bb6b7c344e6097a058029a73509b1fe9b854f5 Mon Sep 17 00:00:00 2001 From: Xynnn007 Date: Thu, 3 Apr 2025 12:52:25 +0800 Subject: [PATCH 2/3] runtime: add support for io.katacontainers.config.runtime.cc_init_data io.katacontainers.config.runtime.cc_init_data specifies initdata used by the pod in base64(gzip(initdata toml)) format. The initdata will be encapsulated into an initdata image and mounted as a raw block device to the guest. The initdata image will be aligned to 512 bytes, which is chosen as a usual sector size supported by different hypervisors like qemu, clh and dragonball. Note that this patch only adds support for qemu hypervisor. 
Signed-off-by: Xynnn007 --- src/runtime/pkg/govmm/qemu/qemu.go | 31 ++++- src/runtime/pkg/oci/utils.go | 61 ++++++++- src/runtime/pkg/oci/utils_test.go | 36 ++++- src/runtime/pkg/types/types.go | 6 + src/runtime/virtcontainers/hypervisor.go | 8 ++ src/runtime/virtcontainers/qemu.go | 128 +++++++++++++++++- src/runtime/virtcontainers/qemu_amd64.go | 4 +- src/runtime/virtcontainers/qemu_amd64_test.go | 25 ++-- src/runtime/virtcontainers/qemu_arch_base.go | 4 +- src/runtime/virtcontainers/qemu_arm64.go | 2 +- src/runtime/virtcontainers/qemu_arm64_test.go | 12 +- src/runtime/virtcontainers/qemu_ppc64le.go | 2 +- .../virtcontainers/qemu_ppc64le_test.go | 12 +- src/runtime/virtcontainers/qemu_s390x.go | 2 +- src/runtime/virtcontainers/qemu_s390x_test.go | 12 +- src/runtime/virtcontainers/qemu_test.go | 55 ++++++++ 16 files changed, 355 insertions(+), 45 deletions(-) diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go index c883965ff5..86a70e441a 100644 --- a/src/runtime/pkg/govmm/qemu/qemu.go +++ b/src/runtime/pkg/govmm/qemu/qemu.go @@ -15,6 +15,7 @@ package qemu import ( "context" + "encoding/base64" "encoding/json" "fmt" "io" @@ -328,6 +329,9 @@ type Object struct { // SnpIdAuth is the 4096-byte, base64-encoded blob to provide the ‘ID Authentication Information Structure’ // for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (default: all-zero) SnpIdAuth string + + // Raw byte slice of initdata digest + InitdataDigest []byte } // Valid returns true if the Object structure is valid and complete. @@ -353,6 +357,12 @@ func (object Object) Valid() bool { } } +func adjustProperLength(data []byte, len int) []byte { + adjusted := make([]byte, len) + copy(adjusted, data) + return adjusted +} + // QemuParams returns the qemu parameters built out of this Object device. 
func (object Object) QemuParams(config *Config) []string { var objectParams []string @@ -394,6 +404,14 @@ func (object Object) QemuParams(config *Config) []string { driveParams = append(driveParams, "if=pflash,format=raw,readonly=on") driveParams = append(driveParams, fmt.Sprintf("file=%s", object.File)) case SNPGuest: + if len(object.InitdataDigest) > 0 { + // due to https://github.com/confidential-containers/qemu/blob/amd-snp-202402240000/qapi/qom.json#L926-L929 + // hostdata in SEV-SNP should be exactly 32 bytes + hostdataSlice := adjustProperLength(object.InitdataDigest, 32) + hostdata := base64.StdEncoding.EncodeToString(hostdataSlice) + objectParams = append(objectParams, fmt.Sprintf("host-data=%s", hostdata)) + } + objectParams = append(objectParams, string(object.Type)) objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID)) objectParams = append(objectParams, fmt.Sprintf("cbitpos=%d", object.CBitPos)) @@ -485,10 +503,21 @@ func getQgsSocketAddress(portNum uint32) SocketAddress { func prepareTDXObject(object Object) string { qgsSocket := getQgsSocketAddress(object.QgsPort) + // due to https://github.com/intel-staging/qemu-tdx/blob/tdx-qemu-upstream-2023.9.21-v8.1.0/qapi/qom.json#L880 + // mrconfigid in TDX should be exactly 48 bytes + + var mrconfigid string + if len(object.InitdataDigest) > 0 { + mrconfigidSlice := adjustProperLength(object.InitdataDigest, 48) + mrconfigid = base64.StdEncoding.EncodeToString(mrconfigidSlice) + + } else { + mrconfigid = "" + } tdxObject := TdxQomObject{ string(object.Type), // qom-type object.ID, // id - "", // mrconfigid + mrconfigid, // mrconfigid "", // mrowner "", // mrownerconfig qgsSocket, // quote-generation-socket diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go index e4f1e562e4..e8f792ef58 100644 --- a/src/runtime/pkg/oci/utils.go +++ b/src/runtime/pkg/oci/utils.go @@ -7,11 +7,16 @@ package oci import ( + "compress/gzip" "context" + "crypto/sha256" + "crypto/sha512" 
"encoding/base64" "encoding/json" "errors" "fmt" + "hash" + "io" "math" "os" "path/filepath" @@ -21,6 +26,7 @@ import ( "strings" "syscall" + "github.com/BurntSushi/toml" ctrAnnotations "github.com/containerd/containerd/pkg/cri/annotations" podmanAnnotations "github.com/containers/podman/v4/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -31,6 +37,7 @@ import ( vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + kataTypes "github.com/kata-containers/kata-containers/src/runtime/pkg/types" exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" dockershimAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations/dockershim" @@ -485,6 +492,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } + if err := addHypervisorInitdataOverrides(ocispec, config); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok { if value != "" { config.HypervisorConfig.HypervisorMachineType = value @@ -556,9 +567,6 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, config.HypervisorConfig.SGXEPCSize = size } - if initdata, ok := ocispec.Annotations[vcAnnotations.Initdata]; ok { - config.HypervisorConfig.Initdata = initdata - } if err := addHypervisorGPUOverrides(ocispec, config); err != nil { return err @@ -919,6 +927,53 @@ func addHypervisorNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfi }) } +func addHypervisorInitdataOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if value, ok := ocispec.Annotations[vcAnnotations.Initdata]; ok { + if len(value) == 0 { + ociLog.Debug("Initdata annotation set without any value") + return nil + } + 
b64Reader := base64.NewDecoder(base64.StdEncoding, strings.NewReader(value)) + gzipReader, err := gzip.NewReader(b64Reader) + if err != nil { + return fmt.Errorf("initdata create gzip reader error: %v", err) + } + + initdataToml, err := io.ReadAll(gzipReader) + if err != nil { + return fmt.Errorf("uncompressing initdata with gzip error: %v", err) + } + + initdataStr := string(initdataToml) + var initdata kataTypes.Initdata + if _, err := toml.Decode(initdataStr, &initdata); err != nil { + return fmt.Errorf("parsing initdata annotation failed: %v", err) + } + + var initdataDigest []byte + var h hash.Hash + switch initdata.Algorithm { + case "sha256": + h = sha256.New() + case "sha384": + h = sha512.New384() + case "sha512": + h = sha512.New() + } + + h.Write([]byte(initdataToml)) + initdataDigest = h.Sum(nil) + + ociLog.Debugf("Initdata digest set to: %v", initdataDigest) + + sbConfig.HypervisorConfig.Initdata = initdataStr + + sbConfig.HypervisorConfig.InitdataDigest = initdataDigest + } + + return nil +} + func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error { if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableGuestSeccomp).setBool(func(disableGuestSeccomp bool) { diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index c4c91cb590..d096053ec7 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -771,10 +771,23 @@ func TestAddRemoteHypervisorAnnotations(t *testing.T) { assert.NoError(err) // When initdata specified, remote hypervisor annotations do have the annotation added. 
- ocispec.Annotations[vcAnnotations.Initdata] = "initdata" + // Note that the initdata annotation parsing logic will extract it into plaintext + ocispec.Annotations[vcAnnotations.Initdata] = "H4sIAFlC92cAAytLLSrOzM9TsFVQMtAz1DNQ4krMSc8vyizJyAWJFWckGpmaKXFFpySWJMZyKSUm6pXk5+YoAeXU1dW5QJhLKTklA4toQX5OZnKlXlFqej6yBABS/5JkcQAAAA==" err = addAnnotations(ocispec, &sbConfig, runtimeConfig) assert.NoError(err) - assert.Equal(sbConfig.HypervisorConfig.Initdata, "initdata") + assert.Equal(sbConfig.HypervisorConfig.Initdata, `version = "0.1.0" +algorithm = "sha256" +[data] +"aa.toml" = ''' +''' + +"cdh.toml" = ''' +''' + +"policy.rego" = ''' +''' +`) + assert.Equal(sbConfig.HypervisorConfig.InitdataDigest, []byte{0xc6, 0x69, 0x4b, 0xb7, 0xa2, 0x9d, 0x6f, 0x37, 0xec, 0x72, 0xa1, 0x55, 0x82, 0xe0, 0x4, 0xb9, 0xf3, 0x14, 0x21, 0x59, 0x68, 0x2d, 0xb8, 0x50, 0x9a, 0x30, 0x44, 0x7, 0x41, 0x9a, 0x49, 0xe5}) // When GPU annotations are specified, remote hypervisor annotations have the annotation added ocispec.Annotations[vcAnnotations.DefaultGPUs] = "-1" @@ -879,7 +892,9 @@ func TestAddRuntimeAnnotations(t *testing.T) { ocispec.Annotations[vcAnnotations.DisableNewNetNs] = "true" ocispec.Annotations[vcAnnotations.InterNetworkModel] = "macvtap" ocispec.Annotations[vcAnnotations.CreateContainerTimeout] = "100" - ocispec.Annotations[vcAnnotations.Initdata] = "initdata" + + // Note that the initdata annotation parsing logic will extract it into plaintext + ocispec.Annotations[vcAnnotations.Initdata] = "H4sIAFlC92cAAytLLSrOzM9TsFVQMtAz1DNQ4krMSc8vyizJyAWJFWckGpmaKXFFpySWJMZyKSUm6pXk5+YoAeXU1dW5QJhLKTklA4toQX5OZnKlXlFqej6yBABS/5JkcQAAAA==" addAnnotations(ocispec, &config, runtimeConfig) assert.Equal(config.DisableGuestSeccomp, true) @@ -887,7 +902,20 @@ func TestAddRuntimeAnnotations(t *testing.T) { assert.Equal(config.NetworkConfig.DisableNewNetwork, true) assert.Equal(config.NetworkConfig.InterworkingModel, vc.NetXConnectMacVtapModel) assert.Equal(config.CreateContainerTimeout, 
uint64(100)) - assert.Equal(config.HypervisorConfig.Initdata, "initdata") + assert.Equal(config.HypervisorConfig.Initdata, `version = "0.1.0" +algorithm = "sha256" +[data] +"aa.toml" = ''' +''' + +"cdh.toml" = ''' +''' + +"policy.rego" = ''' +''' +`) + assert.Equal(config.HypervisorConfig.InitdataDigest, []byte{0xc6, 0x69, 0x4b, 0xb7, 0xa2, 0x9d, 0x6f, 0x37, 0xec, 0x72, 0xa1, 0x55, 0x82, 0xe0, 0x4, 0xb9, 0xf3, 0x14, 0x21, 0x59, 0x68, 0x2d, 0xb8, 0x50, 0x9a, 0x30, 0x44, 0x7, 0x41, 0x9a, 0x49, 0xe5}) + } func TestRegexpContains(t *testing.T) { diff --git a/src/runtime/pkg/types/types.go b/src/runtime/pkg/types/types.go index 73a255117e..11ec1a5f9a 100644 --- a/src/runtime/pkg/types/types.go +++ b/src/runtime/pkg/types/types.go @@ -10,3 +10,9 @@ const ( KataRuntimeNameRegexp = `io\.containerd\.kata.*\.v2` ContainerdRuntimeTaskPath = "io.containerd.runtime.v2.task" ) + +type Initdata struct { + Version string `toml:"version"` + Algorithm string `toml:"algorithm"` + Data map[string]string `toml:"data"` +} diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go index b6b75d5499..b0ac28b287 100644 --- a/src/runtime/virtcontainers/hypervisor.go +++ b/src/runtime/virtcontainers/hypervisor.go @@ -682,6 +682,14 @@ type HypervisorConfig struct { // Initdata defines the initdata passed into guest when CreateVM Initdata string + // InitdataDigest represents opaque binary data attached to a TEE and typically used + // for Guest attestation. This will be encoded in the format expected by QEMU for each TEE type. 
+ InitdataDigest []byte + + // The initdata image on the host side to store the initdata and be mounted + // as a raw block device to guest + InitdataImage string + // GPU specific annotations (currently only applicable for Remote Hypervisor) //DefaultGPUs specifies the number of GPUs required for the Kata VM DefaultGPUs uint32 diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go index 9072170008..71982ce280 100644 --- a/src/runtime/virtcontainers/qemu.go +++ b/src/runtime/virtcontainers/qemu.go @@ -9,7 +9,10 @@ package virtcontainers import ( "bufio" + "bytes" + "compress/gzip" "context" + "encoding/binary" "encoding/hex" "encoding/json" "fmt" @@ -394,6 +397,23 @@ func (q *qemu) createQmpSocket() ([]govmmQemu.QMPSocket, error) { return sockets, nil } +func (q *qemu) buildInitdataDevice(devices []govmmQemu.Device, InitdataImage string) []govmmQemu.Device { + device := govmmQemu.BlockDevice{ + Driver: govmmQemu.VirtioBlock, + Transport: govmmQemu.TransportPCI, + ID: "initdata", + File: InitdataImage, + SCSI: false, + WCE: false, + AIO: govmmQemu.Threads, + Interface: "none", + Format: "raw", + } + + devices = append(devices, device) + return devices +} + func (q *qemu) buildDevices(ctx context.Context, kernelPath string) ([]govmmQemu.Device, *govmmQemu.IOThread, *govmmQemu.Kernel, error) { var devices []govmmQemu.Device @@ -540,6 +560,94 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) { }, nil } +// prepareInitdataImage will create an image with a very simple layout +// +// There will be multiple sectors. The first 8 bytes are Magic number "initdata". +// Then a "length" field of 8 bytes follows (unsigned int64). +// Finally the gzipped initdata toml. The image will be padded to an +// integer multiple of the sector size for alignment. +// +// offset 0 8 16 +// 0 'i' 'n' 'i' 't' 'd' 'a' 't' 'a' | gzip length in le | +// 16 gzip(initdata toml) ... 
+// (end of the last sector) '\0' paddings +func prepareInitdataImage(initdata string, imagePath string) error { + SectorSize := 512 + var buf bytes.Buffer + gzipper := gzip.NewWriter(&buf) + defer gzipper.Close() + + gzipper.Write([]byte(initdata)) + err := gzipper.Close() + if err != nil { + return fmt.Errorf("failed to compress initdata: %v", err) + } + + compressedInitdata := buf.Bytes() + + compressedInitdataLength := len(compressedInitdata) + lengthBuffer := make([]byte, 8) + binary.LittleEndian.PutUint64(lengthBuffer, uint64(compressedInitdataLength)) + + paddingLength := (compressedInitdataLength+16+SectorSize-1)/SectorSize*SectorSize - (compressedInitdataLength + 16) + paddingBuffer := make([]byte, paddingLength) + + file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_RDWR, 0640) + if err != nil { + return fmt.Errorf("failed to create initdata image: %v", err) + } + defer file.Close() + + _, err = file.Write([]byte("initdata")) + if err != nil { + return fmt.Errorf("failed to write magic number to initdata image: %v", err) + } + + _, err = file.Write(lengthBuffer) + if err != nil { + return fmt.Errorf("failed to write data length to initdata image: %v", err) + } + + _, err = file.Write([]byte(compressedInitdata)) + if err != nil { + return fmt.Errorf("failed to write compressed initdata to initdata image: %v", err) + } + + _, err = file.Write(paddingBuffer) + if err != nil { + return fmt.Errorf("failed to write compressed initdata to initdata image: %v", err) + } + + return nil +} + +func (q *qemu) prepareInitdataMount(config *HypervisorConfig) error { + if len(config.Initdata) == 0 { + q.Logger().Info("No initdata provided. 
Skip prepare initdata device") + return nil + } + + q.Logger().Info("Start to prepare initdata") + initdataWorkdir := filepath.Join("/run/kata-containers/shared/initdata", q.id) + initdataImagePath := filepath.Join(initdataWorkdir, "data.img") + + err := os.MkdirAll(initdataWorkdir, 0755) + if err != nil { + q.Logger().WithField("initdata", "create initdata image path").WithError(err) + return err + } + + err = prepareInitdataImage(config.Initdata, initdataImagePath) + if err != nil { + q.Logger().WithField("initdata", "prepare initdata image").WithError(err) + return err + } + + config.InitdataImage = initdataImagePath + + return nil +} + // CreateVM is the Hypervisor VM creation implementation for govmmQemu. func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error { // Save the tracing context @@ -552,6 +660,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi return err } + if err := q.prepareInitdataMount(hypervisorConfig); err != nil { + return err + } + machine, err := q.getQemuMachine() if err != nil { return err @@ -650,6 +762,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi return err } + if len(hypervisorConfig.Initdata) > 0 { + devices = q.buildInitdataDevice(devices, hypervisorConfig.InitdataImage) + } + // some devices configuration may also change kernel params, make sure this is called afterwards kernel.Params = q.kernelParameters() q.checkBpfEnabled() @@ -681,7 +797,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi Debug: hypervisorConfig.Debug, } - qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath) + qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath, hypervisorConfig.InitdataDigest) if err != nil { return err } @@ -1254,6 +1370,7 @@ 
func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) { } } } + if q.config.SharedFS == config.VirtioFS || q.config.SharedFS == config.VirtioFSNydus { if err := q.stopVirtiofsDaemon(ctx); err != nil { return err @@ -1318,6 +1435,15 @@ func (q *qemu) cleanupVM() error { }).Debug("successfully removed the non root user") } + // If we have initdata, we should drop initdata image path + hypervisorConfig := q.HypervisorConfig() + if len(hypervisorConfig.Initdata) > 0 { + initdataWorkdir := filepath.Join(string(filepath.Separator), "/run/kata-containers/shared/initdata", q.id) + if err := os.RemoveAll(initdataWorkdir); err != nil { + q.Logger().WithError(err).Warnf("failed to remove initdata work dir %s", initdataWorkdir) + } + } + return nil } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index dd0a929df0..b0c5aa7950 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -274,7 +274,7 @@ func (q *qemuAmd64) enableProtection() error { } // append protection device -func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { if q.sgxEPCSize != 0 { devices = append(devices, govmmQemu.Object{ @@ -299,6 +299,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, Debug: false, File: firmware, FirmwareVolume: firmwareVolume, + InitdataDigest: initdataDigest, }), "", nil case sevProtection: return append(devices, @@ -318,6 +319,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, File: firmware, CBitPos: cpuid.AMDMemEncrypt.CBitPosition, ReducedPhysBits: 1, + InitdataDigest: initdataDigest, } if q.snpIdBlock != "" && q.snpIdAuth != "" { obj.SnpIdBlock = 
q.snpIdBlock diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 1425cb38cf..2756cb2be0 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -257,7 +257,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { firmware := "tdvf.fd" var bios string var err error - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.NoError(err) // non-protection @@ -265,20 +265,20 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { // pef protection amd64.(*qemuAmd64).protection = pefProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Error(err) assert.Empty(bios) // Secure Execution protection amd64.(*qemuAmd64).protection = seProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Error(err) assert.Empty(bios) // sev protection amd64.(*qemuAmd64).protection = sevProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.NoError(err) assert.Empty(bios) @@ -298,7 +298,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { // snp protection amd64.(*qemuAmd64).protection = snpProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []uint8(nil)) assert.NoError(err) assert.Empty(bios) @@ -318,18 +318,19 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { // tdxProtection amd64.(*qemuAmd64).protection = tdxProtection - devices, bios, err = 
amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.NoError(err) assert.Empty(bios) expectedOut = append(expectedOut, govmmQemu.Object{ - Driver: govmmQemu.Loader, - Type: govmmQemu.TDXGuest, - ID: "tdx", - DeviceID: fmt.Sprintf("fd%d", id), - Debug: false, - File: firmware, + Driver: govmmQemu.Loader, + Type: govmmQemu.TDXGuest, + ID: "tdx", + DeviceID: fmt.Sprintf("fd%d", id), + Debug: false, + File: firmware, + InitdataDigest: []byte(""), }, ) diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index fe330d01a8..0bdf9092d4 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -165,7 +165,7 @@ type qemuArch interface { // This implementation is architecture specific, some archs may need // a firmware, returns a string containing the path to the firmware that should // be used with the -bios option, ommit -bios option if the path is empty. 
- appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) + appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) // scans the PCIe space and returns the biggest BAR sizes for 32-bit // and 64-bit addressable memory @@ -920,7 +920,7 @@ func (q *qemuArchBase) setPFlash(p []string) { } // append protection device -func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") return devices, firmware, nil } diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index 373321d2bb..bfe6e2d405 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -154,7 +154,7 @@ func (q *qemuArm64) enableProtection() error { return nil } -func (q *qemuArm64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuArm64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { err := q.enableProtection() if err != nil { hvLogger.WithField("arch", runtime.GOARCH).Error(err) diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index 924dd32ba0..aca9497412 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -183,42 +183,42 @@ func TestQemuArm64AppendProtectionDevice(t *testing.T) { var err error // no 
protection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // PEF protection arm64.(*qemuArm64).protection = pefProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // Secure Execution protection arm64.(*qemuArm64).protection = seProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // SEV protection arm64.(*qemuArm64).protection = sevProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // SNP protection arm64.(*qemuArm64).protection = snpProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // TDX protection arm64.(*qemuArm64).protection = tdxProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index d2e0228c8b..87c2139b2b 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -157,7 +157,7 @@ func (q *qemuPPC64le) enableProtection() error { } // 
append protection device -func (q *qemuPPC64le) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuPPC64le) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { switch q.protection { case pefProtection: return append(devices, diff --git a/src/runtime/virtcontainers/qemu_ppc64le_test.go b/src/runtime/virtcontainers/qemu_ppc64le_test.go index 85e1dfe805..7bb79bc0e5 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le_test.go +++ b/src/runtime/virtcontainers/qemu_ppc64le_test.go @@ -60,7 +60,7 @@ func TestQemuPPC64leAppendProtectionDevice(t *testing.T) { var devices []govmmQemu.Device var bios, firmware string var err error - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.NoError(err) //no protection @@ -68,31 +68,31 @@ func TestQemuPPC64leAppendProtectionDevice(t *testing.T) { //Secure Execution protection ppc64le.(*qemuPPC64le).protection = seProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //SEV protection ppc64le.(*qemuPPC64le).protection = sevProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //SNP protection ppc64le.(*qemuPPC64le).protection = snpProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //TDX protection ppc64le.(*qemuPPC64le).protection = tdxProtection - devices, bios, 
err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //PEF protection ppc64le.(*qemuPPC64le).protection = pefProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.NoError(err) assert.Empty(bios) diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index a333c3bb93..5f00e9ea76 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -344,7 +344,7 @@ func (q *qemuS390x) enableProtection() error { // appendProtectionDevice appends a QEMU object for Secure Execution. // Takes devices and returns updated version. Takes BIOS and returns it (no modification on s390x). -func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { switch q.protection { case seProtection: return append(devices, diff --git a/src/runtime/virtcontainers/qemu_s390x_test.go b/src/runtime/virtcontainers/qemu_s390x_test.go index 24a67bdd9e..db88b4690f 100644 --- a/src/runtime/virtcontainers/qemu_s390x_test.go +++ b/src/runtime/virtcontainers/qemu_s390x_test.go @@ -111,7 +111,7 @@ func TestQemuS390xAppendProtectionDevice(t *testing.T) { var devices []govmmQemu.Device var bios, firmware string var err error - devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "") + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.NoError(err) // no protection @@ -119,32 +119,32 @@ func TestQemuS390xAppendProtectionDevice(t *testing.T) { // PEF protection 
s390x.(*qemuS390x).protection = pefProtection - devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "") + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) // TDX protection s390x.(*qemuS390x).protection = tdxProtection - devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "") + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) // SEV protection s390x.(*qemuS390x).protection = sevProtection - devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "") + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) // SNP protection s390x.(*qemuS390x).protection = snpProtection - devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "") + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) // Secure Execution protection s390x.(*qemuS390x).protection = seProtection - devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "") + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.NoError(err) assert.Empty(bios) diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go index 4c51517ed5..53b0ff716c 100644 --- a/src/runtime/virtcontainers/qemu_test.go +++ b/src/runtime/virtcontainers/qemu_test.go @@ -8,9 +8,14 @@ package virtcontainers import ( + "bytes" + "compress/gzip" "context" + "encoding/binary" "fmt" + "io" "os" + "path" "path/filepath" "testing" @@ -770,3 +775,53 @@ func TestQemuStartSandbox(t *testing.T) { err = q.StartVM(context.Background(), 10) assert.Error(err) } + +func TestPrepareInitdataImage(t *testing.T) { + tests := []struct { + name string + content string + }{ + { + "create an initdata image", + "some content", + }, + } + for 
_, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + imageDir := t.TempDir() + imagePath := path.Join(imageDir, "initdata.img") + err := prepareInitdataImage(tt.content, imagePath) + if err != nil { + t.Errorf("prepareInitdataImage() error = %v", err) + } + defer os.Remove(imagePath) + + fullContent, err := os.ReadFile(imagePath) + if err != nil { + t.Errorf("read initdata image failed: %v", err) + } + + magicNumber := fullContent[:8] + if string(magicNumber) != "initdata" { + t.Errorf("initdata magic number is not correct, got %s, want initdata", string(magicNumber)) + } + + length := binary.LittleEndian.Uint64(fullContent[8:16]) + contentSlice := fullContent[16 : 16+length] + gzipReader, err := gzip.NewReader(bytes.NewBuffer(contentSlice)) + if err != nil { + t.Errorf("read gzipped initdata failed: %v", err) + } + defer gzipReader.Close() + + content, err := io.ReadAll(gzipReader) + if err != nil { + t.Errorf("read gzipped initdata failed: %v", err) + } + + if string(content) != tt.content { + t.Errorf("initdata content is not correct, got %s, want %s", string(content), tt.content) + } + }) + } +} From b1c72c709406f04d4180fa267d6f5503e196f836 Mon Sep 17 00:00:00 2001 From: Xynnn007 Date: Thu, 3 Apr 2025 17:28:53 +0800 Subject: [PATCH 3/3] test: add integration test for initdata This test exercises initdata with the following logic 1. Enable image signature verification via kernel commandline 2. Set Trustee address via initdata 3. Pull an image from a banned registry 4. Check that the pull fails with log `image security validation failed`, which shows that the initdata works. Note that if initdata does not work, the pod still fails to launch. But the error information is `[CDH] [ERROR]: Get Resource failed` which internally means that the KBS URL has not been set correctly.
This test now only runs on qemu-coco-dev+x86_64 and qemu-tdx Signed-off-by: Xynnn007 --- .../kubernetes/confidential_common.sh | 36 ++++ .../integration/kubernetes/k8s-initdata.bats | 197 ++++++++++++++++++ .../kubernetes/run_kubernetes_tests.sh | 1 + 3 files changed, 234 insertions(+) create mode 100644 tests/integration/kubernetes/k8s-initdata.bats diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh index 75fb14e854..648d544151 100644 --- a/tests/integration/kubernetes/confidential_common.sh +++ b/tests/integration/kubernetes/confidential_common.sh @@ -171,3 +171,39 @@ function create_coco_pod_yaml() { set_node "${kata_pod}" "$node" fi } + +# This function creates pod yaml. Parameters +# - $1: image reference +# - $2: annotation `io.katacontainers.config.hypervisor.kernel_params` +# - $3: annotation `io.katacontainers.config.runtime.cc_init_data` +# - $4: node +function create_coco_pod_yaml_with_annotations() { + image=$1 + kernel_params_annotation_value=${2:-} + cc_initdata_annotation_value=${3:-} + node=${4:-} + + kernel_params_annotation_key="io.katacontainers.config.hypervisor.kernel_params" + cc_initdata_annotation_key="io.katacontainers.config.runtime.cc_init_data" + + # Note: this is not local as we use it in the caller test + kata_pod="$(new_pod_config "$image" "kata-${KATA_HYPERVISOR}")" + set_container_command "${kata_pod}" "0" "sleep" "30" + + # Set annotations + set_metadata_annotation "${kata_pod}" \ + "io.containerd.cri.runtime-handler" \ + "kata-${KATA_HYPERVISOR}" + set_metadata_annotation "${kata_pod}" \ + "${kernel_params_annotation_key}" \ + "${kernel_params_annotation_value}" + set_metadata_annotation "${kata_pod}" \ + "${cc_initdata_annotation_key}" \ + "${cc_initdata_annotation_value}" + + add_allow_all_policy_to_yaml "${kata_pod}" + + if [ -n "$node" ]; then + set_node "${kata_pod}" "$node" + fi +} \ No newline at end of file diff --git 
a/tests/integration/kubernetes/k8s-initdata.bats b/tests/integration/kubernetes/k8s-initdata.bats new file mode 100644 index 0000000000..fe42c78a45 --- /dev/null +++ b/tests/integration/kubernetes/k8s-initdata.bats @@ -0,0 +1,197 @@ +#!/usr/bin/env bats +# Copyright (c) 2025 Alibaba Cloud +# +# SPDX-License-Identifier: Apache-2.0 +# + +# This test exercises initdata with the following logic: +# 1. Enable image signature verification via kernel commandline +# 2. Set Trustee address via initdata +# 3. Pull an image from a banned registry +# 4. Check that the pull fails with log `image security validation failed`, +# which shows that the initdata works. +# +# Note that if initdata does not work, the pod still fails to launch (hangs at +# CreatingContainer status). The error information is +# `[CDH] [ERROR]: Get Resource failed` which internally means that the KBS URL +# has not been set correctly. +# +# TODO: After https://github.com/kata-containers/kata-containers/issues/9266 +# is resolved, both KBS URI and policy URI can be set via initdata. + +load "${BATS_TEST_DIRNAME}/lib.sh" +load "${BATS_TEST_DIRNAME}/confidential_common.sh" + +export KBS="${KBS:-false}" +export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}" + +setup() { + if ! is_confidential_runtime_class; then + skip "Test not supported for ${KATA_HYPERVISOR}." 
+ fi + + [ "${SNAPSHOTTER:-}" = "nydus" ] || skip "None snapshotter was found but this test requires one" + + setup_common || die "setup_common failed" + + FAIL_TEST_IMAGE="quay.io/prometheus/busybox:latest" + + SECURITY_POLICY_KBS_URI="kbs:///default/security-policy/test" +} + +function setup_kbs_image_policy_for_initdata() { + if [ "${KBS}" = "false" ]; then + skip "Test skipped as KBS not setup" + fi + + export CURRENT_ARCH=$(uname -m) + if [ "${CURRENT_ARCH}" != "x86_64" ]; then + skip "Test skipped as only x86-64 supports, while current platform is ${CURRENT_ARCH}" + fi + + # TODO: Enable for more archs + case "$KATA_HYPERVISOR" in + "qemu-tdx"|"qemu-coco-dev") + ;; + *) + skip "Test not supported for ${KATA_HYPERVISOR}." + ;; + esac + + default_policy="${1:-insecureAcceptAnything}" + policy_json=$(cat << EOF +{ + "default": [ + { + "type": "${default_policy}" + } + ], + "transports": { + "docker": { + "quay.io/prometheus": [ + { + "type": "reject" + } + ] + } + } +} +EOF + ) + + if ! is_confidential_hardware; then + kbs_set_allow_all_resources + fi + + kbs_set_resource "default" "security-policy" "test" "${policy_json}" +} + +@test "Test that creating a container from an rejected image configured by initdata, fails according to policy reject" { + setup_kbs_image_policy_for_initdata + + CC_KBS_ADDRESS=$(kbs_k8s_svc_http_addr) + + kernel_parameter="agent.image_policy_file=${SECURITY_POLICY_KBS_URI} agent.enable_signature_verification=true" + initdata_annotation=$(gzip -c << EOF | base64 -w0 +version = "0.1.0" +algorithm = "sha256" +[data] +"aa.toml" = ''' +[token_configs] +[token_configs.coco_as] +# TODO: we should fix this on AA side to set this a default value if not set. 
+url = "${CC_KBS_ADDRESS}" + +[token_configs.kbs] +url = "${CC_KBS_ADDRESS}" +''' + +"cdh.toml" = ''' +[kbc] +name = "cc_kbc" +url = "${CC_KBS_ADDRESS}" +''' + +"policy.rego" = ''' +# Copyright (c) 2023 Microsoft Corporation +# +# SPDX-License-Identifier: Apache-2.0 +# + +package agent_policy + +default AddARPNeighborsRequest := true +default AddSwapRequest := true +default CloseStdinRequest := true +default CopyFileRequest := true +default CreateContainerRequest := true +default CreateSandboxRequest := true +default DestroySandboxRequest := true +default ExecProcessRequest := true +default GetMetricsRequest := true +default GetOOMEventRequest := true +default GuestDetailsRequest := true +default ListInterfacesRequest := true +default ListRoutesRequest := true +default MemHotplugByProbeRequest := true +default OnlineCPUMemRequest := true +default PauseContainerRequest := true +default PullImageRequest := true +default ReadStreamRequest := true +default RemoveContainerRequest := true +default RemoveStaleVirtiofsShareMountsRequest := true +default ReseedRandomDevRequest := true +default ResumeContainerRequest := true +default SetGuestDateTimeRequest := true +default SetPolicyRequest := true +default SignalProcessRequest := true +default StartContainerRequest := true +default StartTracingRequest := true +default StatsContainerRequest := true +default StopTracingRequest := true +default TtyWinResizeRequest := true +default UpdateContainerRequest := true +default UpdateEphemeralMountsRequest := true +default UpdateInterfaceRequest := true +default UpdateRoutesRequest := true +default WaitProcessRequest := true +default WriteStreamRequest := true +''' +EOF + ) + create_coco_pod_yaml_with_annotations "${FAIL_TEST_IMAGE}" "${kernel_parameter}" "${initdata_annotation}" "${node}" + + # For debug sake + echo "Pod ${kata_pod}: $(cat ${kata_pod})" + + assert_pod_fail "${kata_pod}" + assert_logs_contain "${node}" kata "${node_start_time}" "image security validation failed" +} + 
+@test "Test that creating a container from an rejected image not configured by initdata, fails according to CDH error" { + setup_kbs_image_policy_for_initdata + + kernel_parameter="agent.image_policy_file=${SECURITY_POLICY_KBS_URI} agent.enable_signature_verification=true" + + create_coco_pod_yaml_with_annotations "${FAIL_TEST_IMAGE}" "${kernel_parameter}" "" "${node}" + + # For debug sake + echo "Pod ${kata_pod}: $(cat ${kata_pod})" + + if k8s_create_pod "${kata_pod}" ; then + echo "Expected failure, but pod ${kata_pod} launched successfully." + return 1 + fi + + assert_logs_contain "${node}" kata "${node_start_time}" "\[CDH\] \[ERROR\]: Get Resource failed" +} + +teardown() { + if ! is_confidential_runtime_class; then + skip "Test not supported for ${KATA_HYPERVISOR}." + fi + + [ "${SNAPSHOTTER:-}" = "nydus" ] || skip "None snapshotter was found but this test requires one" + + teardown_common "${node}" "${node_start_time:-}" +} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index a62dcfe073..2bfc296473 100755 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -37,6 +37,7 @@ else "k8s-guest-pull-image-encrypted.bats" \ "k8s-guest-pull-image-authenticated.bats" \ "k8s-guest-pull-image-signature.bats" \ + "k8s-initdata.bats" \ "k8s-confidential-attestation.bats" \ )