diff --git a/src/agent/Cargo.lock b/src/agent/Cargo.lock
index 3113790377..58bc4ac24c 100644
--- a/src/agent/Cargo.lock
+++ b/src/agent/Cargo.lock
@@ -263,9 +263,9 @@ dependencies = [
 
 [[package]]
 name = "async-compression"
-version = "0.4.20"
+version = "0.4.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "310c9bcae737a48ef5cdee3174184e6d548b292739ede61a1f955ef76a738861"
+checksum = "59a194f9d963d8099596278594b3107448656ba73831c9d8c783e613ce86da64"
 dependencies = [
  "flate2",
  "futures-core",
@@ -3035,9 +3035,11 @@ name = "kata-agent"
 version = "0.1.0"
 dependencies = [
  "anyhow",
+ "async-compression",
  "async-recursion 0.3.2",
  "async-std",
  "async-trait",
+ "base64 0.22.1",
  "capctl",
  "cdi",
  "cfg-if",
@@ -3077,6 +3079,7 @@ dependencies = [
  "serde",
  "serde_json",
  "serial_test",
+ "sha2",
  "slog",
  "slog-scope",
  "slog-stdlog",
diff --git a/src/agent/Cargo.toml b/src/agent/Cargo.toml
index b4ec995c84..708b4632f6 100644
--- a/src/agent/Cargo.toml
+++ b/src/agent/Cargo.toml
@@ -182,6 +182,11 @@ safe-path.workspace = true
 logging.workspace = true
 vsock-exporter.workspace = true
 
+# Initdata
+base64 = "0.22"
+sha2 = "0.10.8"
+async-compression = { version = "0.4.22", features = ["tokio", "gzip"] }
+
 [dev-dependencies]
 tempfile.workspace = true
 which.workspace = true
diff --git a/src/agent/src/initdata.rs b/src/agent/src/initdata.rs
new file mode 100644
index 0000000000..7afc899974
--- /dev/null
+++ b/src/agent/src/initdata.rs
@@ -0,0 +1,226 @@
+//! # Initdata Module
+//!
+//! This module will do the following things if a proper initdata device with initdata exists.
+//! 1. Parse the initdata block device and extract the config files to [`INITDATA_PATH`].
+//! 2. Return the initdata and the policy (if any).
+
+// Copyright (c) 2025 Alibaba Cloud
+//
+// SPDX-License-Identifier: Apache-2.0
+//
+
+use std::{os::unix::fs::FileTypeExt, path::Path};
+
+use anyhow::{bail, Context, Result};
+use async_compression::tokio::bufread::GzipDecoder;
+use base64::{engine::general_purpose::STANDARD, Engine};
+use const_format::concatcp;
+use serde::Deserialize;
+use sha2::{Digest, Sha256, Sha384, Sha512};
+use slog::Logger;
+use tokio::io::{AsyncReadExt, AsyncSeekExt};
+
+/// This is the target directory to store the extracted initdata.
+pub const INITDATA_PATH: &str = "/run/confidential-containers/initdata";
+
+/// The path of AA's config file
+pub const AA_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/aa.toml");
+
+/// The path of CDH's config file
+pub const CDH_CONFIG_PATH: &str = concatcp!(INITDATA_PATH, "/cdh.toml");
+
+/// Magic number of initdata device
+pub const INITDATA_MAGIC_NUMBER: &[u8] = b"initdata";
+
+/// Now only initdata `0.1.0` is defined.
+const INITDATA_VERSION: &str = "0.1.0";
+
+/// The initdata TOML document, as parsed from the decompressed content of the
+/// initdata device.
+#[derive(Deserialize)]
+pub struct Initdata {
+    /// Format version; only `INITDATA_VERSION` is accepted.
+    version: String,
+    /// Name of the hash algorithm used for the digest (`sha256`, `sha384` or `sha512`).
+    algorithm: String,
+    /// The well-defined payload entries.
+    data: DefinedFields,
+}
+
+/// Well-defined keys for initdata of kata/CoCo
+#[derive(Deserialize, Default)]
+#[serde(deny_unknown_fields)]
+pub struct DefinedFields {
+    #[serde(rename = "aa.toml")]
+    aa_config: Option<String>,
+    #[serde(rename = "cdh.toml")]
+    cdh_config: Option<String>,
+    #[serde(rename = "policy.rego")]
+    policy: Option<String>,
+}
+
+/// Looks in `/dev` for a block device named `vd*` whose first 8 bytes are
+/// [`INITDATA_MAGIC_NUMBER`] and returns its path, or `None` when no such
+/// device exists.
+async fn detect_initdata_device(logger: &Logger) -> Result<Option<String>> {
+    let dev_dir = Path::new("/dev");
+    let mut read_dir = tokio::fs::read_dir(dev_dir).await?;
+    while let Some(entry) = read_dir.next_entry().await? {
+        let filename = entry.file_name();
+        let filename = filename.to_string_lossy();
+        debug!(logger, "Initdata check device `{filename}`");
+        if !filename.starts_with("vd") {
+            continue;
+        }
+        let path = entry.path();
+
+        debug!(logger, "Initdata find potential device: `{path:?}`");
+        // Stay on the async filesystem API so the executor thread is not blocked.
+        let metadata = tokio::fs::metadata(&path).await?;
+        if !metadata.file_type().is_block_device() {
+            continue;
+        }
+
+        let mut file = tokio::fs::File::open(&path).await?;
+        let mut magic = [0; 8];
+        match file.read_exact(&mut magic).await {
+            Ok(_) => {
+                debug!(
+                    logger,
+                    "Initdata read device `{filename}` first 8 bytes: {magic:?}"
+                );
+                if magic == INITDATA_MAGIC_NUMBER {
+                    let path = path.as_path().to_string_lossy().to_string();
+                    debug!(logger, "Found initdata device {path}");
+                    return Ok(Some(path));
+                }
+            }
+            Err(e) => debug!(logger, "Initdata read device `{filename}` failed: {e:?}"),
+        }
+    }
+
+    Ok(None)
+}
+
+/// Reads the initdata payload from `device_path` and returns it decompressed.
+///
+/// The device layout is: 8 bytes of magic (skipped here), an 8-byte
+/// little-endian length, then that many bytes of gzip-compressed data.
+///
+/// # Errors
+///
+/// Fails if the device cannot be read, holds fewer payload bytes than the
+/// length field announces, or the payload is not valid gzip.
+pub async fn read_initdata(device_path: &str) -> Result<Vec<u8>> {
+    let initdata_devfile = tokio::fs::File::open(device_path).await?;
+    let mut buf_reader = tokio::io::BufReader::new(initdata_devfile);
+    // skip the magic number "initdata"
+    buf_reader.seek(std::io::SeekFrom::Start(8)).await?;
+
+    let mut len_buf = [0u8; 8];
+    buf_reader.read_exact(&mut len_buf).await?;
+    let length = u64::from_le_bytes(len_buf);
+
+    // The length field is read from the device, so do not allocate `length`
+    // bytes up front (a bogus value could make that allocation fail).
+    // Read at most `length` bytes and let the buffer grow with the real data.
+    let mut buf = Vec::new();
+    let read = buf_reader.take(length).read_to_end(&mut buf).await?;
+    if u64::try_from(read).ok() != Some(length) {
+        bail!("initdata device is truncated: expected {length} payload bytes, found {read}");
+    }
+    let mut gzip_decoder = GzipDecoder::new(&buf[..]);
+
+    let mut initdata = Vec::new();
+    let _ = gzip_decoder.read_to_end(&mut initdata).await?;
+    Ok(initdata)
+}
+
+/// What [`initialize_initdata`] hands back to the caller.
+pub struct InitdataReturnValue {
+    /// Digest of the decompressed initdata, computed with the algorithm it names.
+    pub digest: Vec<u8>,
+    /// Content of the `policy.rego` entry, if the initdata carries one.
+    pub _policy: Option<String>,
+}
+
+/// Detects the initdata device, validates its content and extracts it.
+///
+/// Returns `Ok(None)` when no initdata device is present. Otherwise the
+/// `aa.toml` / `cdh.toml` entries (if any) are written to [`AA_CONFIG_PATH`] /
+/// [`CDH_CONFIG_PATH`], and the digest plus the optional policy are returned.
+///
+/// # Errors
+///
+/// Fails on I/O or parse errors, on an initdata version other than
+/// `INITDATA_VERSION`, and on an unknown hash algorithm.
+pub async fn initialize_initdata(logger: &Logger) -> Result<Option<InitdataReturnValue>> {
+    let logger = logger.new(o!("subsystem" => "initdata"));
+    let Some(initdata_device) = detect_initdata_device(&logger).await? else {
+        info!(
+            logger,
+            "Initdata device not found, skip initdata initialization"
+        );
+        return Ok(None);
+    };
+
+    tokio::fs::create_dir_all(INITDATA_PATH)
+        .await
+        .inspect_err(|e| error!(logger, "Failed to create initdata dir: {e:?}"))?;
+
+    let initdata_content = read_initdata(&initdata_device)
+        .await
+        .inspect_err(|e| error!(logger, "Failed to read initdata: {e:?}"))?;
+
+    let initdata: Initdata =
+        toml::from_slice(&initdata_content).context("parse initdata failed")?;
+    info!(logger, "Initdata version: {}", initdata.version);
+
+    if initdata.version != INITDATA_VERSION {
+        bail!("Unsupported initdata version");
+    }
+
+    let digest = match &initdata.algorithm[..] {
+        "sha256" => Sha256::digest(&initdata_content).to_vec(),
+        "sha384" => Sha384::digest(&initdata_content).to_vec(),
+        "sha512" => Sha512::digest(&initdata_content).to_vec(),
+        others => bail!("Unsupported hash algorithm {others}"),
+    };
+
+    if let Some(config) = initdata.data.aa_config {
+        tokio::fs::write(AA_CONFIG_PATH, config)
+            .await
+            .context("write aa config failed")?;
+        info!(logger, "write AA config from initdata");
+    }
+
+    if let Some(config) = initdata.data.cdh_config {
+        tokio::fs::write(CDH_CONFIG_PATH, config)
+            .await
+            .context("write cdh config failed")?;
+        info!(logger, "write CDH config from initdata");
+    }
+
+    debug!(logger, "Initdata digest: {}", STANDARD.encode(&digest));
+
+    let res = InitdataReturnValue {
+        digest,
+        _policy: initdata.data.policy,
+    };
+
+    Ok(Some(res))
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::initdata::read_initdata;
+
+    const INITDATA_IMG_PATH: &str = "testdata/initdata.img";
+    const INITDATA_PLAINTEXT: &[u8] = b"some content";
+
+    #[tokio::test]
+    async fn parse_initdata() {
+        let initdata = read_initdata(INITDATA_IMG_PATH).await.unwrap();
+        assert_eq!(initdata, INITDATA_PLAINTEXT);
+    }
+}
diff --git a/src/agent/src/main.rs b/src/agent/src/main.rs
index 2e7698706c..cc9fa53ad5 100644
--- a/src/agent/src/main.rs
+++ b/src/agent/src/main.rs
@@
-18,10 +18,12 @@ extern crate scopeguard;
 #[macro_use]
 extern crate slog;
 
-use anyhow::{anyhow, Context, Result};
+use anyhow::{anyhow, bail, Context, Result};
+use base64::Engine;
 use cfg_if::cfg_if;
 use clap::{AppSettings, Parser};
 use const_format::{concatcp, formatcp};
+use initdata::{InitdataReturnValue, AA_CONFIG_PATH, CDH_CONFIG_PATH};
 use nix::fcntl::OFlag;
 use nix::sys::reboot::{reboot, RebootMode};
 use nix::sys::socket::{self, AddressFamily, SockFlag, SockType, VsockAddr};
@@ -33,7 +35,6 @@ use std::os::unix::fs::{self as unixfs, FileTypeExt};
 use std::os::unix::io::AsRawFd;
 use std::path::Path;
 use std::process::exit;
-use std::process::Command;
 use std::sync::Arc;
 use tracing::{instrument, span};
 
@@ -42,6 +43,7 @@ mod config;
 mod console;
 mod device;
 mod features;
+mod initdata;
 mod linux_abi;
 mod metrics;
 mod mount;
@@ -419,6 +421,8 @@ async fn start_sandbox(
     let (tx, rx) = tokio::sync::oneshot::channel();
     sandbox.lock().await.sender = Some(tx);
 
+    let initdata_return_value = initdata::initialize_initdata(logger).await?;
+
     let gc_procs = config.guest_components_procs;
     if !attestation_binaries_available(logger, &gc_procs) {
         warn!(
@@ -426,7 +430,21 @@ async fn start_sandbox(
             "attestation binaries requested for launch not available"
         );
     } else {
-        init_attestation_components(logger, config).await?;
+        init_attestation_components(logger, config, &initdata_return_value).await?;
+    }
+
+    // if policy is given via initdata, use it
+    #[cfg(feature = "agent-policy")]
+    if let Some(initdata_return_value) = initdata_return_value {
+        if let Some(policy) = &initdata_return_value._policy {
+            info!(logger, "using policy from initdata");
+            AGENT_POLICY
+                .lock()
+                .await
+                .set_policy(policy)
+                .await
+                .context("Failed to set policy from initdata")?;
+        }
     }
 
     let mut oma = None;
@@ -472,19 +490,34 @@ fn attestation_binaries_available(logger: &Logger, procs: &GuestComponentsProcs)
     true
 }
 
-async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) -> Result<()> {
+async fn launch_guest_component_procs(
+    logger: &Logger,
+    config: &AgentConfig,
+    initdata_return_value: &Option<InitdataReturnValue>,
+) -> Result<()> {
     if config.guest_components_procs == GuestComponentsProcs::None {
         return Ok(());
     }
 
     debug!(logger, "spawning attestation-agent process {}", AA_PATH);
+    let mut aa_args = vec!["--attestation_sock", AA_ATTESTATION_URI];
+    let initdata_parameter;
+    if let Some(initdata_return_value) = initdata_return_value {
+        initdata_parameter =
+            base64::engine::general_purpose::STANDARD.encode(&initdata_return_value.digest);
+        aa_args.push("--initdata");
+        aa_args.push(&initdata_parameter);
+    }
+
     launch_process(
         logger,
         AA_PATH,
-        &vec!["--attestation_sock", AA_ATTESTATION_URI],
+        aa_args,
+        Some(AA_CONFIG_PATH),
         AA_ATTESTATION_SOCKET,
         DEFAULT_LAUNCH_PROCESS_TIMEOUT,
     )
+    .await
     .map_err(|e| anyhow!("launch_process {} failed: {:?}", AA_PATH, e))?;
 
     // skip launch of confidential-data-hub and api-server-rest
@@ -500,10 +533,12 @@ async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) ->
     launch_process(
         logger,
         CDH_PATH,
-        &vec![],
+        vec![],
+        Some(CDH_CONFIG_PATH),
         CDH_SOCKET,
         DEFAULT_LAUNCH_PROCESS_TIMEOUT,
     )
+    .await
     .map_err(|e| anyhow!("launch_process {} failed: {:?}", CDH_PATH, e))?;
 
     // skip launch of api-server-rest
@@ -519,10 +554,12 @@ async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) ->
     launch_process(
         logger,
         API_SERVER_PATH,
-        &vec!["--features", &features.to_string()],
+        vec!["--features", &features.to_string()],
+        None,
         "",
         0,
     )
+    .await
     .map_err(|e| anyhow!("launch_process {} failed: {:?}", API_SERVER_PATH, e))?;
 
     Ok(())
@@ -532,8 +569,12 @@ async fn launch_guest_component_procs(logger: &Logger, config: &AgentConfig) ->
 // and the corresponding procs are enabled in the agent configuration. the process will be
 // launched in the background and the function will return immediately.
 // If the CDH is started, a CDH client will be instantiated and returned.
-async fn init_attestation_components(logger: &Logger, config: &AgentConfig) -> Result<()> {
-    launch_guest_component_procs(logger, config).await?;
+async fn init_attestation_components(
+    logger: &Logger,
+    config: &AgentConfig,
+    initdata_return_value: &Option<InitdataReturnValue>,
+) -> Result<()> {
+    launch_guest_component_procs(logger, config, initdata_return_value).await?;
 
     // If a CDH socket exists, initialize the CDH client and enable ocicrypt
     match tokio::fs::metadata(CDH_SOCKET).await {
@@ -555,11 +596,11 @@ async fn init_attestation_components(logger: &Logger, config: &AgentConfig) -> R
     Ok(())
 }
 
-fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: i32) -> Result<()> {
+async fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: i32) -> Result<()> {
     let p = Path::new(path);
     let mut attempts = 0;
     loop {
-        std::thread::sleep(std::time::Duration::from_secs(1));
+        tokio::time::sleep(std::time::Duration::from_secs(1)).await;
         if p.exists() {
             return Ok(());
         }
@@ -576,22 +617,32 @@ fn wait_for_path_to_exist(logger: &Logger, path: &str, timeout_secs: i32) -> Res
     Err(anyhow!("wait for {} to exist timeout.", path))
 }
 
-fn launch_process(
+async fn launch_process(
     logger: &Logger,
     path: &str,
-    args: &Vec<&str>,
+    mut args: Vec<&str>,
+    config: Option<&str>,
     unix_socket_path: &str,
     timeout_secs: i32,
 ) -> Result<()> {
     if !Path::new(path).exists() {
-        return Err(anyhow!("path {} does not exist.", path));
+        bail!("path {} does not exist.", path);
     }
+
+    if let Some(config_path) = config {
+        if Path::new(config_path).exists() {
+            args.push("-c");
+            args.push(config_path);
+        }
+    }
+
     if !unix_socket_path.is_empty() && Path::new(unix_socket_path).exists() {
-        fs::remove_file(unix_socket_path)?;
+        tokio::fs::remove_file(unix_socket_path).await?;
     }
-    Command::new(path).args(args).spawn()?;
+
+    tokio::process::Command::new(path).args(args).spawn()?;
     if !unix_socket_path.is_empty() && timeout_secs > 0 {
-        wait_for_path_to_exist(logger, unix_socket_path, timeout_secs)?;
+        wait_for_path_to_exist(logger, unix_socket_path, timeout_secs).await?;
     }
 
     Ok(())
diff --git a/src/agent/testdata/initdata.img b/src/agent/testdata/initdata.img
new file mode 100644
index 0000000000..3c5fe59181
Binary files /dev/null and b/src/agent/testdata/initdata.img differ
diff --git a/src/runtime/pkg/govmm/qemu/qemu.go b/src/runtime/pkg/govmm/qemu/qemu.go
index c883965ff5..86a70e441a 100644
--- a/src/runtime/pkg/govmm/qemu/qemu.go
+++ b/src/runtime/pkg/govmm/qemu/qemu.go
@@ -15,6 +15,7 @@ package qemu
 
 import (
 	"context"
+	"encoding/base64"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -328,6 +329,9 @@ type Object struct {
 	// SnpIdAuth is the 4096-byte, base64-encoded blob to provide the ‘ID Authentication Information Structure’
 	// for the SNP_LAUNCH_FINISH command defined in the SEV-SNP firmware ABI (default: all-zero)
 	SnpIdAuth string
+
+	// InitdataDigest is the raw (not yet encoded) byte slice of the initdata digest
+	InitdataDigest []byte
 }
 
 // Valid returns true if the Object structure is valid and complete.
@@ -353,6 +357,14 @@ func (object Object) Valid() bool {
 	}
 }
 
+// adjustProperLength returns a copy of data that is exactly length bytes
+// long: longer input is truncated, shorter input is zero-padded at the end.
+func adjustProperLength(data []byte, length int) []byte {
+	adjusted := make([]byte, length)
+	copy(adjusted, data)
+	return adjusted
+}
+
 // QemuParams returns the qemu parameters built out of this Object device.
 func (object Object) QemuParams(config *Config) []string {
 	var objectParams []string
@@ -394,6 +406,16 @@ func (object Object) QemuParams(config *Config) []string {
 		driveParams = append(driveParams, "if=pflash,format=raw,readonly=on")
 		driveParams = append(driveParams, fmt.Sprintf("file=%s", object.File))
 	case SNPGuest:
 		objectParams = append(objectParams, string(object.Type))
+		// host-data is added only after the object type so that the type name
+		// stays the first entry (QEMU: "-object typename[,prop1=value1,...]").
+		if len(object.InitdataDigest) > 0 {
+			// due to https://github.com/confidential-containers/qemu/blob/amd-snp-202402240000/qapi/qom.json#L926-L929
+			// hostdata in SEV-SNP should be exactly 32 bytes
+			hostdataSlice := adjustProperLength(object.InitdataDigest, 32)
+			hostdata := base64.StdEncoding.EncodeToString(hostdataSlice)
+			objectParams = append(objectParams, fmt.Sprintf("host-data=%s", hostdata))
+		}
+
 		objectParams = append(objectParams, fmt.Sprintf("id=%s", object.ID))
 		objectParams = append(objectParams, fmt.Sprintf("cbitpos=%d", object.CBitPos))
@@ -485,10 +507,19 @@ func getQgsSocketAddress(portNum uint32) SocketAddress {
 
 func prepareTDXObject(object Object) string {
 	qgsSocket := getQgsSocketAddress(object.QgsPort)
+	// due to https://github.com/intel-staging/qemu-tdx/blob/tdx-qemu-upstream-2023.9.21-v8.1.0/qapi/qom.json#L880
+	// mrconfigid in TDX should be exactly 48 bytes
+
+	// mrconfigid stays empty when there is no initdata digest.
+	var mrconfigid string
+	if len(object.InitdataDigest) > 0 {
+		mrconfigidSlice := adjustProperLength(object.InitdataDigest, 48)
+		mrconfigid = base64.StdEncoding.EncodeToString(mrconfigidSlice)
+	}
 	tdxObject := TdxQomObject{
 		string(object.Type), // qom-type
 		object.ID,           // id
-		"",                  // mrconfigid
+		mrconfigid,          // mrconfigid
 		"",                  // mrowner
 		"",                  // mrownerconfig
 		qgsSocket,           // quote-generation-socket
diff --git a/src/runtime/pkg/oci/utils.go b/src/runtime/pkg/oci/utils.go
index e4f1e562e4..e8f792ef58 100644
--- a/src/runtime/pkg/oci/utils.go
+++ b/src/runtime/pkg/oci/utils.go
@@ -7,11 +7,16 @@
 package oci
 
 import (
+	"compress/gzip"
 	"context"
+	"crypto/sha256"
+	"crypto/sha512"
"encoding/base64" "encoding/json" "errors" "fmt" + "hash" + "io" "math" "os" "path/filepath" @@ -21,6 +26,7 @@ import ( "strings" "syscall" + "github.com/BurntSushi/toml" ctrAnnotations "github.com/containerd/containerd/pkg/cri/annotations" podmanAnnotations "github.com/containers/podman/v4/pkg/annotations" specs "github.com/opencontainers/runtime-spec/specs-go" @@ -31,6 +37,7 @@ import ( vc "github.com/kata-containers/kata-containers/src/runtime/virtcontainers" "github.com/kata-containers/kata-containers/src/runtime/pkg/device/config" + kataTypes "github.com/kata-containers/kata-containers/src/runtime/pkg/types" exp "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/experimental" vcAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations" dockershimAnnotations "github.com/kata-containers/kata-containers/src/runtime/virtcontainers/pkg/annotations/dockershim" @@ -485,6 +492,10 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, return err } + if err := addHypervisorInitdataOverrides(ocispec, config); err != nil { + return err + } + if value, ok := ocispec.Annotations[vcAnnotations.MachineType]; ok { if value != "" { config.HypervisorConfig.HypervisorMachineType = value @@ -556,9 +567,6 @@ func addHypervisorConfigOverrides(ocispec specs.Spec, config *vc.SandboxConfig, config.HypervisorConfig.SGXEPCSize = size } - if initdata, ok := ocispec.Annotations[vcAnnotations.Initdata]; ok { - config.HypervisorConfig.Initdata = initdata - } if err := addHypervisorGPUOverrides(ocispec, config); err != nil { return err @@ -919,6 +927,53 @@ func addHypervisorNetworkOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfi }) } +func addHypervisorInitdataOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig) error { + if value, ok := ocispec.Annotations[vcAnnotations.Initdata]; ok { + if len(value) == 0 { + ociLog.Debug("Initdata annotation set without any value") + return nil + } + 
b64Reader := base64.NewDecoder(base64.StdEncoding, strings.NewReader(value)) + gzipReader, err := gzip.NewReader(b64Reader) + if err != nil { + return fmt.Errorf("initdata create gzip reader error: %v", err) + } + + initdataToml, err := io.ReadAll(gzipReader) + if err != nil { + return fmt.Errorf("uncompressing initdata with gzip error: %v", err) + } + + initdataStr := string(initdataToml) + var initdata kataTypes.Initdata + if _, err := toml.Decode(initdataStr, &initdata); err != nil { + return fmt.Errorf("parsing initdata annotation failed: %v", err) + } + + var initdataDigest []byte + var h hash.Hash + switch initdata.Algorithm { + case "sha256": + h = sha256.New() + case "sha384": + h = sha512.New384() + case "sha512": + h = sha512.New() + } + + h.Write([]byte(initdataToml)) + initdataDigest = h.Sum(nil) + + ociLog.Debugf("Initdata digest set to: %v", initdataDigest) + + sbConfig.HypervisorConfig.Initdata = initdataStr + + sbConfig.HypervisorConfig.InitdataDigest = initdataDigest + } + + return nil +} + func addRuntimeConfigOverrides(ocispec specs.Spec, sbConfig *vc.SandboxConfig, runtime RuntimeConfig) error { if err := newAnnotationConfiguration(ocispec, vcAnnotations.DisableGuestSeccomp).setBool(func(disableGuestSeccomp bool) { diff --git a/src/runtime/pkg/oci/utils_test.go b/src/runtime/pkg/oci/utils_test.go index c4c91cb590..d096053ec7 100644 --- a/src/runtime/pkg/oci/utils_test.go +++ b/src/runtime/pkg/oci/utils_test.go @@ -771,10 +771,23 @@ func TestAddRemoteHypervisorAnnotations(t *testing.T) { assert.NoError(err) // When initdata specified, remote hypervisor annotations do have the annotation added. 
-	ocispec.Annotations[vcAnnotations.Initdata] = "initdata"
+	// Note that the initdata annotation parsing logic will extract it into plaintext
+	ocispec.Annotations[vcAnnotations.Initdata] = "H4sIAFlC92cAAytLLSrOzM9TsFVQMtAz1DNQ4krMSc8vyizJyAWJFWckGpmaKXFFpySWJMZyKSUm6pXk5+YoAeXU1dW5QJhLKTklA4toQX5OZnKlXlFqej6yBABS/5JkcQAAAA=="
 	err = addAnnotations(ocispec, &sbConfig, runtimeConfig)
 	assert.NoError(err)
-	assert.Equal(sbConfig.HypervisorConfig.Initdata, "initdata")
+	assert.Equal(sbConfig.HypervisorConfig.Initdata, `version = "0.1.0"
+algorithm = "sha256"
+[data]
+"aa.toml" = '''
+'''
+
+"cdh.toml" = '''
+'''
+
+"policy.rego" = '''
+'''
+`)
+	assert.Equal(sbConfig.HypervisorConfig.InitdataDigest, []byte{0xc6, 0x69, 0x4b, 0xb7, 0xa2, 0x9d, 0x6f, 0x37, 0xec, 0x72, 0xa1, 0x55, 0x82, 0xe0, 0x4, 0xb9, 0xf3, 0x14, 0x21, 0x59, 0x68, 0x2d, 0xb8, 0x50, 0x9a, 0x30, 0x44, 0x7, 0x41, 0x9a, 0x49, 0xe5})
 
 	// When GPU annotations are specified, remote hypervisor annotations have the annotation added
 	ocispec.Annotations[vcAnnotations.DefaultGPUs] = "-1"
@@ -879,7 +892,9 @@ func TestAddRuntimeAnnotations(t *testing.T) {
 	ocispec.Annotations[vcAnnotations.DisableNewNetNs] = "true"
 	ocispec.Annotations[vcAnnotations.InterNetworkModel] = "macvtap"
 	ocispec.Annotations[vcAnnotations.CreateContainerTimeout] = "100"
-	ocispec.Annotations[vcAnnotations.Initdata] = "initdata"
+
+	// Note that the initdata annotation parsing logic will extract it into plaintext
+	ocispec.Annotations[vcAnnotations.Initdata] = "H4sIAFlC92cAAytLLSrOzM9TsFVQMtAz1DNQ4krMSc8vyizJyAWJFWckGpmaKXFFpySWJMZyKSUm6pXk5+YoAeXU1dW5QJhLKTklA4toQX5OZnKlXlFqej6yBABS/5JkcQAAAA=="
 
 	addAnnotations(ocispec, &config, runtimeConfig)
 	assert.Equal(config.DisableGuestSeccomp, true)
@@ -887,7 +902,20 @@ func TestAddRuntimeAnnotations(t *testing.T) {
 	assert.Equal(config.NetworkConfig.DisableNewNetwork, true)
 	assert.Equal(config.NetworkConfig.InterworkingModel, vc.NetXConnectMacVtapModel)
 	assert.Equal(config.CreateContainerTimeout, uint64(100))
-	assert.Equal(config.HypervisorConfig.Initdata, "initdata")
+	assert.Equal(config.HypervisorConfig.Initdata, `version = "0.1.0"
+algorithm = "sha256"
+[data]
+"aa.toml" = '''
+'''
+
+"cdh.toml" = '''
+'''
+
+"policy.rego" = '''
+'''
+`)
+	assert.Equal(config.HypervisorConfig.InitdataDigest, []byte{0xc6, 0x69, 0x4b, 0xb7, 0xa2, 0x9d, 0x6f, 0x37, 0xec, 0x72, 0xa1, 0x55, 0x82, 0xe0, 0x4, 0xb9, 0xf3, 0x14, 0x21, 0x59, 0x68, 0x2d, 0xb8, 0x50, 0x9a, 0x30, 0x44, 0x7, 0x41, 0x9a, 0x49, 0xe5})
+
 }
 
 func TestRegexpContains(t *testing.T) {
diff --git a/src/runtime/pkg/types/types.go b/src/runtime/pkg/types/types.go
index 73a255117e..11ec1a5f9a 100644
--- a/src/runtime/pkg/types/types.go
+++ b/src/runtime/pkg/types/types.go
@@ -10,3 +10,13 @@ const (
 	KataRuntimeNameRegexp     = `io\.containerd\.kata.*\.v2`
 	ContainerdRuntimeTaskPath = "io.containerd.runtime.v2.task"
 )
+
+// Initdata mirrors the top-level layout of an initdata TOML document.
+type Initdata struct {
+	// Version is the value of the "version" key.
+	Version string `toml:"version"`
+	// Algorithm is the value of the "algorithm" key (a hash algorithm name).
+	Algorithm string `toml:"algorithm"`
+	// Data holds the "data" table, keyed by entry name.
+	Data map[string]string `toml:"data"`
+}
diff --git a/src/runtime/virtcontainers/hypervisor.go b/src/runtime/virtcontainers/hypervisor.go
index b6b75d5499..b0ac28b287 100644
--- a/src/runtime/virtcontainers/hypervisor.go
+++ b/src/runtime/virtcontainers/hypervisor.go
@@ -682,6 +682,14 @@ type HypervisorConfig struct {
 	// Initdata defines the initdata passed into guest when CreateVM
 	Initdata string
 
+	// InitdataDigest represents opaque binary data attached to a TEE and typically used
+	// for Guest attestation. This will be encoded in the format expected by QEMU for each TEE type.
+	InitdataDigest []byte
+
+	// The initdata image on the host side to store the initdata and be mounted
+	// as a raw block device to guest
+	InitdataImage string
+
 	// GPU specific annotations (currently only applicable for Remote Hypervisor)
 	//DefaultGPUs specifies the number of GPUs required for the Kata VM
 	DefaultGPUs uint32
diff --git a/src/runtime/virtcontainers/qemu.go b/src/runtime/virtcontainers/qemu.go
index 9072170008..71982ce280 100644
--- a/src/runtime/virtcontainers/qemu.go
+++ b/src/runtime/virtcontainers/qemu.go
@@ -9,7 +9,10 @@ package virtcontainers
 
 import (
 	"bufio"
+	"bytes"
+	"compress/gzip"
 	"context"
+	"encoding/binary"
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
@@ -394,6 +397,23 @@ func (q *qemu) createQmpSocket() ([]govmmQemu.QMPSocket, error) {
 	return sockets, nil
 }
 
+// buildInitdataDevice appends the initdata image to devices as a raw virtio block device.
+func (q *qemu) buildInitdataDevice(devices []govmmQemu.Device, initdataImage string) []govmmQemu.Device {
+	device := govmmQemu.BlockDevice{
+		Driver:    govmmQemu.VirtioBlock,
+		Transport: govmmQemu.TransportPCI,
+		ID:        "initdata",
+		File:      initdataImage,
+		SCSI:      false,
+		WCE:       false,
+		AIO:       govmmQemu.Threads,
+		Interface: "none",
+		Format:    "raw",
+	}
+
+	return append(devices, device)
+}
+
 func (q *qemu) buildDevices(ctx context.Context, kernelPath string) ([]govmmQemu.Device, *govmmQemu.IOThread, *govmmQemu.Kernel, error) {
 	var devices []govmmQemu.Device
 
@@ -540,6 +560,94 @@ func (q *qemu) createVirtiofsDaemon(sharedPath string) (VirtiofsDaemon, error) {
 	}, nil
 }
 
+// prepareInitdataImage will create an image with a very simple layout
+//
+// There will be multiple sectors. The first 8 bytes are Magic number "initdata".
+// Then a "length" field of 8 bytes follows (unsigned int64).
+// Finally the gzipped initdata toml. The image will be padded to an
+// integer multiple of the sector size for alignment.
+//
+// offset 0                                 8                    16
+//      0 'i' 'n' 'i' 't' 'd' 'a' 't' 'a' | gzip length in le |
+//     16 gzip(initdata toml) ...
+//        (end of the last sector) '\0' paddings
+func prepareInitdataImage(initdata string, imagePath string) error {
+	const sectorSize = 512
+	const headerSize = 16 // magic number (8 bytes) + length field (8 bytes)
+
+	var buf bytes.Buffer
+	gzipper := gzip.NewWriter(&buf)
+	if _, err := gzipper.Write([]byte(initdata)); err != nil {
+		return fmt.Errorf("failed to compress initdata: %v", err)
+	}
+	if err := gzipper.Close(); err != nil {
+		return fmt.Errorf("failed to compress initdata: %v", err)
+	}
+
+	compressedInitdata := buf.Bytes()
+	lengthBuffer := make([]byte, 8)
+	binary.LittleEndian.PutUint64(lengthBuffer, uint64(len(compressedInitdata)))
+
+	imageLength := headerSize + len(compressedInitdata)
+	paddingLength := (imageLength+sectorSize-1)/sectorSize*sectorSize - imageLength
+	paddingBuffer := make([]byte, paddingLength)
+
+	// O_TRUNC keeps stale bytes of an older, larger image out of the new one.
+	file, err := os.OpenFile(imagePath, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0640)
+	if err != nil {
+		return fmt.Errorf("failed to create initdata image: %v", err)
+	}
+	defer file.Close()
+
+	if _, err := file.Write([]byte("initdata")); err != nil {
+		return fmt.Errorf("failed to write magic number to initdata image: %v", err)
+	}
+
+	if _, err := file.Write(lengthBuffer); err != nil {
+		return fmt.Errorf("failed to write data length to initdata image: %v", err)
+	}
+
+	if _, err := file.Write(compressedInitdata); err != nil {
+		return fmt.Errorf("failed to write compressed initdata to initdata image: %v", err)
+	}
+
+	if _, err := file.Write(paddingBuffer); err != nil {
+		return fmt.Errorf("failed to write padding to initdata image: %v", err)
+	}
+
+	return nil
+}
+
+// prepareInitdataMount writes config.Initdata into an image file below
+// /run/kata-containers/shared/initdata/<q.id> and stores the image path in
+// config.InitdataImage. It does nothing when config.Initdata is empty.
+func (q *qemu) prepareInitdataMount(config *HypervisorConfig) error {
+	if len(config.Initdata) == 0 {
+		q.Logger().Info("No initdata provided. Skip prepare initdata device")
+		return nil
+	}
+
+	q.Logger().Info("Start to prepare initdata")
+	initdataWorkdir := filepath.Join("/run/kata-containers/shared/initdata", q.id)
+	initdataImagePath := filepath.Join(initdataWorkdir, "data.img")
+
+	err := os.MkdirAll(initdataWorkdir, 0755)
+	if err != nil {
+		q.Logger().WithField("initdata", "create initdata image path").WithError(err).Error("failed to create initdata work dir")
+		return err
+	}
+
+	err = prepareInitdataImage(config.Initdata, initdataImagePath)
+	if err != nil {
+		q.Logger().WithField("initdata", "prepare initdata image").WithError(err).Error("failed to prepare initdata image")
+		return err
+	}
+
+	config.InitdataImage = initdataImagePath
+
+	return nil
+}
+
 // CreateVM is the Hypervisor VM creation implementation for govmmQemu.
 func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervisorConfig *HypervisorConfig) error {
 	// Save the tracing context
@@ -552,6 +660,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
 		return err
 	}
 
+	if err := q.prepareInitdataMount(hypervisorConfig); err != nil {
+		return err
+	}
+
 	machine, err := q.getQemuMachine()
 	if err != nil {
 		return err
@@ -650,6 +762,10 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
 		return err
 	}
 
+	if len(hypervisorConfig.Initdata) > 0 {
+		devices = q.buildInitdataDevice(devices, hypervisorConfig.InitdataImage)
+	}
+
 	// some devices configuration may also change kernel params, make sure this is called afterwards
 	kernel.Params = q.kernelParameters()
 	q.checkBpfEnabled()
@@ -681,7 +797,7 @@ func (q *qemu) CreateVM(ctx context.Context, id string, network Network, hypervi
 		Debug: hypervisorConfig.Debug,
 	}
 
-	qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath)
+	qemuConfig.Devices, qemuConfig.Bios, err = q.arch.appendProtectionDevice(qemuConfig.Devices, firmwarePath, firmwareVolumePath, hypervisorConfig.InitdataDigest)
 	if err != nil {
 		return err
 	}
@@ -1254,6 +1370,7 @@
func (q *qemu) StopVM(ctx context.Context, waitOnly bool) (err error) { } } } + if q.config.SharedFS == config.VirtioFS || q.config.SharedFS == config.VirtioFSNydus { if err := q.stopVirtiofsDaemon(ctx); err != nil { return err @@ -1318,6 +1435,15 @@ func (q *qemu) cleanupVM() error { }).Debug("successfully removed the non root user") } + // If we have initdata, we should drop initdata image path + hypervisorConfig := q.HypervisorConfig() + if len(hypervisorConfig.Initdata) > 0 { + initdataWorkdir := filepath.Join(string(filepath.Separator), "/run/kata-containers/shared/initdata", q.id) + if err := os.RemoveAll(initdataWorkdir); err != nil { + q.Logger().WithError(err).Warnf("failed to remove initdata work dir %s", initdataWorkdir) + } + } + return nil } diff --git a/src/runtime/virtcontainers/qemu_amd64.go b/src/runtime/virtcontainers/qemu_amd64.go index dd0a929df0..b0c5aa7950 100644 --- a/src/runtime/virtcontainers/qemu_amd64.go +++ b/src/runtime/virtcontainers/qemu_amd64.go @@ -274,7 +274,7 @@ func (q *qemuAmd64) enableProtection() error { } // append protection device -func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { if q.sgxEPCSize != 0 { devices = append(devices, govmmQemu.Object{ @@ -299,6 +299,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, Debug: false, File: firmware, FirmwareVolume: firmwareVolume, + InitdataDigest: initdataDigest, }), "", nil case sevProtection: return append(devices, @@ -318,6 +319,7 @@ func (q *qemuAmd64) appendProtectionDevice(devices []govmmQemu.Device, firmware, File: firmware, CBitPos: cpuid.AMDMemEncrypt.CBitPosition, ReducedPhysBits: 1, + InitdataDigest: initdataDigest, } if q.snpIdBlock != "" && q.snpIdAuth != "" { obj.SnpIdBlock = 
q.snpIdBlock diff --git a/src/runtime/virtcontainers/qemu_amd64_test.go b/src/runtime/virtcontainers/qemu_amd64_test.go index 1425cb38cf..2756cb2be0 100644 --- a/src/runtime/virtcontainers/qemu_amd64_test.go +++ b/src/runtime/virtcontainers/qemu_amd64_test.go @@ -257,7 +257,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { firmware := "tdvf.fd" var bios string var err error - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.NoError(err) // non-protection @@ -265,20 +265,20 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { // pef protection amd64.(*qemuAmd64).protection = pefProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Error(err) assert.Empty(bios) // Secure Execution protection amd64.(*qemuAmd64).protection = seProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.Error(err) assert.Empty(bios) // sev protection amd64.(*qemuAmd64).protection = sevProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.NoError(err) assert.Empty(bios) @@ -298,7 +298,7 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { // snp protection amd64.(*qemuAmd64).protection = snpProtection - devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []uint8(nil)) assert.NoError(err) assert.Empty(bios) @@ -318,18 +318,19 @@ func TestQemuAmd64AppendProtectionDevice(t *testing.T) { // tdxProtection amd64.(*qemuAmd64).protection = tdxProtection - devices, bios, err = 
amd64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = amd64.appendProtectionDevice(devices, firmware, "", []byte("")) assert.NoError(err) assert.Empty(bios) expectedOut = append(expectedOut, govmmQemu.Object{ - Driver: govmmQemu.Loader, - Type: govmmQemu.TDXGuest, - ID: "tdx", - DeviceID: fmt.Sprintf("fd%d", id), - Debug: false, - File: firmware, + Driver: govmmQemu.Loader, + Type: govmmQemu.TDXGuest, + ID: "tdx", + DeviceID: fmt.Sprintf("fd%d", id), + Debug: false, + File: firmware, + InitdataDigest: []byte(""), }, ) diff --git a/src/runtime/virtcontainers/qemu_arch_base.go b/src/runtime/virtcontainers/qemu_arch_base.go index fe330d01a8..0bdf9092d4 100644 --- a/src/runtime/virtcontainers/qemu_arch_base.go +++ b/src/runtime/virtcontainers/qemu_arch_base.go @@ -165,7 +165,7 @@ type qemuArch interface { // This implementation is architecture specific, some archs may need // a firmware, returns a string containing the path to the firmware that should // be used with the -bios option, ommit -bios option if the path is empty. 
- appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) + appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) // scans the PCIe space and returns the biggest BAR sizes for 32-bit // and 64-bit addressable memory @@ -920,7 +920,7 @@ func (q *qemuArchBase) setPFlash(p []string) { } // append protection device -func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuArchBase) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { hvLogger.WithField("arch", runtime.GOARCH).Warnf("Confidential Computing has not been implemented for this architecture") return devices, firmware, nil } diff --git a/src/runtime/virtcontainers/qemu_arm64.go b/src/runtime/virtcontainers/qemu_arm64.go index 373321d2bb..bfe6e2d405 100644 --- a/src/runtime/virtcontainers/qemu_arm64.go +++ b/src/runtime/virtcontainers/qemu_arm64.go @@ -154,7 +154,7 @@ func (q *qemuArm64) enableProtection() error { return nil } -func (q *qemuArm64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuArm64) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { err := q.enableProtection() if err != nil { hvLogger.WithField("arch", runtime.GOARCH).Error(err) diff --git a/src/runtime/virtcontainers/qemu_arm64_test.go b/src/runtime/virtcontainers/qemu_arm64_test.go index 924dd32ba0..aca9497412 100644 --- a/src/runtime/virtcontainers/qemu_arm64_test.go +++ b/src/runtime/virtcontainers/qemu_arm64_test.go @@ -183,42 +183,42 @@ func TestQemuArm64AppendProtectionDevice(t *testing.T) { var err error // no 
protection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // PEF protection arm64.(*qemuArm64).protection = pefProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // Secure Execution protection arm64.(*qemuArm64).protection = seProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // SEV protection arm64.(*qemuArm64).protection = sevProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // SNP protection arm64.(*qemuArm64).protection = snpProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) // TDX protection arm64.(*qemuArm64).protection = tdxProtection - devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "") + devices, bios, err = arm64.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Empty(devices) assert.Empty(bios) assert.NoError(err) diff --git a/src/runtime/virtcontainers/qemu_ppc64le.go b/src/runtime/virtcontainers/qemu_ppc64le.go index d2e0228c8b..87c2139b2b 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le.go +++ b/src/runtime/virtcontainers/qemu_ppc64le.go @@ -157,7 +157,7 @@ func (q *qemuPPC64le) enableProtection() error { } // 
append protection device -func (q *qemuPPC64le) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuPPC64le) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { switch q.protection { case pefProtection: return append(devices, diff --git a/src/runtime/virtcontainers/qemu_ppc64le_test.go b/src/runtime/virtcontainers/qemu_ppc64le_test.go index 85e1dfe805..7bb79bc0e5 100644 --- a/src/runtime/virtcontainers/qemu_ppc64le_test.go +++ b/src/runtime/virtcontainers/qemu_ppc64le_test.go @@ -60,7 +60,7 @@ func TestQemuPPC64leAppendProtectionDevice(t *testing.T) { var devices []govmmQemu.Device var bios, firmware string var err error - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.NoError(err) //no protection @@ -68,31 +68,31 @@ func TestQemuPPC64leAppendProtectionDevice(t *testing.T) { //Secure Execution protection ppc64le.(*qemuPPC64le).protection = seProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //SEV protection ppc64le.(*qemuPPC64le).protection = sevProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //SNP protection ppc64le.(*qemuPPC64le).protection = snpProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //TDX protection ppc64le.(*qemuPPC64le).protection = tdxProtection - devices, bios, 
err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.Error(err) assert.Empty(bios) //PEF protection ppc64le.(*qemuPPC64le).protection = pefProtection - devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "") + devices, bios, err = ppc64le.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.NoError(err) assert.Empty(bios) diff --git a/src/runtime/virtcontainers/qemu_s390x.go b/src/runtime/virtcontainers/qemu_s390x.go index a333c3bb93..5f00e9ea76 100644 --- a/src/runtime/virtcontainers/qemu_s390x.go +++ b/src/runtime/virtcontainers/qemu_s390x.go @@ -344,7 +344,7 @@ func (q *qemuS390x) enableProtection() error { // appendProtectionDevice appends a QEMU object for Secure Execution. // Takes devices and returns updated version. Takes BIOS and returns it (no modification on s390x). -func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string) ([]govmmQemu.Device, string, error) { +func (q *qemuS390x) appendProtectionDevice(devices []govmmQemu.Device, firmware, firmwareVolume string, initdataDigest []byte) ([]govmmQemu.Device, string, error) { switch q.protection { case seProtection: return append(devices, diff --git a/src/runtime/virtcontainers/qemu_s390x_test.go b/src/runtime/virtcontainers/qemu_s390x_test.go index 24a67bdd9e..db88b4690f 100644 --- a/src/runtime/virtcontainers/qemu_s390x_test.go +++ b/src/runtime/virtcontainers/qemu_s390x_test.go @@ -111,7 +111,7 @@ func TestQemuS390xAppendProtectionDevice(t *testing.T) { var devices []govmmQemu.Device var bios, firmware string var err error - devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "") + devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil)) assert.NoError(err) // no protection @@ -119,32 +119,32 @@ func TestQemuS390xAppendProtectionDevice(t *testing.T) { // PEF protection 
s390x.(*qemuS390x).protection = pefProtection
-	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
+	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
 	assert.Error(err)
 	assert.Empty(bios)
 
 	// TDX protection
 	s390x.(*qemuS390x).protection = tdxProtection
-	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
+	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
 	assert.Error(err)
 	assert.Empty(bios)
 
 	// SEV protection
 	s390x.(*qemuS390x).protection = sevProtection
-	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
+	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
 	assert.Error(err)
 	assert.Empty(bios)
 
 	// SNP protection
 	s390x.(*qemuS390x).protection = snpProtection
-	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
+	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
 	assert.Error(err)
 	assert.Empty(bios)
 
 	// Secure Execution protection
 	s390x.(*qemuS390x).protection = seProtection
-	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "")
+	devices, bios, err = s390x.appendProtectionDevice(devices, firmware, "", []byte(nil))
 	assert.NoError(err)
 	assert.Empty(bios)
 
diff --git a/src/runtime/virtcontainers/qemu_test.go b/src/runtime/virtcontainers/qemu_test.go
index 4c51517ed5..53b0ff716c 100644
--- a/src/runtime/virtcontainers/qemu_test.go
+++ b/src/runtime/virtcontainers/qemu_test.go
@@ -8,9 +8,14 @@ package virtcontainers
 
 import (
+	"bytes"
+	"compress/gzip"
 	"context"
+	"encoding/binary"
 	"fmt"
+	"io"
 	"os"
+	"path"
 	"path/filepath"
 	"testing"
 
@@ -770,3 +775,53 @@ func TestQemuStartSandbox(t *testing.T) {
 	err = q.StartVM(context.Background(), 10)
 	assert.Error(err)
 }
+
+// TestPrepareInitdataImage checks the image layout produced by
+// prepareInitdataImage: the "initdata" magic, a little-endian uint64 payload
+// length, the gzip payload, and padding up to a 512-byte multiple.
+func TestPrepareInitdataImage(t *testing.T) {
+	const (
+		headerSize = 16
+		sectorSize = 512
+	)
+
+	tests := []struct {
+		name    string
+		content string
+	}{
+		{
+			"create an initdata image",
+			"some content",
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// t.TempDir() is removed automatically, so no explicit cleanup is needed.
+			imageDir := t.TempDir()
+			imagePath := path.Join(imageDir, "initdata.img")
+			if err := prepareInitdataImage(tt.content, imagePath); err != nil {
+				t.Fatalf("prepareInitdataImage() error = %v", err)
+			}
+
+			fullContent, err := os.ReadFile(imagePath)
+			if err != nil {
+				t.Fatalf("read initdata image failed: %v", err)
+			}
+
+			// Fail fast: the slicing below would panic on a truncated image.
+			if len(fullContent) < headerSize {
+				t.Fatalf("initdata image is too short, got %d bytes, want at least %d", len(fullContent), headerSize)
+			}
+			if len(fullContent)%sectorSize != 0 {
+				t.Errorf("initdata image size %d is not a multiple of %d", len(fullContent), sectorSize)
+			}
+
+			magicNumber := fullContent[:8]
+			if string(magicNumber) != "initdata" {
+				t.Errorf("initdata magic number is not correct, got %s, want initdata", string(magicNumber))
+			}
+
+			length := binary.LittleEndian.Uint64(fullContent[8:headerSize])
+			if length > uint64(len(fullContent)-headerSize) {
+				t.Fatalf("initdata length %d exceeds the %d bytes available after the header", length, len(fullContent)-headerSize)
+			}
+
+			contentSlice := fullContent[headerSize : headerSize+length]
+			gzipReader, err := gzip.NewReader(bytes.NewBuffer(contentSlice))
+			if err != nil {
+				t.Fatalf("read gzipped initdata failed: %v", err)
+			}
+			defer gzipReader.Close()
+
+			content, err := io.ReadAll(gzipReader)
+			if err != nil {
+				t.Fatalf("read gzipped initdata failed: %v", err)
+			}
+
+			if string(content) != tt.content {
+				t.Errorf("initdata content is not correct, got %s, want %s", string(content), tt.content)
+			}
+		})
+	}
+}
diff --git a/tests/integration/kubernetes/confidential_common.sh b/tests/integration/kubernetes/confidential_common.sh
index 75fb14e854..648d544151 100644
--- a/tests/integration/kubernetes/confidential_common.sh
+++ b/tests/integration/kubernetes/confidential_common.sh
@@ -171,3 +171,39 @@ function create_coco_pod_yaml() {
 		set_node "${kata_pod}" "$node"
 	fi
 }
+
+# This function creates pod yaml.
Parameters
+# - $1: image reference
+# - $2: value of annotation `io.katacontainers.config.hypervisor.kernel_params` (optional)
+# - $3: value of annotation `io.katacontainers.config.runtime.cc_init_data` (optional)
+# - $4: node (optional; the pod is pinned to it only when non-empty)
+function create_coco_pod_yaml_with_annotations() {
+	image=$1
+	kernel_params_annotation_value=${2:-}
+	cc_initdata_annotation_value=${3:-}
+	node=${4:-}
+
+	kernel_params_annotation_key="io.katacontainers.config.hypervisor.kernel_params"
+	cc_initdata_annotation_key="io.katacontainers.config.runtime.cc_init_data"
+
+	# Note: this is not local as we use it in the caller test
+	kata_pod="$(new_pod_config "$image" "kata-${KATA_HYPERVISOR}")"
+	set_container_command "${kata_pod}" "0" "sleep" "30"
+
+	# Set annotations (both values are written even when they are empty)
+	set_metadata_annotation "${kata_pod}" \
+		"io.containerd.cri.runtime-handler" \
+		"kata-${KATA_HYPERVISOR}"
+	set_metadata_annotation "${kata_pod}" \
+		"${kernel_params_annotation_key}" \
+		"${kernel_params_annotation_value}"
+	set_metadata_annotation "${kata_pod}" \
+		"${cc_initdata_annotation_key}" \
+		"${cc_initdata_annotation_value}"
+
+	add_allow_all_policy_to_yaml "${kata_pod}"
+
+	if [ -n "$node" ]; then
+		set_node "${kata_pod}" "$node"
+	fi
+}
\ No newline at end of file
diff --git a/tests/integration/kubernetes/k8s-initdata.bats b/tests/integration/kubernetes/k8s-initdata.bats
new file mode 100644
index 0000000000..fe42c78a45
--- /dev/null
+++ b/tests/integration/kubernetes/k8s-initdata.bats
@@ -0,0 +1,197 @@
+#!/usr/bin/env bats
+# Copyright (c) 2025 Alibaba Cloud
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+# This test exercises initdata using the following logic:
+# 1. Enable image signature verification via kernel commandline
+# 2. Set Trustee address via initdata
+# 3. Pull an image from a banned registry
+# 4. Check that the pull fails with the log `image security validation failed`,
+#    which shows that the initdata works.
+#
+# Note that if initdata does not work, the pod still fails to launch (hang at
+# CreatingContainer status).
The error information is
+# `[CDH] [ERROR]: Get Resource failed` which internally means that the KBS URL
+# has not been set correctly.
+#
+# TODO: After https://github.com/kata-containers/kata-containers/issues/9266
+# is resolved, both KBS URI and policy URI can be set via initdata.
+
+load "${BATS_TEST_DIRNAME}/lib.sh"
+load "${BATS_TEST_DIRNAME}/confidential_common.sh"
+
+export KBS="${KBS:-false}"
+export KATA_HYPERVISOR="${KATA_HYPERVISOR:-qemu}"
+
+# Per-test setup: skips unless the runtime class is confidential and the
+# snapshotter is nydus, then defines the image and policy URI used by the tests.
+setup() {
+    if ! is_confidential_runtime_class; then
+        skip "Test not supported for ${KATA_HYPERVISOR}."
+    fi
+
+    [ "${SNAPSHOTTER:-}" = "nydus" ] || skip "None snapshotter was found but this test requires one"
+
+    setup_common || die "setup_common failed"
+
+    # Image under quay.io/prometheus, which the policy below rejects.
+    FAIL_TEST_IMAGE="quay.io/prometheus/busybox:latest"
+
+    SECURITY_POLICY_KBS_URI="kbs:///default/security-policy/test"
+}
+
+# Registers an image security policy in KBS as resource
+# default/security-policy/test. The policy rejects quay.io/prometheus.
+# - $1: default policy type (defaults to insecureAcceptAnything)
+# Skips the calling test when KBS is disabled, the platform is not x86_64, or
+# the hypervisor is neither qemu-tdx nor qemu-coco-dev.
+function setup_kbs_image_policy_for_initdata() {
+    if [ "${KBS}" = "false" ]; then
+        skip "Test skipped as KBS not setup"
+    fi
+
+    export CURRENT_ARCH=$(uname -m)
+    if [ "${CURRENT_ARCH}" != "x86_64" ]; then
+        skip "Test skipped as only x86-64 supports, while current platform is ${CURRENT_ARCH}"
+    fi
+
+    # TODO: Enable for more archs
+    case "$KATA_HYPERVISOR" in
+        "qemu-tdx"|"qemu-coco-dev")
+            ;;
+        *)
+            skip "Test not supported for ${KATA_HYPERVISOR}."
+            ;;
+    esac
+
+    default_policy="${1:-insecureAcceptAnything}"
+    policy_json=$(cat << EOF
+{
+    "default": [
+        {
+            "type": "${default_policy}"
+        }
+    ],
+    "transports": {
+        "docker": {
+            "quay.io/prometheus": [
+                {
+                    "type": "reject"
+                }
+            ]
+        }
+    }
+}
+EOF
+    )
+
+    if ! is_confidential_hardware; then
+        kbs_set_allow_all_resources
+    fi
+
+    kbs_set_resource "default" "security-policy" "test" "${policy_json}"
+}
+
+# Initdata carries the KBS address (aa.toml / cdh.toml) and an allow-all agent
+# policy, gzip-compressed and base64-encoded into the pod annotation. The pod
+# is expected to fail with the image policy rejection.
+@test "Test that creating a container from an rejected image configured by initdata, fails according to policy reject" {
+    setup_kbs_image_policy_for_initdata
+
+    CC_KBS_ADDRESS=$(kbs_k8s_svc_http_addr)
+
+    kernel_parameter="agent.image_policy_file=${SECURITY_POLICY_KBS_URI} agent.enable_signature_verification=true"
+    initdata_annotation=$(gzip -c << EOF | base64 -w0
+version = "0.1.0"
+algorithm = "sha256"
+[data]
+"aa.toml" = '''
+[token_configs]
+[token_configs.coco_as]
+# TODO: we should fix this on AA side to set this a default value if not set.
+url = "${CC_KBS_ADDRESS}"
+
+[token_configs.kbs]
+url = "${CC_KBS_ADDRESS}"
+'''
+
+"cdh.toml" = '''
+[kbc]
+name = "cc_kbc"
+url = "${CC_KBS_ADDRESS}"
+'''
+
+"policy.rego" = '''
+# Copyright (c) 2023 Microsoft Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+package agent_policy
+
+default AddARPNeighborsRequest := true
+default AddSwapRequest := true
+default CloseStdinRequest := true
+default CopyFileRequest := true
+default CreateContainerRequest := true
+default CreateSandboxRequest := true
+default DestroySandboxRequest := true
+default ExecProcessRequest := true
+default GetMetricsRequest := true
+default GetOOMEventRequest := true
+default GuestDetailsRequest := true
+default ListInterfacesRequest := true
+default ListRoutesRequest := true
+default MemHotplugByProbeRequest := true
+default OnlineCPUMemRequest := true
+default PauseContainerRequest := true
+default PullImageRequest := true
+default ReadStreamRequest := true
+default RemoveContainerRequest := true
+default RemoveStaleVirtiofsShareMountsRequest := true
+default ReseedRandomDevRequest := true
+default ResumeContainerRequest := true
+default SetGuestDateTimeRequest := true
+default SetPolicyRequest := true
+default SignalProcessRequest := true
+default StartContainerRequest := true
+default StartTracingRequest := true
+default StatsContainerRequest := true
+default StopTracingRequest := true
+default TtyWinResizeRequest := true
+default UpdateContainerRequest := true
+default UpdateEphemeralMountsRequest := true
+default UpdateInterfaceRequest := true
+default UpdateRoutesRequest := true
+default WaitProcessRequest := true
+default WriteStreamRequest := true
+'''
+EOF
+    )
+    create_coco_pod_yaml_with_annotations "${FAIL_TEST_IMAGE}" "${kernel_parameter}" "${initdata_annotation}" "${node}"
+
+    # For debug sake
+    echo "Pod ${kata_pod}: $(cat ${kata_pod})"
+
+    assert_pod_fail "${kata_pod}"
+    assert_logs_contain "${node}" kata "${node_start_time}" "image security validation failed"
+}
+
+# Same image policy, but the initdata annotation is empty, so no KBS address is
+# supplied through initdata. Pod creation is expected to fail with the CDH
+# error instead of the policy rejection.
+@test "Test that creating a container from an rejected image not configured by initdata, fails according to CDH error" {
+    setup_kbs_image_policy_for_initdata
+
+    kernel_parameter="agent.image_policy_file=${SECURITY_POLICY_KBS_URI} agent.enable_signature_verification=true"
+
+    create_coco_pod_yaml_with_annotations "${FAIL_TEST_IMAGE}" "${kernel_parameter}" "" "${node}"
+
+    # For debug sake
+    echo "Pod ${kata_pod}: $(cat ${kata_pod})"
+
+    if k8s_create_pod "${kata_pod}" ; then
+        echo "Expected failure, but pod ${kata_pod} launched successfully."
+        return 1
+    fi
+
+    assert_logs_contain "${node}" kata "${node_start_time}" "\[CDH\] \[ERROR\]: Get Resource failed"
+}
+
+# Per-test teardown: applies the same skip conditions as setup, then calls
+# teardown_common.
+teardown() {
+    if ! is_confidential_runtime_class; then
+        skip "Test not supported for ${KATA_HYPERVISOR}."
+    fi
+
+    [ "${SNAPSHOTTER:-}" = "nydus" ] || skip "None snapshotter was found but this test requires one"
+
+    teardown_common "${node}" "${node_start_time:-}"
+}
diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh
index a62dcfe073..2bfc296473 100755
--- a/tests/integration/kubernetes/run_kubernetes_tests.sh
+++ b/tests/integration/kubernetes/run_kubernetes_tests.sh
@@ -37,6 +37,7 @@ else
 		"k8s-guest-pull-image-encrypted.bats" \
 		"k8s-guest-pull-image-authenticated.bats" \
 		"k8s-guest-pull-image-signature.bats" \
+		"k8s-initdata.bats" \
 		"k8s-confidential-attestation.bats" \
 	)