diff --git a/src/tools/genpolicy/genpolicy-settings.json b/src/tools/genpolicy/genpolicy-settings.json index 56f8154fc..f2d7abb07 100644 --- a/src/tools/genpolicy/genpolicy-settings.json +++ b/src/tools/genpolicy/genpolicy-settings.json @@ -34,6 +34,13 @@ "io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/$(bundle-id)" }, "Process": { + "NoNewPrivileges": true, + "User": { + "UID": 65535, + "GID": 65535, + "AdditionalGids": [], + "Username": "" + }, "Args": [ "/pause" ] @@ -315,7 +322,8 @@ "oci_version": "1.1.0" }, "cluster_config": { - "pause_container_image": "mcr.microsoft.com/oss/kubernetes/pause:3.6" + "pause_container_image": "mcr.microsoft.com/oss/kubernetes/pause:3.6", + "guest_pull": false }, "request_defaults": { "CreateContainerRequest": { diff --git a/src/tools/genpolicy/rules.rego b/src/tools/genpolicy/rules.rego index 015571d40..86eb47e0b 100644 --- a/src/tools/genpolicy/rules.rego +++ b/src/tools/genpolicy/rules.rego @@ -694,11 +694,8 @@ allow_user(p_process, i_process) { print("allow_user: input uid =", i_user.UID, "policy uid =", p_user.UID) p_user.UID == i_user.UID - # TODO: track down the reason for registry.k8s.io/pause:3.9 being - # executed with gid = 0 despite having "65535:65535" in its container image - # config. - #print("allow_user: input gid =", i_user.GID, "policy gid =", p_user.GID) - #p_user.GID == i_user.GID + print("allow_user: input gid =", i_user.GID, "policy gid =", p_user.GID) + p_user.GID == i_user.GID # TODO: compare the additionalGids field too after computing its value # based on /etc/passwd and /etc/group from the container image. 
diff --git a/src/tools/genpolicy/src/cronjob.rs b/src/tools/genpolicy/src/cronjob.rs index 3719a89c2..2f6d4e2ff 100644 --- a/src/tools/genpolicy/src/cronjob.rs +++ b/src/tools/genpolicy/src/cronjob.rs @@ -148,10 +148,11 @@ impl yaml::K8sResource for CronJob { false } - fn get_process_fields(&self, process: &mut policy::KataProcess) { + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { yaml::get_process_fields( process, &self.spec.jobTemplate.spec.template.spec.securityContext, + must_check_passwd, ); } diff --git a/src/tools/genpolicy/src/daemon_set.rs b/src/tools/genpolicy/src/daemon_set.rs index 012e25adb..159fd67c9 100644 --- a/src/tools/genpolicy/src/daemon_set.rs +++ b/src/tools/genpolicy/src/daemon_set.rs @@ -148,8 +148,12 @@ impl yaml::K8sResource for DaemonSet { .or_else(|| Some(String::new())) } - fn get_process_fields(&self, process: &mut policy::KataProcess) { - yaml::get_process_fields(process, &self.spec.template.spec.securityContext); + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { + yaml::get_process_fields( + process, + &self.spec.template.spec.securityContext, + must_check_passwd, + ); } fn get_sysctls(&self) -> Vec { diff --git a/src/tools/genpolicy/src/deployment.rs b/src/tools/genpolicy/src/deployment.rs index 3a59bccb7..5d0fdf22e 100644 --- a/src/tools/genpolicy/src/deployment.rs +++ b/src/tools/genpolicy/src/deployment.rs @@ -146,8 +146,12 @@ impl yaml::K8sResource for Deployment { .or_else(|| Some(String::new())) } - fn get_process_fields(&self, process: &mut policy::KataProcess) { - yaml::get_process_fields(process, &self.spec.template.spec.securityContext); + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { + yaml::get_process_fields( + process, + &self.spec.template.spec.securityContext, + must_check_passwd, + ); } fn get_sysctls(&self) -> Vec { diff --git a/src/tools/genpolicy/src/job.rs 
b/src/tools/genpolicy/src/job.rs index f5a723a1a..0e4cbf82c 100644 --- a/src/tools/genpolicy/src/job.rs +++ b/src/tools/genpolicy/src/job.rs @@ -111,8 +111,12 @@ impl yaml::K8sResource for Job { false } - fn get_process_fields(&self, process: &mut policy::KataProcess) { - yaml::get_process_fields(process, &self.spec.template.spec.securityContext); + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { + yaml::get_process_fields( + process, + &self.spec.template.spec.securityContext, + must_check_passwd, + ); } fn get_sysctls(&self) -> Vec { diff --git a/src/tools/genpolicy/src/pod.rs b/src/tools/genpolicy/src/pod.rs index 18f5ee5ba..4604f4948 100644 --- a/src/tools/genpolicy/src/pod.rs +++ b/src/tools/genpolicy/src/pod.rs @@ -296,6 +296,9 @@ struct SecurityContext { #[serde(skip_serializing_if = "Option::is_none")] runAsUser: Option, + #[serde(skip_serializing_if = "Option::is_none")] + runAsGroup: Option, + #[serde(skip_serializing_if = "Option::is_none")] seccompProfile: Option, } @@ -318,6 +321,12 @@ pub struct PodSecurityContext { #[serde(skip_serializing_if = "Option::is_none")] pub sysctls: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + pub runAsGroup: Option, + + #[serde(skip_serializing_if = "Option::is_none")] + pub allowPrivilegeEscalation: Option, // TODO: additional fields. 
} @@ -902,8 +911,8 @@ impl yaml::K8sResource for Pod { .or_else(|| Some(String::new())) } - fn get_process_fields(&self, process: &mut policy::KataProcess) { - yaml::get_process_fields(process, &self.spec.securityContext); + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { + yaml::get_process_fields(process, &self.spec.securityContext, must_check_passwd); } fn get_sysctls(&self) -> Vec { @@ -961,7 +970,25 @@ impl Container { if let Some(context) = &self.securityContext { if let Some(uid) = context.runAsUser { process.User.UID = uid.try_into().unwrap(); + // Changing the UID can break the GID mapping + // if a /etc/passwd file is present. + // The proper GID is determined, in order of preference: + // 1. the securityContext runAsGroup field (applied last in code) + // 2. lacking an explicit runAsGroup, /etc/passwd (get_gid_from_passwd_uid) + // 3. fall back to pod-level GID if there is one (unwrap_or) + // + // This behavior comes from the containerd runtime implementation: + // WithUser https://github.com/containerd/containerd/blob/main/pkg/oci/spec_opts.go#L592 + process.User.GID = self + .registry + .get_gid_from_passwd_uid(process.User.UID) + .unwrap_or(process.User.GID); } + + if let Some(gid) = context.runAsGroup { + process.User.GID = gid.try_into().unwrap(); + } + if let Some(allow) = context.allowPrivilegeEscalation { process.NoNewPrivileges = !allow } @@ -1008,6 +1035,7 @@ pub async fn add_pause_container(containers: &mut Vec, config: &Confi privileged: None, capabilities: None, runAsUser: None, + runAsGroup: None, seccompProfile: None, }), ..Default::default() diff --git a/src/tools/genpolicy/src/policy.rs b/src/tools/genpolicy/src/policy.rs index b9e7e187c..e55ef4951 100644 --- a/src/tools/genpolicy/src/policy.rs +++ b/src/tools/genpolicy/src/policy.rs @@ -425,6 +425,10 @@ pub struct CommonData { pub struct ClusterConfig { /// Pause container image reference. 
pub pause_container_image: String, + /// Whether or not the cluster uses the guest pull mechanism + /// In guest pull, host can't look into layers to determine GID. + /// See issue https://github.com/kata-containers/kata-containers/issues/11162 + pub guest_pull: bool, } /// Struct used to read data from the settings file and copy that data into the policy. @@ -713,7 +717,17 @@ impl AgentPolicy { substitute_args_env_variables(&mut process.Args, &process.Env); c_settings.get_process_fields(&mut process); - resource.get_process_fields(&mut process); + let mut must_check_passwd = false; + resource.get_process_fields(&mut process, &mut must_check_passwd); + + // The actual GID of the process run by the CRI + // Depends on the contents of /etc/passwd in the container + if must_check_passwd { + process.User.GID = yaml_container + .registry + .get_gid_from_passwd_uid(process.User.UID) + .unwrap_or(0); + } yaml_container.get_process_fields(&mut process); process diff --git a/src/tools/genpolicy/src/registry.rs b/src/tools/genpolicy/src/registry.rs index e36350bb5..654f698ba 100644 --- a/src/tools/genpolicy/src/registry.rs +++ b/src/tools/genpolicy/src/registry.rs @@ -36,6 +36,8 @@ pub struct Container { pub image: String, pub config_layer: DockerConfigLayer, pub image_layers: Vec, + pub passwd: String, + pub group: String, } /// Image config layer properties. @@ -71,6 +73,7 @@ pub struct ImageLayer { pub diff_id: String, pub verity_hash: String, pub passwd: String, + pub group: String, } /// See https://docs.docker.com/reference/dockerfile/#volume. @@ -98,17 +101,35 @@ struct PasswdRecord { pub shell: String, } +/// A single record in a Unix group file. +#[derive(Debug)] +struct GroupRecord { + pub name: String, + #[allow(dead_code)] + pub validate: bool, + pub gid: u32, + #[allow(dead_code)] + pub user_list: String, +} + /// Path to /etc/passwd in a container layer's tar file. 
const PASSWD_FILE_TAR_PATH: &str = "etc/passwd"; +/// Path to /etc/group in a container layer's tar file. +const GROUP_FILE_TAR_PATH: &str = "etc/group"; + /// Path to a file indicating a whiteout of the /etc/passwd file in a container /// layer's tar file (i.e., /etc/passwd was deleted in the layer). const PASSWD_FILE_WHITEOUT_TAR_PATH: &str = "etc/.wh.passwd"; +/// Path to a file indicating a whiteout of the /etc/group file in a container +/// layer's tar file (i.e., /etc/group was deleted in the layer). +const GROUP_FILE_WHITEOUT_TAR_PATH: &str = "etc/.wh.group"; + /// A marker used to track whether a particular container layer has had its -/// /etc/passwd file deleted, and thus any such files read from previous, lower +/// /etc/* file deleted, and thus any such files read from previous, lower /// layers should be discarded. -const WHITEOUT_MARKER: &str = "WHITEOUT"; +pub const WHITEOUT_MARKER: &str = "WHITEOUT"; impl Container { pub async fn new(config: &Config, image: &str) -> Result { @@ -153,10 +174,35 @@ impl Container { .await .unwrap(); + // Find the last layer with an /etc/* file, respecting whiteouts. + let mut passwd = String::new(); + let mut group = String::new(); + // Nydus/guest_pull doesn't make available passwd/group files from layers properly. 
+ // See issue https://github.com/kata-containers/kata-containers/issues/11162 + if !config.settings.cluster_config.guest_pull { + for layer in &image_layers { + if layer.passwd == WHITEOUT_MARKER { + passwd = String::new(); + } else if !layer.passwd.is_empty() { + passwd = layer.passwd.clone(); + } + + if layer.group == WHITEOUT_MARKER { + group = String::new(); + } else if !layer.group.is_empty() { + group = layer.group.clone(); + } + } + } else { + info!("Guest pull is enabled, skipping passwd/group file parsing"); + } + Ok(Container { image: image_string, config_layer, image_layers, + passwd, + group, }) } Err(oci_client::errors::OciDistributionError::AuthenticationFailure(message)) => { @@ -171,6 +217,110 @@ impl Container { } } + pub fn get_gid_from_passwd_uid(&self, uid: u32) -> Result { + if self.passwd.is_empty() { + return Err(anyhow!( + "No /etc/passwd file is available, unable to parse gids from uid" + )); + } + match parse_passwd_file(&self.passwd) { + Ok(records) => { + if let Some(record) = records.iter().find(|&r| r.uid == uid) { + Ok(record.gid) + } else { + Err(anyhow!("Failed to find uid {} in /etc/passwd", uid)) + } + } + Err(inner_e) => Err(anyhow!("Failed to parse /etc/passwd - error {inner_e}")), + } + } + + pub fn get_uid_gid_from_passwd_user(&self, user: String) -> Result<(u32, u32)> { + if user.is_empty() || self.passwd.is_empty() { + return Ok((0, 0)); + } + match parse_passwd_file(&self.passwd) { + Ok(records) => { + if let Some(record) = records.iter().find(|&r| r.user == user) { + Ok((record.uid, record.gid)) + } else { + Err(anyhow!("Failed to find user {} in /etc/passwd", user)) + } + } + Err(inner_e) => { + warn!("Failed to parse /etc/passwd - error {inner_e}, using uid = gid = 0"); + Ok((0, 0)) + } + } + } + + fn get_gid_from_group_name(&self, name: &str) -> Result { + if self.group.is_empty() { + return Err(anyhow!( + "No /etc/group file is available, unable to parse gids from group name" + )); + } + match 
parse_group_file(&self.group) { + Ok(records) => { + if let Some(record) = records.iter().find(|&r| r.name == name) { + Ok(record.gid) + } else { + Err(anyhow!("Failed to find name {} in /etc/group", name)) + } + } + Err(inner_e) => Err(anyhow!("Failed to parse /etc/group - error {inner_e}")), + } + } + + fn parse_user_string(&self, user: &str) -> u32 { + if user.is_empty() { + return 0; + } + + match user.parse::() { + Ok(uid) => uid, + // If the user is not a number, interpret it as a user name. + Err(outer_e) => { + debug!( + "Failed to parse {} as u32, using it as a user name - error {outer_e}", + user + ); + let (uid, _) = self + .get_uid_gid_from_passwd_user(user.to_string().clone()) + .unwrap_or((0, 0)); + uid + } + } + } + + fn parse_group_string(&self, group: &str) -> u32 { + if group.is_empty() { + return 0; + } + + match group.parse::() { + Ok(id) => { + warn!( + concat!( + "Parsed gid {} from OCI container image config, but not using it. ", + "GIDs are only picked up by the runtime from /etc/passwd." + ), + id + ); + 0 + } + // If the group is not a number, interpret it as a group name. + Err(outer_e) => { + debug!( + "Failed to parse {} as u32, using it as a group name - error {outer_e}", + group + ); + + self.get_gid_from_group_name(group).unwrap_or(0) + } + } + } + // Convert Docker image config to policy data. pub fn get_process( &self, @@ -188,9 +338,14 @@ impl Container { * 2. Contain only a UID * 3. Contain a UID:GID pair, in that format * 4. Contain a user name, which we need to translate into a UID/GID pair - * 5. Be erroneus, somehow + * 5. Contain a (user name:group name) pair, which we need to translate into a UID/GID pair + * 6. 
Be erroneus, somehow */ if let Some(image_user) = &docker_config.User { + if self.passwd.is_empty() { + warn!("No /etc/passwd file is available, unable to parse gids from user"); + } + if !image_user.is_empty() { if image_user.contains(':') { debug!("Splitting Docker config user = {:?}", image_user); @@ -203,61 +358,23 @@ impl Container { ); } else { debug!("Parsing uid from user[0] = {}", &user[0]); - match user[0].parse() { - Ok(id) => process.User.UID = id, - Err(e) => { - warn!( - "Failed to parse {} as u32, using uid = 0 - error {e}", - &user[0] - ); - } - } + process.User.UID = self.parse_user_string(user[0]); debug!("Parsing gid from user[1] = {:?}", user[1]); - match user[1].parse() { - Ok(id) => process.User.GID = id, - Err(e) => { - warn!( - "Failed to parse {} as u32, using gid = 0 - error {e}", - &user[0] - ); - } - } + process.User.GID = self.parse_group_string(user[1]); + + debug!( + "Overriding OCI container GID with UID:GID mapping from /etc/passwd" + ); + process.User.GID = + self.get_gid_from_passwd_uid(process.User.UID).unwrap_or(0); } } else { - match image_user.parse::() { - Ok(uid) => process.User.UID = uid, - Err(outer_e) => { - // Find the last layer with an /etc/passwd file, - // respecting whiteouts. 
- let mut passwd = "".to_string(); - for layer in self.get_image_layers() { - if !layer.passwd.is_empty() { - passwd = layer.passwd - } else if layer.passwd == WHITEOUT_MARKER { - passwd = "".to_string(); - } - } + debug!("Parsing uid from image_user = {}", image_user); + process.User.UID = self.parse_user_string(image_user); - if passwd.is_empty() { - warn!("Failed to parse {} as u32 - error {outer_e} - and no /etc/passwd file is available, using uid = gid = 0", image_user); - } else { - match parse_passwd_file(passwd) { - Ok(records) => { - if let Some(record) = - records.iter().find(|&r| r.user == *image_user) - { - process.User.UID = record.uid; - process.User.GID = record.gid; - } - } - Err(inner_e) => { - warn!("Failed to parse {} as u32 - error {outer_e} - and failed to parse /etc/passwd - error {inner_e}, using uid = gid = 0", image_user); - } - } - } - } - } + debug!("Using UID:GID mapping from /etc/passwd"); + process.User.GID = self.get_gid_from_passwd_uid(process.User.UID).unwrap_or(0); } } } @@ -347,7 +464,7 @@ async fn get_image_layers( || layer.media_type.eq(manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE) { if layer_index < config_layer.rootfs.diff_ids.len() { - let (verity_hash, passwd) = get_verity_and_users( + let (verity_hash, passwd, group) = get_verity_and_users( layers_cache_file_path.clone(), client, reference, @@ -359,6 +476,7 @@ async fn get_image_layers( diff_id: config_layer.rootfs.diff_ids[layer_index].clone(), verity_hash: verity_hash.to_owned(), passwd: passwd.to_owned(), + group: group.to_owned(), }); } else { return Err(anyhow!("Too many Docker gzip layers")); @@ -377,7 +495,7 @@ async fn get_verity_and_users( reference: &Reference, layer_digest: &str, diff_id: &str, -) -> Result<(String, String)> { +) -> Result<(String, String, String)> { let temp_dir = tempfile::tempdir_in(".")?; let base_dir = temp_dir.path(); // Use file names supported by both Linux and Windows. 
@@ -390,14 +508,13 @@ async fn get_verity_and_users( let mut verity_hash = "".to_string(); let mut passwd = "".to_string(); + let mut group = "".to_string(); let mut error_message = "".to_string(); let mut error = false; // get value from store and return if it exists if let Some(path) = layers_cache_file_path.as_ref() { - let res = read_verity_and_users_from_store(path, diff_id)?; - verity_hash = res.0; - passwd = res.1; + (verity_hash, passwd, group) = read_verity_and_users_from_store(path, diff_id)?; info!("Using cache file"); info!("dm-verity root hash: {verity_hash}"); } @@ -424,10 +541,15 @@ async fn get_verity_and_users( error = true; } Ok(res) => { - verity_hash = res.0; - passwd = res.1; + (verity_hash, passwd, group) = res; if let Some(path) = layers_cache_file_path.as_ref() { - add_verity_and_users_to_store(path, diff_id, &verity_hash, &passwd)?; + add_verity_and_users_to_store( + path, + diff_id, + &verity_hash, + &passwd, + &group, + )?; } info!("dm-verity root hash: {verity_hash}"); } @@ -443,7 +565,7 @@ async fn get_verity_and_users( } bail!(error_message); } - Ok((verity_hash, passwd)) + Ok((verity_hash, passwd, group)) } // the store is a json file that matches layer hashes to verity hashes @@ -452,6 +574,7 @@ pub fn add_verity_and_users_to_store( diff_id: &str, verity_hash: &str, passwd: &str, + group: &str, ) -> Result<()> { // open the json file in read mode, create it if it doesn't exist let read_file = OpenOptions::new() @@ -473,6 +596,7 @@ pub fn add_verity_and_users_to_store( diff_id: diff_id.to_string(), verity_hash: verity_hash.to_string(), passwd: passwd.to_string(), + group: group.to_string(), }); // Serialize in pretty format @@ -500,13 +624,13 @@ pub fn add_verity_and_users_to_store( pub fn read_verity_and_users_from_store( cache_file: &str, diff_id: &str, -) -> Result<(String, String)> { +) -> Result<(String, String, String)> { match OpenOptions::new().read(true).open(cache_file) { Ok(file) => match serde_json::from_reader(file) { 
Result::, _>::Ok(layers) => { for layer in layers { if layer.diff_id == diff_id { - return Ok((layer.verity_hash, layer.passwd)); + return Ok((layer.verity_hash, layer.passwd, layer.group)); } } } @@ -519,7 +643,7 @@ pub fn read_verity_and_users_from_store( } } - Ok((String::new(), String::new())) + Ok((String::new(), String::new(), String::new())) } async fn create_decompressed_layer_file( @@ -558,7 +682,7 @@ async fn create_decompressed_layer_file( Ok(()) } -pub fn get_verity_hash_and_users(path: &Path) -> Result<(String, String)> { +pub fn get_verity_hash_and_users(path: &Path) -> Result<(String, String, String)> { info!("Calculating dm-verity root hash"); let mut file = std::fs::File::open(path)?; let size = file.seek(std::io::SeekFrom::End(0))?; @@ -574,30 +698,45 @@ pub fn get_verity_hash_and_users(path: &Path) -> Result<(String, String)> { file.seek(std::io::SeekFrom::Start(0))?; let mut passwd = String::new(); + let mut group = String::new(); + let (mut found_passwd, mut found_group) = (false, false); for entry_wrap in tar::Archive::new(file).entries()? 
{ let mut entry = entry_wrap?; let entry_path = entry.header().path()?; let path_str = entry_path.to_str().unwrap(); if path_str == PASSWD_FILE_TAR_PATH { entry.read_to_string(&mut passwd)?; - break; + found_passwd = true; + if found_passwd && found_group { + break; + } } else if path_str == PASSWD_FILE_WHITEOUT_TAR_PATH { passwd = WHITEOUT_MARKER.to_owned(); - break; + found_passwd = true; + if found_passwd && found_group { + break; + } + } else if path_str == GROUP_FILE_TAR_PATH { + entry.read_to_string(&mut group)?; + found_group = true; + if found_passwd && found_group { + break; + } + } else if path_str == GROUP_FILE_WHITEOUT_TAR_PATH { + group = WHITEOUT_MARKER.to_owned(); + found_group = true; + if found_passwd && found_group { + break; + } } } - Ok((result, passwd)) + Ok((result, passwd, group)) } pub async fn get_container(config: &Config, image: &str) -> Result { if let Some(socket_path) = &config.containerd_socket_path { - return Container::new_containerd_pull( - config.layers_cache_file_path.clone(), - image, - socket_path, - ) - .await; + return Container::new_containerd_pull(config, image, socket_path).await; } Container::new(config, image).await } @@ -643,7 +782,7 @@ fn build_auth(reference: &Reference) -> RegistryAuth { RegistryAuth::Anonymous } -fn parse_passwd_file(passwd: String) -> Result> { +fn parse_passwd_file(passwd: &str) -> Result> { let mut records = Vec::new(); for rec in passwd.lines() { @@ -670,3 +809,28 @@ fn parse_passwd_file(passwd: String) -> Result> { Ok(records) } + +fn parse_group_file(group: &str) -> Result> { + let mut records = Vec::new(); + + for rec in group.lines() { + let fields: Vec<&str> = rec.split(':').collect(); + + let field_count = fields.len(); + if field_count != 4 { + return Err(anyhow!( + "Incorrect group record, expected 4 fields, got {}", + field_count + )); + } + + records.push(GroupRecord { + name: fields[0].to_string(), + validate: fields[1] == "x", + gid: fields[2].parse().unwrap(), + user_list:
fields[3].to_string(), + }); + } + + Ok(records) +} diff --git a/src/tools/genpolicy/src/registry_containerd.rs b/src/tools/genpolicy/src/registry_containerd.rs index b7b2bd555..40aa4a593 100644 --- a/src/tools/genpolicy/src/registry_containerd.rs +++ b/src/tools/genpolicy/src/registry_containerd.rs @@ -7,8 +7,9 @@ #![allow(non_snake_case)] use crate::registry::{ add_verity_and_users_to_store, get_verity_hash_and_users, read_verity_and_users_from_store, - Container, DockerConfigLayer, ImageLayer, + Container, DockerConfigLayer, ImageLayer, WHITEOUT_MARKER, }; +use crate::utils::Config; use anyhow::{anyhow, bail, Result}; use containerd_client::{services::v1::GetImageRequest, with_namespace}; @@ -28,7 +29,7 @@ use tower::service_fn; impl Container { pub async fn new_containerd_pull( - layers_cache_file_path: Option, + config: &Config, image: &str, containerd_socket_path: &str, ) -> Result { @@ -60,17 +61,42 @@ impl Container { .await .unwrap(); let image_layers = get_image_layers( - layers_cache_file_path, + config.layers_cache_file_path.clone(), &manifest, &config_layer, &ctrd_client, ) .await?; + // Find the last layer with an /etc/* file, respecting whiteouts. + let mut passwd = String::new(); + let mut group = String::new(); + // Nydus/guest_pull doesn't make available passwd/group files from layers properly. 
+ // See issue https://github.com/kata-containers/kata-containers/issues/11162 + if !config.settings.cluster_config.guest_pull { + for layer in &image_layers { + if layer.passwd == WHITEOUT_MARKER { + passwd = String::new(); + } else if !layer.passwd.is_empty() { + passwd = layer.passwd.clone(); + } + + if layer.group == WHITEOUT_MARKER { + group = String::new(); + } else if !layer.group.is_empty() { + group = layer.group.clone(); + } + } + } else { + info!("Guest pull is enabled, skipping passwd/group file parsing"); + } + Ok(Container { image: image_str, config_layer, image_layers, + passwd, + group, }) } } @@ -265,7 +291,7 @@ pub async fn get_image_layers( || layer_media_type.eq("application/vnd.oci.image.layer.v1.tar+gzip") { if layer_index < config_layer.rootfs.diff_ids.len() { - let (verity_hash, passwd) = get_verity_and_users( + let (verity_hash, passwd, group) = get_verity_and_users( layers_cache_file_path.clone(), layer["digest"].as_str().unwrap(), client, @@ -276,6 +302,7 @@ pub async fn get_image_layers( diff_id: config_layer.rootfs.diff_ids[layer_index].clone(), verity_hash, passwd, + group, }; layersVec.push(imageLayer); } else { @@ -293,7 +320,7 @@ async fn get_verity_and_users( layer_digest: &str, client: &containerd_client::Client, diff_id: &str, -) -> Result<(String, String)> { +) -> Result<(String, String, String)> { let temp_dir = tempfile::tempdir_in(".")?; let base_dir = temp_dir.path(); // Use file names supported by both Linux and Windows. 
@@ -306,13 +333,12 @@ async fn get_verity_and_users( let mut verity_hash = "".to_string(); let mut passwd = "".to_string(); + let mut group = "".to_string(); let mut error_message = "".to_string(); let mut error = false; if let Some(path) = layers_cache_file_path.as_ref() { - let res = read_verity_and_users_from_store(path, diff_id)?; - verity_hash = res.0; - passwd = res.1; + (verity_hash, passwd, group) = read_verity_and_users_from_store(path, diff_id)?; info!("Using cache file"); info!("dm-verity root hash: {verity_hash}"); } @@ -338,10 +364,15 @@ async fn get_verity_and_users( error = true; } Ok(res) => { - verity_hash = res.0; - passwd = res.1; + (verity_hash, passwd, group) = res; if let Some(path) = layers_cache_file_path.as_ref() { - add_verity_and_users_to_store(path, diff_id, &verity_hash, &passwd)?; + add_verity_and_users_to_store( + path, + diff_id, + &verity_hash, + &passwd, + &group, + )?; } info!("dm-verity root hash: {verity_hash}"); } @@ -356,7 +387,7 @@ async fn get_verity_and_users( } bail!(error_message); } - Ok((verity_hash, passwd)) + Ok((verity_hash, passwd, group)) } async fn create_decompressed_layer_file( diff --git a/src/tools/genpolicy/src/replica_set.rs b/src/tools/genpolicy/src/replica_set.rs index 97d189f46..7d7c3781f 100644 --- a/src/tools/genpolicy/src/replica_set.rs +++ b/src/tools/genpolicy/src/replica_set.rs @@ -109,8 +109,12 @@ impl yaml::K8sResource for ReplicaSet { false } - fn get_process_fields(&self, process: &mut policy::KataProcess) { - yaml::get_process_fields(process, &self.spec.template.spec.securityContext); + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { + yaml::get_process_fields( + process, + &self.spec.template.spec.securityContext, + must_check_passwd, + ); } fn get_sysctls(&self) -> Vec { diff --git a/src/tools/genpolicy/src/replication_controller.rs b/src/tools/genpolicy/src/replication_controller.rs index f5920b863..711971d7c 100644 --- 
a/src/tools/genpolicy/src/replication_controller.rs +++ b/src/tools/genpolicy/src/replication_controller.rs @@ -111,8 +111,12 @@ impl yaml::K8sResource for ReplicationController { false } - fn get_process_fields(&self, process: &mut policy::KataProcess) { - yaml::get_process_fields(process, &self.spec.template.spec.securityContext); + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { + yaml::get_process_fields( + process, + &self.spec.template.spec.securityContext, + must_check_passwd, + ); } fn get_sysctls(&self) -> Vec { diff --git a/src/tools/genpolicy/src/stateful_set.rs b/src/tools/genpolicy/src/stateful_set.rs index afa859ef0..a1613998e 100644 --- a/src/tools/genpolicy/src/stateful_set.rs +++ b/src/tools/genpolicy/src/stateful_set.rs @@ -193,8 +193,12 @@ impl yaml::K8sResource for StatefulSet { .or_else(|| Some(String::new())) } - fn get_process_fields(&self, process: &mut policy::KataProcess) { - yaml::get_process_fields(process, &self.spec.template.spec.securityContext); + fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) { + yaml::get_process_fields( + process, + &self.spec.template.spec.securityContext, + must_check_passwd, + ); } fn get_sysctls(&self) -> Vec { diff --git a/src/tools/genpolicy/src/yaml.rs b/src/tools/genpolicy/src/yaml.rs index b5d77d81e..bd2ab91be 100644 --- a/src/tools/genpolicy/src/yaml.rs +++ b/src/tools/genpolicy/src/yaml.rs @@ -96,7 +96,11 @@ pub trait K8sResource { None } - fn get_process_fields(&self, _process: &mut policy::KataProcess) { + fn get_process_fields( + &self, + _process: &mut policy::KataProcess, + _must_check_passwd: &mut bool, + ) { // No need to implement support for securityContext or similar fields // for some of the K8s resource types. 
} @@ -386,10 +390,36 @@ fn handle_unused_field(path: &str, silent_unsupported_fields: bool) { pub fn get_process_fields( process: &mut policy::KataProcess, security_context: &Option, + must_check_passwd: &mut bool, ) { if let Some(context) = security_context { if let Some(uid) = context.runAsUser { process.User.UID = uid.try_into().unwrap(); + // Changing the UID can break the GID mapping + // if a /etc/passwd file is present. + // The proper GID is determined, in order of preference: + // 1. the securityContext runAsGroup field (applied last in code) + // 2. lacking an explicit runAsGroup, /etc/passwd + // (parsed in policy::get_container_process()) + // 3. lacking an /etc/passwd, 0 (unwrap_or) + // + // This behavior comes from the containerd runtime implementation: + // WithUser https://github.com/containerd/containerd/blob/main/pkg/oci/spec_opts.go#L592 + // + // We can't parse the /etc/passwd file here because + // we are in the resource context. Defer execution to outside + // the resource context, in policy::get_container_process() + // IFF the UID is changed by the resource securityContext but not the GID. 
+ *must_check_passwd = true; + } + + if let Some(gid) = context.runAsGroup { + process.User.GID = gid.try_into().unwrap(); + *must_check_passwd = false; + } + + if let Some(allow) = context.allowPrivilegeEscalation { + process.NoNewPrivileges = !allow } } } diff --git a/src/tools/genpolicy/tests/main.rs b/src/tools/genpolicy/tests/main.rs index c0ed5af14..4e01fa28e 100644 --- a/src/tools/genpolicy/tests/main.rs +++ b/src/tools/genpolicy/tests/main.rs @@ -197,4 +197,9 @@ mod tests { async fn test_state_exec_process() { runtests("state/execprocess").await; } + + #[tokio::test] + async fn test_create_container_security_context() { + runtests("createcontainer/security_context").await; + } } diff --git a/src/tools/genpolicy/tests/testdata/createcontainer/generate_name/testcases.json b/src/tools/genpolicy/tests/testdata/createcontainer/generate_name/testcases.json index 87865f4c5..458b324c7 100644 --- a/src/tools/genpolicy/tests/testdata/createcontainer/generate_name/testcases.json +++ b/src/tools/genpolicy/tests/testdata/createcontainer/generate_name/testcases.json @@ -65,7 +65,8 @@ "SelinuxLabel": "", "User": { "Username": "", - "UID": 65535 + "UID": 65535, + "GID": 65535 }, "Args": [ "/pause" @@ -197,7 +198,8 @@ "SelinuxLabel": "", "User": { "Username": "", - "UID": 65535 + "UID": 65535, + "GID": 65535 }, "Args": [ "/pause" @@ -263,4 +265,4 @@ } } } -] +] \ No newline at end of file diff --git a/src/tools/genpolicy/tests/testdata/createcontainer/network_namespace/testcases.json b/src/tools/genpolicy/tests/testdata/createcontainer/network_namespace/testcases.json index 67b780f71..1e37e157c 100644 --- a/src/tools/genpolicy/tests/testdata/createcontainer/network_namespace/testcases.json +++ b/src/tools/genpolicy/tests/testdata/createcontainer/network_namespace/testcases.json @@ -65,7 +65,8 @@ "SelinuxLabel": "", "User": { "Username": "", - "UID": 65535 + "UID": 65535, + "GID": 65535 }, "Args": [ "/pause" @@ -197,7 +198,8 @@ "SelinuxLabel": "", "User": { "Username": "", - 
"UID": 65535 + "UID": 65535, + "GID": 65535 }, "Args": [ "/pause" @@ -325,7 +327,8 @@ "SelinuxLabel": "", "User": { "Username": "", - "UID": 65535 + "UID": 65535, + "GID": 65535 }, "Args": [ "/pause" @@ -457,7 +460,8 @@ "SelinuxLabel": "", "User": { "Username": "", - "UID": 65535 + "UID": 65535, + "GID": 65535 }, "Args": [ "/pause" @@ -523,4 +527,4 @@ } } } -] +] \ No newline at end of file diff --git a/src/tools/genpolicy/tests/testdata/createcontainer/security_context/pod.yaml b/src/tools/genpolicy/tests/testdata/createcontainer/security_context/pod.yaml new file mode 100644 index 000000000..51008f05e --- /dev/null +++ b/src/tools/genpolicy/tests/testdata/createcontainer/security_context/pod.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: Pod +metadata: + name: dummy +spec: + runtimeClassName: kata-cc-isolation + securityContext: + runAsUser: 65534 + runAsGroup: 65534 + containers: + - name: dummy + image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48 \ No newline at end of file diff --git a/src/tools/genpolicy/tests/testdata/createcontainer/security_context/testcases.json b/src/tools/genpolicy/tests/testdata/createcontainer/security_context/testcases.json new file mode 100644 index 000000000..e5800c164 --- /dev/null +++ b/src/tools/genpolicy/tests/testdata/createcontainer/security_context/testcases.json @@ -0,0 +1,737 @@ +[ + { + "description": "Correct User for security context", + "allowed": true, + "request": { + "type": "CreateContainer", + "OCI": { + "Annotations": { + "io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "io.katacontainers.pkg.oci.container_type": "pod_sandbox", + "io.kubernetes.cri.container-type": "sandbox", + "io.kubernetes.cri.sandbox-cpu-period": "100000", + "io.kubernetes.cri.sandbox-cpu-quota": "0", + "io.kubernetes.cri.sandbox-cpu-shares": "2", + "io.kubernetes.cri.sandbox-id": 
"a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "io.kubernetes.cri.sandbox-log-directory": "/var/log/pods/kata-containers-k8s-tests_dummy_fd055c20-d44c-4fc5-aa90-283f629201af", + "io.kubernetes.cri.sandbox-memory": "0", + "io.kubernetes.cri.sandbox-name": "dummy", + "io.kubernetes.cri.sandbox-namespace": "kata-containers-k8s-tests", + "io.kubernetes.cri.sandbox-uid": "fd055c20-d44c-4fc5-aa90-283f629201af", + "nerdctl/network-namespace": "/var/run/netns/cni-50720768-bd65-bf4b-6185-5d5a2adf5305" + }, + "Hooks": null, + "Hostname": "dummy", + "Linux": { + "CgroupsPath": "kubepods-besteffort-podfd055c20_d44c_4fc5_aa90_283f629201af.slice:cri-containerd:a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "Devices": [], + "GIDMappings": [], + "IntelRdt": null, + "MaskedPaths": [ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/sys/devices/virtual/powercap", + "/proc/scsi" + ], + "MountLabel": "", + "Namespaces": [ + { + "Path": "", + "Type": "ipc" + }, + { + "Path": "", + "Type": "uts" + }, + { + "Path": "", + "Type": "mount" + } + ], + "ReadonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ], + "Resources": { + "BlockIO": null, + "CPU": { + "Cpus": "", + "Mems": "", + "Period": 0, + "Quota": 0, + "RealtimePeriod": 0, + "RealtimeRuntime": 0, + "Shares": 2 + }, + "Devices": [], + "HugepageLimits": [], + "Memory": null, + "Network": null, + "Pids": null + }, + "RootfsPropagation": "", + "Seccomp": null, + "Sysctl": {}, + "UIDMappings": [] + }, + "Mounts": [ + { + "destination": "/proc", + "options": [ + "nosuid", + "noexec", + "nodev" + ], + "source": "proc", + "type_": "proc" + }, + { + "destination": "/dev", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ], + "source": "tmpfs", + "type_": "tmpfs" + }, + { + "destination": 
"/dev/pts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ], + "source": "devpts", + "type_": "devpts" + }, + { + "destination": "/dev/mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ], + "source": "mqueue", + "type_": "mqueue" + }, + { + "destination": "/sys", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ], + "source": "sysfs", + "type_": "sysfs" + }, + { + "destination": "/dev/shm", + "options": [ + "rbind" + ], + "source": "/run/kata-containers/sandbox/shm", + "type_": "bind" + }, + { + "destination": "/etc/resolv.conf", + "options": [ + "rbind", + "ro", + "nosuid", + "nodev", + "noexec" + ], + "source": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4-8f7f27d37e8af290-resolv.conf", + "type_": "bind" + } + ], + "Process": { + "ApparmorProfile": "", + "Args": [ + "/pause" + ], + "Capabilities": { + "Ambient": [], + "Bounding": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "Effective": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "Inheritable": [], + "Permitted": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ] + }, + "ConsoleSize": null, + "Cwd": "/", + "Env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + ], + "NoNewPrivileges": true, + "OOMScoreAdj": -998, + "Rlimits": [], + 
"SelinuxLabel": "", + "Terminal": false, + "User": { + "GID": 65534, + "UID": 65534, + "Username": "" + } + }, + "Root": { + "Path": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4/rootfs", + "Readonly": true + }, + "Solaris": null, + "Version": "1.1.0", + "Windows": null + } + } + }, + { + "description": "Incorrect User.UID for security context", + "allowed": false, + "request": { + "type": "CreateContainer", + "OCI": { + "Annotations": { + "io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "io.katacontainers.pkg.oci.container_type": "pod_sandbox", + "io.kubernetes.cri.container-type": "sandbox", + "io.kubernetes.cri.sandbox-cpu-period": "100000", + "io.kubernetes.cri.sandbox-cpu-quota": "0", + "io.kubernetes.cri.sandbox-cpu-shares": "2", + "io.kubernetes.cri.sandbox-id": "a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "io.kubernetes.cri.sandbox-log-directory": "/var/log/pods/kata-containers-k8s-tests_dummy_fd055c20-d44c-4fc5-aa90-283f629201af", + "io.kubernetes.cri.sandbox-memory": "0", + "io.kubernetes.cri.sandbox-name": "dummy", + "io.kubernetes.cri.sandbox-namespace": "kata-containers-k8s-tests", + "io.kubernetes.cri.sandbox-uid": "fd055c20-d44c-4fc5-aa90-283f629201af", + "nerdctl/network-namespace": "/var/run/netns/cni-50720768-bd65-bf4b-6185-5d5a2adf5305" + }, + "Hooks": null, + "Hostname": "dummy", + "Linux": { + "CgroupsPath": "kubepods-besteffort-podfd055c20_d44c_4fc5_aa90_283f629201af.slice:cri-containerd:a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "Devices": [], + "GIDMappings": [], + "IntelRdt": null, + "MaskedPaths": [ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/sys/devices/virtual/powercap", + 
"/proc/scsi" + ], + "MountLabel": "", + "Namespaces": [ + { + "Path": "", + "Type": "ipc" + }, + { + "Path": "", + "Type": "uts" + }, + { + "Path": "", + "Type": "mount" + } + ], + "ReadonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ], + "Resources": { + "BlockIO": null, + "CPU": { + "Cpus": "", + "Mems": "", + "Period": 0, + "Quota": 0, + "RealtimePeriod": 0, + "RealtimeRuntime": 0, + "Shares": 2 + }, + "Devices": [], + "HugepageLimits": [], + "Memory": null, + "Network": null, + "Pids": null + }, + "RootfsPropagation": "", + "Seccomp": null, + "Sysctl": {}, + "UIDMappings": [] + }, + "Mounts": [ + { + "destination": "/proc", + "options": [ + "nosuid", + "noexec", + "nodev" + ], + "source": "proc", + "type_": "proc" + }, + { + "destination": "/dev", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ], + "source": "tmpfs", + "type_": "tmpfs" + }, + { + "destination": "/dev/pts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ], + "source": "devpts", + "type_": "devpts" + }, + { + "destination": "/dev/mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ], + "source": "mqueue", + "type_": "mqueue" + }, + { + "destination": "/sys", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ], + "source": "sysfs", + "type_": "sysfs" + }, + { + "destination": "/dev/shm", + "options": [ + "rbind" + ], + "source": "/run/kata-containers/sandbox/shm", + "type_": "bind" + }, + { + "destination": "/etc/resolv.conf", + "options": [ + "rbind", + "ro", + "nosuid", + "nodev", + "noexec" + ], + "source": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4-8f7f27d37e8af290-resolv.conf", + "type_": "bind" + } + ], + "Process": { + "ApparmorProfile": "", + "Args": [ + "/pause" + ], + "Capabilities": { + "Ambient": [], + "Bounding": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + 
"CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "Effective": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "Inheritable": [], + "Permitted": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ] + }, + "ConsoleSize": null, + "Cwd": "/", + "Env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + ], + "NoNewPrivileges": true, + "OOMScoreAdj": -998, + "Rlimits": [], + "SelinuxLabel": "", + "Terminal": false, + "User": { + "GID": 65534, + "UID": 0, + "Username": "" + } + }, + "Root": { + "Path": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4/rootfs", + "Readonly": true + }, + "Solaris": null, + "Version": "1.1.0", + "Windows": null + } + } + }, + { + "description": "Incorrect User.GID for security context", + "allowed": false, + "request": { + "type": "CreateContainer", + "OCI": { + "Annotations": { + "io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "io.katacontainers.pkg.oci.container_type": "pod_sandbox", + "io.kubernetes.cri.container-type": "sandbox", + "io.kubernetes.cri.sandbox-cpu-period": "100000", + "io.kubernetes.cri.sandbox-cpu-quota": "0", + "io.kubernetes.cri.sandbox-cpu-shares": "2", + "io.kubernetes.cri.sandbox-id": "a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + 
"io.kubernetes.cri.sandbox-log-directory": "/var/log/pods/kata-containers-k8s-tests_dummy_fd055c20-d44c-4fc5-aa90-283f629201af", + "io.kubernetes.cri.sandbox-memory": "0", + "io.kubernetes.cri.sandbox-name": "dummy", + "io.kubernetes.cri.sandbox-namespace": "kata-containers-k8s-tests", + "io.kubernetes.cri.sandbox-uid": "fd055c20-d44c-4fc5-aa90-283f629201af", + "nerdctl/network-namespace": "/var/run/netns/cni-50720768-bd65-bf4b-6185-5d5a2adf5305" + }, + "Hooks": null, + "Hostname": "dummy", + "Linux": { + "CgroupsPath": "kubepods-besteffort-podfd055c20_d44c_4fc5_aa90_283f629201af.slice:cri-containerd:a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4", + "Devices": [], + "GIDMappings": [], + "IntelRdt": null, + "MaskedPaths": [ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/sys/devices/virtual/powercap", + "/proc/scsi" + ], + "MountLabel": "", + "Namespaces": [ + { + "Path": "", + "Type": "ipc" + }, + { + "Path": "", + "Type": "uts" + }, + { + "Path": "", + "Type": "mount" + } + ], + "ReadonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ], + "Resources": { + "BlockIO": null, + "CPU": { + "Cpus": "", + "Mems": "", + "Period": 0, + "Quota": 0, + "RealtimePeriod": 0, + "RealtimeRuntime": 0, + "Shares": 2 + }, + "Devices": [], + "HugepageLimits": [], + "Memory": null, + "Network": null, + "Pids": null + }, + "RootfsPropagation": "", + "Seccomp": null, + "Sysctl": {}, + "UIDMappings": [] + }, + "Mounts": [ + { + "destination": "/proc", + "options": [ + "nosuid", + "noexec", + "nodev" + ], + "source": "proc", + "type_": "proc" + }, + { + "destination": "/dev", + "options": [ + "nosuid", + "strictatime", + "mode=755", + "size=65536k" + ], + "source": "tmpfs", + "type_": "tmpfs" + }, + { + "destination": "/dev/pts", + "options": [ + "nosuid", + "noexec", + "newinstance", + 
"ptmxmode=0666", + "mode=0620", + "gid=5" + ], + "source": "devpts", + "type_": "devpts" + }, + { + "destination": "/dev/mqueue", + "options": [ + "nosuid", + "noexec", + "nodev" + ], + "source": "mqueue", + "type_": "mqueue" + }, + { + "destination": "/sys", + "options": [ + "nosuid", + "noexec", + "nodev", + "ro" + ], + "source": "sysfs", + "type_": "sysfs" + }, + { + "destination": "/dev/shm", + "options": [ + "rbind" + ], + "source": "/run/kata-containers/sandbox/shm", + "type_": "bind" + }, + { + "destination": "/etc/resolv.conf", + "options": [ + "rbind", + "ro", + "nosuid", + "nodev", + "noexec" + ], + "source": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4-8f7f27d37e8af290-resolv.conf", + "type_": "bind" + } + ], + "Process": { + "ApparmorProfile": "", + "Args": [ + "/pause" + ], + "Capabilities": { + "Ambient": [], + "Bounding": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "Effective": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ], + "Inheritable": [], + "Permitted": [ + "CAP_CHOWN", + "CAP_DAC_OVERRIDE", + "CAP_FSETID", + "CAP_FOWNER", + "CAP_MKNOD", + "CAP_NET_RAW", + "CAP_SETGID", + "CAP_SETUID", + "CAP_SETFCAP", + "CAP_SETPCAP", + "CAP_NET_BIND_SERVICE", + "CAP_SYS_CHROOT", + "CAP_KILL", + "CAP_AUDIT_WRITE" + ] + }, + "ConsoleSize": null, + "Cwd": "/", + "Env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin" + ], + "NoNewPrivileges": true, + "OOMScoreAdj": -998, + "Rlimits": [], + "SelinuxLabel": "", + "Terminal": false, + "User": { + "GID": 65534, + "UID": 0, 
+ "Username": "" + } + }, + "Root": { + "Path": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4/rootfs", + "Readonly": true + }, + "Solaris": null, + "Version": "1.1.0", + "Windows": null + } + } + } +] \ No newline at end of file diff --git a/src/tools/genpolicy/tests/testdata/createcontainer/sysctls/testcases.json b/src/tools/genpolicy/tests/testdata/createcontainer/sysctls/testcases.json index a3209134b..cbf3ab631 100644 --- a/src/tools/genpolicy/tests/testdata/createcontainer/sysctls/testcases.json +++ b/src/tools/genpolicy/tests/testdata/createcontainer/sysctls/testcases.json @@ -2,7 +2,9 @@ { "description": "sysctls listed in yaml or settings", "allowed": true, - "state": {"sandbox_name": "policy-redis-deployment-6674f9448-xjrzf"}, + "state": { + "sandbox_name": "policy-redis-deployment-6674f9448-xjrzf" + }, "request": { "type": "CreateContainer", "OCI": { @@ -238,7 +240,7 @@ "AdditionalGids": [ 0 ], - "GID": 0, + "GID": 65535, "UID": 65535, "Username": "" } @@ -278,7 +280,9 @@ { "description": "sysctl not listed in yaml or settings", "allowed": false, - "state": {"sandbox_name": "policy-redis-deployment-6674f9448-xjrzf"}, + "state": { + "sandbox_name": "policy-redis-deployment-6674f9448-xjrzf" + }, "request": { "type": "CreateContainer", "OCI": { @@ -514,7 +518,7 @@ "AdditionalGids": [ 0 ], - "GID": 0, + "GID": 65535, "UID": 65535, "Username": "" } @@ -551,4 +555,4 @@ "string_user": null } } -] +] \ No newline at end of file diff --git a/src/tools/genpolicy/tests/testdata/state/createcontainer/testcases.json b/src/tools/genpolicy/tests/testdata/state/createcontainer/testcases.json index 990879fdc..a9e5f049a 100644 --- a/src/tools/genpolicy/tests/testdata/state/createcontainer/testcases.json +++ b/src/tools/genpolicy/tests/testdata/state/createcontainer/testcases.json @@ -233,7 +233,7 @@ "AdditionalGids": [ 0 ], - "GID": 0, + "GID": 65535, "UID": 65535, "Username": "" } @@ -279,4 +279,4 @@ "timeout": 0 
} } -] +] \ No newline at end of file diff --git a/tests/integration/kubernetes/k8s-policy-deployment-sc.bats b/tests/integration/kubernetes/k8s-policy-deployment-sc.bats new file mode 100644 index 000000000..444b34b47 --- /dev/null +++ b/tests/integration/kubernetes/k8s-policy-deployment-sc.bats @@ -0,0 +1,103 @@ +#!/usr/bin/env bats +# +# Copyright (c) 2024 Microsoft. +# +# SPDX-License-Identifier: Apache-2.0 +# + +load "${BATS_TEST_DIRNAME}/../../common.bash" +load "${BATS_TEST_DIRNAME}/tests_common.sh" + +setup() { + auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled." + + get_pod_config_dir + + deployment_name="policy-redis-deployment" + pod_sc_deployment_yaml="${pod_config_dir}/k8s-pod-sc-deployment.yaml" + pod_sc_nobodyupdate_deployment_yaml="${pod_config_dir}/k8s-pod-sc-nobodyupdate-deployment.yaml" + pod_sc_layered_deployment_yaml="${pod_config_dir}/k8s-layered-sc-deployment.yaml" + + # Save some time by executing genpolicy a single time. + if [ "${BATS_TEST_NUMBER}" == "1" ]; then + # Add an appropriate policy to the correct YAML file. + policy_settings_dir="$(create_tmp_policy_settings_dir "${pod_config_dir}")" + add_requests_to_policy_settings "${policy_settings_dir}" "ReadStreamRequest" + auto_generate_policy "${policy_settings_dir}" "${pod_sc_deployment_yaml}" + auto_generate_policy "${policy_settings_dir}" "${pod_sc_nobodyupdate_deployment_yaml}" + auto_generate_policy "${policy_settings_dir}" "${pod_sc_layered_deployment_yaml}" + fi + + # Start each test case with a copy of the correct yaml file. 
+ incorrect_deployment_yaml="${pod_config_dir}/k8s-layered-sc-deployment-incorrect.yaml" + cp "${pod_sc_layered_deployment_yaml}" "${incorrect_deployment_yaml}" +} + +@test "Successful sc deployment with auto-generated policy and container image volumes" { + # Initiate deployment + kubectl apply -f "${pod_sc_deployment_yaml}" + + # Wait for the deployment to be created + cmd="kubectl rollout status --timeout=1s deployment/${deployment_name} | grep 'successfully rolled out'" + info "Waiting for: ${cmd}" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" +} + +@test "Successful sc deployment with security context choosing another valid user" { + # Initiate deployment + kubectl apply -f "${pod_sc_nobodyupdate_deployment_yaml}" + + # Wait for the deployment to be created + cmd="kubectl rollout status --timeout=1s deployment/${deployment_name} | grep 'successfully rolled out'" + info "Waiting for: ${cmd}" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" +} + +@test "Successful layered sc deployment with auto-generated policy and container image volumes" { + # Initiate deployment + kubectl apply -f "${pod_sc_layered_deployment_yaml}" + + # Wait for the deployment to be created + cmd="kubectl rollout status --timeout=1s deployment/${deployment_name} | grep 'successfully rolled out'" + info "Waiting for: ${cmd}" + waitForProcess "${wait_time}" "${sleep_time}" "${cmd}" +} + +test_deployment_policy_error() { + # Initiate deployment + kubectl apply -f "${incorrect_deployment_yaml}" + + # Wait for the deployment pod to fail + wait_for_blocked_request "CreateContainerRequest" "${deployment_name}" +} + +@test "Policy failure: unexpected GID = 0 for layered securityContext deployment" { + # Change the pod GID to 0 after the policy has been generated using a different + # runAsGroup value. The policy would use GID = 0 by default, if there weren't + # a different runAsGroup value in the YAML file. 
+ yq -i \ + '.spec.template.spec.securityContext.runAsGroup = 0' \ + "${incorrect_deployment_yaml}" + + test_deployment_policy_error +} + +teardown() { + auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled." + + # Pod debugging information. Don't print the "Message:" line because it contains a truncated policy log. + info "Pod ${deployment_name}:" + kubectl describe pod "${deployment_name}" | grep -v "Message:" + + # Deployment debugging information. The --watch=false argument makes "kubectl rollout status" + # return instead of waiting for a possibly failed deployment to complete. + info "Deployment ${deployment_name}:" + kubectl describe deployment "${deployment_name}" + kubectl rollout status deployment/${deployment_name} --watch=false + + # Clean-up + kubectl delete deployment "${deployment_name}" + + delete_tmp_policy_settings_dir "${policy_settings_dir}" + rm -f "${incorrect_deployment_yaml}" +} diff --git a/tests/integration/kubernetes/run_kubernetes_tests.sh b/tests/integration/kubernetes/run_kubernetes_tests.sh index 2bfc29647..9baeb8194 100755 --- a/tests/integration/kubernetes/run_kubernetes_tests.sh +++ b/tests/integration/kubernetes/run_kubernetes_tests.sh @@ -74,6 +74,7 @@ else "k8s-pod-quota.bats" \ "k8s-policy-hard-coded.bats" \ "k8s-policy-deployment.bats" \ + "k8s-policy-deployment-sc.bats" \ "k8s-policy-job.bats" \ "k8s-policy-logs.bats" \ "k8s-policy-pod.bats" \ diff --git a/tests/integration/kubernetes/runtimeclass_workloads/k8s-layered-sc-deployment.yaml b/tests/integration/kubernetes/runtimeclass_workloads/k8s-layered-sc-deployment.yaml new file mode 100644 index 000000000..c006678fd --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/k8s-layered-sc-deployment.yaml @@ -0,0 +1,41 @@ +# +# Copyright (c) 2024 Microsoft +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: policy-redis-deployment + labels: + app: policyredis +spec: + selector: + 
matchLabels: + app: policyredis + role: master + tier: backend + replicas: 1 + template: + metadata: + labels: + app: policyredis + role: master + tier: backend + spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + securityContext: + runAsUser: 2000 + runAsGroup: 2000 + containers: + - name: master + image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48 + securityContext: + runAsUser: 3000 + resources: + requests: + cpu: 100m + memory: 100Mi + ports: + - containerPort: 6379 diff --git a/tests/integration/kubernetes/runtimeclass_workloads/k8s-pod-sc-deployment.yaml b/tests/integration/kubernetes/runtimeclass_workloads/k8s-pod-sc-deployment.yaml new file mode 100644 index 000000000..eff0d489a --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/k8s-pod-sc-deployment.yaml @@ -0,0 +1,39 @@ +# +# Copyright (c) 2024 Microsoft +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: policy-redis-deployment + labels: + app: policyredis +spec: + selector: + matchLabels: + app: policyredis + role: master + tier: backend + replicas: 1 + template: + metadata: + labels: + app: policyredis + role: master + tier: backend + spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + securityContext: + runAsUser: 2000 + runAsGroup: 2000 + containers: + - name: master + image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48 + resources: + requests: + cpu: 100m + memory: 100Mi + ports: + - containerPort: 6379 diff --git a/tests/integration/kubernetes/runtimeclass_workloads/k8s-pod-sc-nobodyupdate-deployment.yaml b/tests/integration/kubernetes/runtimeclass_workloads/k8s-pod-sc-nobodyupdate-deployment.yaml new file mode 100644 index 000000000..d0051b29b --- /dev/null +++ b/tests/integration/kubernetes/runtimeclass_workloads/k8s-pod-sc-nobodyupdate-deployment.yaml @@ -0,0 +1,38 @@ +# +# Copyright (c) 2024 
Microsoft +# +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: policy-redis-deployment + labels: + app: policyredis +spec: + selector: + matchLabels: + app: policyredis + role: master + tier: backend + replicas: 1 + template: + metadata: + labels: + app: policyredis + role: master + tier: backend + spec: + terminationGracePeriodSeconds: 0 + runtimeClassName: kata + securityContext: + runAsUser: 65534 + containers: + - name: master + image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48 + resources: + requests: + cpu: 100m + memory: 100Mi + ports: + - containerPort: 6379 diff --git a/tests/integration/kubernetes/tests_common.sh b/tests/integration/kubernetes/tests_common.sh index 88637a014..a1a4adda3 100644 --- a/tests/integration/kubernetes/tests_common.sh +++ b/tests/integration/kubernetes/tests_common.sh @@ -116,6 +116,15 @@ adapt_common_policy_settings_for_cbl_mariner() { true } +# adapt common policy settings for guest-pull Hosts +# see issue https://github.com/kata-containers/kata-containers/issues/11162 +adapt_common_policy_settings_for_guest_pull() { + local settings_dir=$1 + + info "Adapting common policy settings for guest-pull environment" + jq '.cluster_config.guest_pull = true' "${settings_dir}/genpolicy-settings.json" > temp.json && sudo mv temp.json "${settings_dir}/genpolicy-settings.json" +} + # adapt common policy settings for various platforms adapt_common_policy_settings() { local settings_dir=$1 @@ -143,6 +152,12 @@ adapt_common_policy_settings() { adapt_common_policy_settings_for_cbl_mariner "${settings_dir}" ;; esac + + case "${PULL_TYPE}" in + "guest-pull") + adapt_common_policy_settings_for_guest_pull "${settings_dir}" + ;; + esac } # If auto-generated policy testing is enabled, make a copy of the genpolicy settings,