Merge pull request #11077 from microsoft/cameronbaird/address-gid-mismatch

genpolicy: Align GID behavior with CRI and enable GID policy checks.
This commit is contained in:
Aurélien Bombo 2025-04-29 22:23:23 +01:00 committed by GitHub
commit 46af7cf817
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
27 changed files with 1424 additions and 126 deletions

View File

@ -34,6 +34,13 @@
"io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/$(bundle-id)" "io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/$(bundle-id)"
}, },
"Process": { "Process": {
"NoNewPrivileges": true,
"User": {
"UID": 65535,
"GID": 65535,
"AdditionalGids": [],
"Username": ""
},
"Args": [ "Args": [
"/pause" "/pause"
] ]
@ -315,7 +322,8 @@
"oci_version": "1.1.0" "oci_version": "1.1.0"
}, },
"cluster_config": { "cluster_config": {
"pause_container_image": "mcr.microsoft.com/oss/kubernetes/pause:3.6" "pause_container_image": "mcr.microsoft.com/oss/kubernetes/pause:3.6",
"guest_pull": false
}, },
"request_defaults": { "request_defaults": {
"CreateContainerRequest": { "CreateContainerRequest": {

View File

@ -694,11 +694,8 @@ allow_user(p_process, i_process) {
print("allow_user: input uid =", i_user.UID, "policy uid =", p_user.UID) print("allow_user: input uid =", i_user.UID, "policy uid =", p_user.UID)
p_user.UID == i_user.UID p_user.UID == i_user.UID
# TODO: track down the reason for registry.k8s.io/pause:3.9 being print("allow_user: input gid =", i_user.GID, "policy gid =", p_user.GID)
# executed with gid = 0 despite having "65535:65535" in its container image p_user.GID == i_user.GID
# config.
#print("allow_user: input gid =", i_user.GID, "policy gid =", p_user.GID)
#p_user.GID == i_user.GID
# TODO: compare the additionalGids field too after computing its value # TODO: compare the additionalGids field too after computing its value
# based on /etc/passwd and /etc/group from the container image. # based on /etc/passwd and /etc/group from the container image.

View File

@ -148,10 +148,11 @@ impl yaml::K8sResource for CronJob {
false false
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields( yaml::get_process_fields(
process, process,
&self.spec.jobTemplate.spec.template.spec.securityContext, &self.spec.jobTemplate.spec.template.spec.securityContext,
must_check_passwd,
); );
} }

View File

@ -148,8 +148,12 @@ impl yaml::K8sResource for DaemonSet {
.or_else(|| Some(String::new())) .or_else(|| Some(String::new()))
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields(process, &self.spec.template.spec.securityContext); yaml::get_process_fields(
process,
&self.spec.template.spec.securityContext,
must_check_passwd,
);
} }
fn get_sysctls(&self) -> Vec<pod::Sysctl> { fn get_sysctls(&self) -> Vec<pod::Sysctl> {

View File

@ -146,8 +146,12 @@ impl yaml::K8sResource for Deployment {
.or_else(|| Some(String::new())) .or_else(|| Some(String::new()))
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields(process, &self.spec.template.spec.securityContext); yaml::get_process_fields(
process,
&self.spec.template.spec.securityContext,
must_check_passwd,
);
} }
fn get_sysctls(&self) -> Vec<pod::Sysctl> { fn get_sysctls(&self) -> Vec<pod::Sysctl> {

View File

@ -111,8 +111,12 @@ impl yaml::K8sResource for Job {
false false
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields(process, &self.spec.template.spec.securityContext); yaml::get_process_fields(
process,
&self.spec.template.spec.securityContext,
must_check_passwd,
);
} }
fn get_sysctls(&self) -> Vec<pod::Sysctl> { fn get_sysctls(&self) -> Vec<pod::Sysctl> {

View File

@ -296,6 +296,9 @@ struct SecurityContext {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
runAsUser: Option<i64>, runAsUser: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
runAsGroup: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
seccompProfile: Option<SeccompProfile>, seccompProfile: Option<SeccompProfile>,
} }
@ -318,6 +321,12 @@ pub struct PodSecurityContext {
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub sysctls: Option<Vec<Sysctl>>, pub sysctls: Option<Vec<Sysctl>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub runAsGroup: Option<i64>,
#[serde(skip_serializing_if = "Option::is_none")]
pub allowPrivilegeEscalation: Option<bool>,
// TODO: additional fields. // TODO: additional fields.
} }
@ -902,8 +911,8 @@ impl yaml::K8sResource for Pod {
.or_else(|| Some(String::new())) .or_else(|| Some(String::new()))
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields(process, &self.spec.securityContext); yaml::get_process_fields(process, &self.spec.securityContext, must_check_passwd);
} }
fn get_sysctls(&self) -> Vec<Sysctl> { fn get_sysctls(&self) -> Vec<Sysctl> {
@ -961,7 +970,25 @@ impl Container {
if let Some(context) = &self.securityContext { if let Some(context) = &self.securityContext {
if let Some(uid) = context.runAsUser { if let Some(uid) = context.runAsUser {
process.User.UID = uid.try_into().unwrap(); process.User.UID = uid.try_into().unwrap();
// Changing the UID can break the GID mapping
// if a /etc/passwd file is present.
// The proper GID is determined, in order of preference:
// 1. the securityContext runAsGroup field (applied last in code)
// 2. lacking an explicit runAsGroup, /etc/passwd (get_gid_from_passwd_uid)
// 3. fall back to pod-level GID if there is one (unwrap_or)
//
// This behavior comes from the containerd runtime implementation:
// WithUser https://github.com/containerd/containerd/blob/main/pkg/oci/spec_opts.go#L592
process.User.GID = self
.registry
.get_gid_from_passwd_uid(process.User.UID)
.unwrap_or(process.User.GID);
} }
if let Some(gid) = context.runAsGroup {
process.User.GID = gid.try_into().unwrap();
}
if let Some(allow) = context.allowPrivilegeEscalation { if let Some(allow) = context.allowPrivilegeEscalation {
process.NoNewPrivileges = !allow process.NoNewPrivileges = !allow
} }
@ -1008,6 +1035,7 @@ pub async fn add_pause_container(containers: &mut Vec<Container>, config: &Confi
privileged: None, privileged: None,
capabilities: None, capabilities: None,
runAsUser: None, runAsUser: None,
runAsGroup: None,
seccompProfile: None, seccompProfile: None,
}), }),
..Default::default() ..Default::default()

View File

@ -425,6 +425,10 @@ pub struct CommonData {
pub struct ClusterConfig { pub struct ClusterConfig {
/// Pause container image reference. /// Pause container image reference.
pub pause_container_image: String, pub pause_container_image: String,
/// Whether or not the cluster uses the guest pull mechanism
/// In guest pull, host can't look into layers to determine GID.
/// See issue https://github.com/kata-containers/kata-containers/issues/11162
pub guest_pull: bool,
} }
/// Struct used to read data from the settings file and copy that data into the policy. /// Struct used to read data from the settings file and copy that data into the policy.
@ -713,7 +717,17 @@ impl AgentPolicy {
substitute_args_env_variables(&mut process.Args, &process.Env); substitute_args_env_variables(&mut process.Args, &process.Env);
c_settings.get_process_fields(&mut process); c_settings.get_process_fields(&mut process);
resource.get_process_fields(&mut process); let mut must_check_passwd = false;
resource.get_process_fields(&mut process, &mut must_check_passwd);
// The actual GID of the process run by the CRI
// Depends on the contents of /etc/passwd in the container
if must_check_passwd {
process.User.GID = yaml_container
.registry
.get_gid_from_passwd_uid(process.User.UID)
.unwrap_or(0);
}
yaml_container.get_process_fields(&mut process); yaml_container.get_process_fields(&mut process);
process process

View File

@ -36,6 +36,8 @@ pub struct Container {
pub image: String, pub image: String,
pub config_layer: DockerConfigLayer, pub config_layer: DockerConfigLayer,
pub image_layers: Vec<ImageLayer>, pub image_layers: Vec<ImageLayer>,
pub passwd: String,
pub group: String,
} }
/// Image config layer properties. /// Image config layer properties.
@ -71,6 +73,7 @@ pub struct ImageLayer {
pub diff_id: String, pub diff_id: String,
pub verity_hash: String, pub verity_hash: String,
pub passwd: String, pub passwd: String,
pub group: String,
} }
/// See https://docs.docker.com/reference/dockerfile/#volume. /// See https://docs.docker.com/reference/dockerfile/#volume.
@ -98,17 +101,35 @@ struct PasswdRecord {
pub shell: String, pub shell: String,
} }
/// A single record in a Unix group file.
#[derive(Debug)]
struct GroupRecord {
    // Group name — first field of an /etc/group line.
    pub name: String,
    // True when the password field is "x" (see parse_group_file).
    // Not consumed anywhere yet, hence dead_code.
    #[allow(dead_code)]
    pub validate: bool,
    // Numeric group id — third field of the record.
    pub gid: u32,
    // Comma-separated list of member user names — fourth field.
    // Not consumed anywhere yet, hence dead_code.
    #[allow(dead_code)]
    pub user_list: String,
}
/// Path to /etc/passwd in a container layer's tar file. /// Path to /etc/passwd in a container layer's tar file.
const PASSWD_FILE_TAR_PATH: &str = "etc/passwd"; const PASSWD_FILE_TAR_PATH: &str = "etc/passwd";
/// Path to /etc/group in a container layer's tar file.
const GROUP_FILE_TAR_PATH: &str = "etc/group";
/// Path to a file indicating a whiteout of the /etc/passwd file in a container /// Path to a file indicating a whiteout of the /etc/passwd file in a container
/// layer's tar file (i.e., /etc/passwd was deleted in the layer). /// layer's tar file (i.e., /etc/passwd was deleted in the layer).
const PASSWD_FILE_WHITEOUT_TAR_PATH: &str = "etc/.wh.passwd"; const PASSWD_FILE_WHITEOUT_TAR_PATH: &str = "etc/.wh.passwd";
/// Path to a file indicating a whiteout of the /etc/group file in a container
/// layer's tar file (i.e., /etc/group was deleted in the layer).
const GROUP_FILE_WHITEOUT_TAR_PATH: &str = "etc/.wh.group";
/// A marker used to track whether a particular container layer has had its /// A marker used to track whether a particular container layer has had its
/// /etc/passwd file deleted, and thus any such files read from previous, lower /// /etc/* file deleted, and thus any such files read from previous, lower
/// layers should be discarded. /// layers should be discarded.
const WHITEOUT_MARKER: &str = "WHITEOUT"; pub const WHITEOUT_MARKER: &str = "WHITEOUT";
impl Container { impl Container {
pub async fn new(config: &Config, image: &str) -> Result<Self> { pub async fn new(config: &Config, image: &str) -> Result<Self> {
@ -153,10 +174,35 @@ impl Container {
.await .await
.unwrap(); .unwrap();
// Find the last layer with an /etc/* file, respecting whiteouts.
let mut passwd = String::new();
let mut group = String::new();
// Nydus/guest_pull doesn't make available passwd/group files from layers properly.
// See issue https://github.com/kata-containers/kata-containers/issues/11162
if !config.settings.cluster_config.guest_pull {
for layer in &image_layers {
if layer.passwd == WHITEOUT_MARKER {
passwd = String::new();
} else if !layer.passwd.is_empty() {
passwd = layer.passwd.clone();
}
if layer.group == WHITEOUT_MARKER {
group = String::new();
} else if !layer.group.is_empty() {
group = layer.group.clone();
}
}
} else {
info!("Guest pull is enabled, skipping passwd/group file parsing");
}
Ok(Container { Ok(Container {
image: image_string, image: image_string,
config_layer, config_layer,
image_layers, image_layers,
passwd,
group,
}) })
} }
Err(oci_client::errors::OciDistributionError::AuthenticationFailure(message)) => { Err(oci_client::errors::OciDistributionError::AuthenticationFailure(message)) => {
@ -171,6 +217,110 @@ impl Container {
} }
} }
pub fn get_gid_from_passwd_uid(&self, uid: u32) -> Result<u32> {
if self.passwd.is_empty() {
return Err(anyhow!(
"No /etc/passwd file is available, unable to parse gids from uid"
));
}
match parse_passwd_file(&self.passwd) {
Ok(records) => {
if let Some(record) = records.iter().find(|&r| r.uid == uid) {
Ok(record.gid)
} else {
Err(anyhow!("Failed to find uid {} in /etc/passwd", uid))
}
}
Err(inner_e) => Err(anyhow!("Failed to parse /etc/passwd - error {inner_e}")),
}
}
/// Look up the UID/GID pair for `user` in the container image's /etc/passwd.
///
/// An empty user name or a missing /etc/passwd yields (0, 0). A passwd
/// file that fails to parse is logged and also yields (0, 0); only a
/// well-formed file without a matching record produces an error.
pub fn get_uid_gid_from_passwd_user(&self, user: String) -> Result<(u32, u32)> {
    if user.is_empty() || self.passwd.is_empty() {
        return Ok((0, 0));
    }

    let records = match parse_passwd_file(&self.passwd) {
        Ok(records) => records,
        Err(inner_e) => {
            warn!("Failed to parse /etc/passwd - error {inner_e}, using uid = gid = 0");
            return Ok((0, 0));
        }
    };

    records
        .iter()
        .find(|r| r.user == user)
        .map(|r| (r.uid, r.gid))
        .ok_or_else(|| anyhow!("Failed to find user {} in /etc/passwd", user))
}
fn get_gid_from_group_name(&self, name: &str) -> Result<u32> {
if self.group.is_empty() {
return Err(anyhow!(
"No /etc/group file is available, unable to parse gids from group name"
));
}
match parse_group_file(&self.group) {
Ok(records) => {
if let Some(record) = records.iter().find(|&r| r.name == name) {
Ok(record.gid)
} else {
Err(anyhow!("Failed to find name {} in /etc/group", name))
}
}
Err(inner_e) => Err(anyhow!("Failed to parse /etc/group - error {inner_e}")),
}
}
/// Resolve a user specification from an OCI image config into a UID.
///
/// An empty string maps to UID 0. A numeric value is used directly;
/// otherwise the value is treated as a user name and resolved through
/// the image's /etc/passwd, falling back to UID 0 when the lookup fails.
fn parse_user_string(&self, user: &str) -> u32 {
    if user.is_empty() {
        return 0;
    }

    match user.parse::<u32>() {
        Ok(uid) => uid,
        // If the user is not a number, interpret it as a user name.
        Err(outer_e) => {
            debug!(
                "Failed to parse {} as u32, using it as a user name - error {outer_e}",
                user
            );
            // to_string() already yields an owned String; the previous
            // extra .clone() of that fresh value was redundant.
            let (uid, _) = self
                .get_uid_gid_from_passwd_user(user.to_string())
                .unwrap_or((0, 0));
            uid
        }
    }
}
/// Resolve a group specification from an OCI image config into a GID.
///
/// An empty string maps to GID 0. A numeric value is deliberately
/// ignored (with a warning): GIDs are only honored when they come from
/// /etc/passwd. Any other value is treated as a group name and resolved
/// through the image's /etc/group, falling back to GID 0 on failure.
fn parse_group_string(&self, group: &str) -> u32 {
    if group.is_empty() {
        return 0;
    }

    let parsed: std::result::Result<u32, _> = group.parse();
    match parsed {
        Ok(id) => {
            // Numeric GIDs from the image config are intentionally not
            // applied — only /etc/passwd determines the runtime GID.
            warn!(
                concat!(
                    "Parsed gid {} from OCI container image config, but not using it. ",
                    "GIDs are only picked up by the runtime from /etc/passwd."
                ),
                id
            );
            0
        }
        // If the group is not a number, interpret it as a group name.
        Err(outer_e) => {
            debug!(
                "Failed to parse {} as u32, using it as a group name - error {outer_e}",
                group
            );
            self.get_gid_from_group_name(group).unwrap_or(0)
        }
    }
}
// Convert Docker image config to policy data. // Convert Docker image config to policy data.
pub fn get_process( pub fn get_process(
&self, &self,
@ -188,9 +338,14 @@ impl Container {
* 2. Contain only a UID * 2. Contain only a UID
* 3. Contain a UID:GID pair, in that format * 3. Contain a UID:GID pair, in that format
* 4. Contain a user name, which we need to translate into a UID/GID pair * 4. Contain a user name, which we need to translate into a UID/GID pair
* 5. Be erroneus, somehow * 5. Contain a (user name:group name) pair, which we need to translate into a UID/GID pair
* 6. Be erroneous, somehow
*/ */
if let Some(image_user) = &docker_config.User { if let Some(image_user) = &docker_config.User {
if self.passwd.is_empty() {
warn!("No /etc/passwd file is available, unable to parse gids from user");
}
if !image_user.is_empty() { if !image_user.is_empty() {
if image_user.contains(':') { if image_user.contains(':') {
debug!("Splitting Docker config user = {:?}", image_user); debug!("Splitting Docker config user = {:?}", image_user);
@ -203,61 +358,23 @@ impl Container {
); );
} else { } else {
debug!("Parsing uid from user[0] = {}", &user[0]); debug!("Parsing uid from user[0] = {}", &user[0]);
match user[0].parse() { process.User.UID = self.parse_user_string(user[0]);
Ok(id) => process.User.UID = id,
Err(e) => {
warn!(
"Failed to parse {} as u32, using uid = 0 - error {e}",
&user[0]
);
}
}
debug!("Parsing gid from user[1] = {:?}", user[1]); debug!("Parsing gid from user[1] = {:?}", user[1]);
match user[1].parse() { process.User.GID = self.parse_group_string(user[1]);
Ok(id) => process.User.GID = id,
Err(e) => { debug!(
warn!( "Overriding OCI container GID with UID:GID mapping from /etc/passwd"
"Failed to parse {} as u32, using gid = 0 - error {e}", );
&user[0] process.User.GID =
); self.get_gid_from_passwd_uid(process.User.UID).unwrap_or(0);
}
}
} }
} else { } else {
match image_user.parse::<u32>() { debug!("Parsing uid from image_user = {}", image_user);
Ok(uid) => process.User.UID = uid, process.User.UID = self.parse_user_string(image_user);
Err(outer_e) => {
// Find the last layer with an /etc/passwd file,
// respecting whiteouts.
let mut passwd = "".to_string();
for layer in self.get_image_layers() {
if !layer.passwd.is_empty() {
passwd = layer.passwd
} else if layer.passwd == WHITEOUT_MARKER {
passwd = "".to_string();
}
}
if passwd.is_empty() { debug!("Using UID:GID mapping from /etc/passwd");
warn!("Failed to parse {} as u32 - error {outer_e} - and no /etc/passwd file is available, using uid = gid = 0", image_user); process.User.GID = self.get_gid_from_passwd_uid(process.User.UID).unwrap_or(0);
} else {
match parse_passwd_file(passwd) {
Ok(records) => {
if let Some(record) =
records.iter().find(|&r| r.user == *image_user)
{
process.User.UID = record.uid;
process.User.GID = record.gid;
}
}
Err(inner_e) => {
warn!("Failed to parse {} as u32 - error {outer_e} - and failed to parse /etc/passwd - error {inner_e}, using uid = gid = 0", image_user);
}
}
}
}
}
} }
} }
} }
@ -347,7 +464,7 @@ async fn get_image_layers(
|| layer.media_type.eq(manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE) || layer.media_type.eq(manifest::IMAGE_LAYER_GZIP_MEDIA_TYPE)
{ {
if layer_index < config_layer.rootfs.diff_ids.len() { if layer_index < config_layer.rootfs.diff_ids.len() {
let (verity_hash, passwd) = get_verity_and_users( let (verity_hash, passwd, group) = get_verity_and_users(
layers_cache_file_path.clone(), layers_cache_file_path.clone(),
client, client,
reference, reference,
@ -359,6 +476,7 @@ async fn get_image_layers(
diff_id: config_layer.rootfs.diff_ids[layer_index].clone(), diff_id: config_layer.rootfs.diff_ids[layer_index].clone(),
verity_hash: verity_hash.to_owned(), verity_hash: verity_hash.to_owned(),
passwd: passwd.to_owned(), passwd: passwd.to_owned(),
group: group.to_owned(),
}); });
} else { } else {
return Err(anyhow!("Too many Docker gzip layers")); return Err(anyhow!("Too many Docker gzip layers"));
@ -377,7 +495,7 @@ async fn get_verity_and_users(
reference: &Reference, reference: &Reference,
layer_digest: &str, layer_digest: &str,
diff_id: &str, diff_id: &str,
) -> Result<(String, String)> { ) -> Result<(String, String, String)> {
let temp_dir = tempfile::tempdir_in(".")?; let temp_dir = tempfile::tempdir_in(".")?;
let base_dir = temp_dir.path(); let base_dir = temp_dir.path();
// Use file names supported by both Linux and Windows. // Use file names supported by both Linux and Windows.
@ -390,14 +508,13 @@ async fn get_verity_and_users(
let mut verity_hash = "".to_string(); let mut verity_hash = "".to_string();
let mut passwd = "".to_string(); let mut passwd = "".to_string();
let mut group = "".to_string();
let mut error_message = "".to_string(); let mut error_message = "".to_string();
let mut error = false; let mut error = false;
// get value from store and return if it exists // get value from store and return if it exists
if let Some(path) = layers_cache_file_path.as_ref() { if let Some(path) = layers_cache_file_path.as_ref() {
let res = read_verity_and_users_from_store(path, diff_id)?; (verity_hash, passwd, group) = read_verity_and_users_from_store(path, diff_id)?;
verity_hash = res.0;
passwd = res.1;
info!("Using cache file"); info!("Using cache file");
info!("dm-verity root hash: {verity_hash}"); info!("dm-verity root hash: {verity_hash}");
} }
@ -424,10 +541,15 @@ async fn get_verity_and_users(
error = true; error = true;
} }
Ok(res) => { Ok(res) => {
verity_hash = res.0; (verity_hash, passwd, group) = res;
passwd = res.1;
if let Some(path) = layers_cache_file_path.as_ref() { if let Some(path) = layers_cache_file_path.as_ref() {
add_verity_and_users_to_store(path, diff_id, &verity_hash, &passwd)?; add_verity_and_users_to_store(
path,
diff_id,
&verity_hash,
&passwd,
&group,
)?;
} }
info!("dm-verity root hash: {verity_hash}"); info!("dm-verity root hash: {verity_hash}");
} }
@ -443,7 +565,7 @@ async fn get_verity_and_users(
} }
bail!(error_message); bail!(error_message);
} }
Ok((verity_hash, passwd)) Ok((verity_hash, passwd, group))
} }
// the store is a json file that matches layer hashes to verity hashes // the store is a json file that matches layer hashes to verity hashes
@ -452,6 +574,7 @@ pub fn add_verity_and_users_to_store(
diff_id: &str, diff_id: &str,
verity_hash: &str, verity_hash: &str,
passwd: &str, passwd: &str,
group: &str,
) -> Result<()> { ) -> Result<()> {
// open the json file in read mode, create it if it doesn't exist // open the json file in read mode, create it if it doesn't exist
let read_file = OpenOptions::new() let read_file = OpenOptions::new()
@ -473,6 +596,7 @@ pub fn add_verity_and_users_to_store(
diff_id: diff_id.to_string(), diff_id: diff_id.to_string(),
verity_hash: verity_hash.to_string(), verity_hash: verity_hash.to_string(),
passwd: passwd.to_string(), passwd: passwd.to_string(),
group: group.to_string(),
}); });
// Serialize in pretty format // Serialize in pretty format
@ -500,13 +624,13 @@ pub fn add_verity_and_users_to_store(
pub fn read_verity_and_users_from_store( pub fn read_verity_and_users_from_store(
cache_file: &str, cache_file: &str,
diff_id: &str, diff_id: &str,
) -> Result<(String, String)> { ) -> Result<(String, String, String)> {
match OpenOptions::new().read(true).open(cache_file) { match OpenOptions::new().read(true).open(cache_file) {
Ok(file) => match serde_json::from_reader(file) { Ok(file) => match serde_json::from_reader(file) {
Result::<Vec<ImageLayer>, _>::Ok(layers) => { Result::<Vec<ImageLayer>, _>::Ok(layers) => {
for layer in layers { for layer in layers {
if layer.diff_id == diff_id { if layer.diff_id == diff_id {
return Ok((layer.verity_hash, layer.passwd)); return Ok((layer.verity_hash, layer.passwd, layer.group));
} }
} }
} }
@ -519,7 +643,7 @@ pub fn read_verity_and_users_from_store(
} }
} }
Ok((String::new(), String::new())) Ok((String::new(), String::new(), String::new()))
} }
async fn create_decompressed_layer_file( async fn create_decompressed_layer_file(
@ -558,7 +682,7 @@ async fn create_decompressed_layer_file(
Ok(()) Ok(())
} }
pub fn get_verity_hash_and_users(path: &Path) -> Result<(String, String)> { pub fn get_verity_hash_and_users(path: &Path) -> Result<(String, String, String)> {
info!("Calculating dm-verity root hash"); info!("Calculating dm-verity root hash");
let mut file = std::fs::File::open(path)?; let mut file = std::fs::File::open(path)?;
let size = file.seek(std::io::SeekFrom::End(0))?; let size = file.seek(std::io::SeekFrom::End(0))?;
@ -574,30 +698,45 @@ pub fn get_verity_hash_and_users(path: &Path) -> Result<(String, String)> {
file.seek(std::io::SeekFrom::Start(0))?; file.seek(std::io::SeekFrom::Start(0))?;
let mut passwd = String::new(); let mut passwd = String::new();
let mut group = String::new();
let (mut found_passwd, mut found_group) = (false, false);
for entry_wrap in tar::Archive::new(file).entries()? { for entry_wrap in tar::Archive::new(file).entries()? {
let mut entry = entry_wrap?; let mut entry = entry_wrap?;
let entry_path = entry.header().path()?; let entry_path = entry.header().path()?;
let path_str = entry_path.to_str().unwrap(); let path_str = entry_path.to_str().unwrap();
if path_str == PASSWD_FILE_TAR_PATH { if path_str == PASSWD_FILE_TAR_PATH {
entry.read_to_string(&mut passwd)?; entry.read_to_string(&mut passwd)?;
break; found_passwd = true;
if found_passwd && found_group {
break;
}
} else if path_str == PASSWD_FILE_WHITEOUT_TAR_PATH { } else if path_str == PASSWD_FILE_WHITEOUT_TAR_PATH {
passwd = WHITEOUT_MARKER.to_owned(); passwd = WHITEOUT_MARKER.to_owned();
break; found_passwd = true;
if found_passwd && found_group {
break;
}
} else if path_str == GROUP_FILE_TAR_PATH {
entry.read_to_string(&mut group)?;
found_group = true;
if found_passwd && found_group {
break;
}
} else if path_str == GROUP_FILE_WHITEOUT_TAR_PATH {
group = WHITEOUT_MARKER.to_owned();
found_group = true;
if found_passwd && found_group {
break;
}
} }
} }
Ok((result, passwd)) Ok((result, passwd, group))
} }
pub async fn get_container(config: &Config, image: &str) -> Result<Container> { pub async fn get_container(config: &Config, image: &str) -> Result<Container> {
if let Some(socket_path) = &config.containerd_socket_path { if let Some(socket_path) = &config.containerd_socket_path {
return Container::new_containerd_pull( return Container::new_containerd_pull(config, image, socket_path).await;
config.layers_cache_file_path.clone(),
image,
socket_path,
)
.await;
} }
Container::new(config, image).await Container::new(config, image).await
} }
@ -643,7 +782,7 @@ fn build_auth(reference: &Reference) -> RegistryAuth {
RegistryAuth::Anonymous RegistryAuth::Anonymous
} }
fn parse_passwd_file(passwd: String) -> Result<Vec<PasswdRecord>> { fn parse_passwd_file(passwd: &str) -> Result<Vec<PasswdRecord>> {
let mut records = Vec::new(); let mut records = Vec::new();
for rec in passwd.lines() { for rec in passwd.lines() {
@ -670,3 +809,28 @@ fn parse_passwd_file(passwd: String) -> Result<Vec<PasswdRecord>> {
Ok(records) Ok(records)
} }
/// Parse the contents of an /etc/group file into a list of records.
///
/// Each line must contain exactly 4 colon-separated fields:
/// name:password:gid:user_list. Returns an error on a malformed record
/// instead of panicking.
fn parse_group_file(group: &str) -> Result<Vec<GroupRecord>> {
    let mut records = Vec::new();

    for rec in group.lines() {
        let fields: Vec<&str> = rec.split(':').collect();
        let field_count = fields.len();
        if field_count != 4 {
            // Bug fix: the message previously claimed "expected 3 fields"
            // while the check requires 4.
            return Err(anyhow!(
                "Incorrect group record, expected 4 fields, got {}",
                field_count
            ));
        }

        // Bug fix: propagate a parse failure instead of panicking via
        // unwrap() on a malformed gid field.
        let gid = fields[2].parse().map_err(|e| {
            anyhow!(
                "Incorrect group record, invalid gid {:?} - error {e}",
                fields[2]
            )
        })?;

        records.push(GroupRecord {
            name: fields[0].to_string(),
            // "x" means the password is stored elsewhere (e.g. /etc/gshadow).
            validate: fields[1] == "x",
            gid,
            user_list: fields[3].to_string(),
        });
    }

    Ok(records)
}

View File

@ -7,8 +7,9 @@
#![allow(non_snake_case)] #![allow(non_snake_case)]
use crate::registry::{ use crate::registry::{
add_verity_and_users_to_store, get_verity_hash_and_users, read_verity_and_users_from_store, add_verity_and_users_to_store, get_verity_hash_and_users, read_verity_and_users_from_store,
Container, DockerConfigLayer, ImageLayer, Container, DockerConfigLayer, ImageLayer, WHITEOUT_MARKER,
}; };
use crate::utils::Config;
use anyhow::{anyhow, bail, Result}; use anyhow::{anyhow, bail, Result};
use containerd_client::{services::v1::GetImageRequest, with_namespace}; use containerd_client::{services::v1::GetImageRequest, with_namespace};
@ -28,7 +29,7 @@ use tower::service_fn;
impl Container { impl Container {
pub async fn new_containerd_pull( pub async fn new_containerd_pull(
layers_cache_file_path: Option<String>, config: &Config,
image: &str, image: &str,
containerd_socket_path: &str, containerd_socket_path: &str,
) -> Result<Self> { ) -> Result<Self> {
@ -60,17 +61,42 @@ impl Container {
.await .await
.unwrap(); .unwrap();
let image_layers = get_image_layers( let image_layers = get_image_layers(
layers_cache_file_path, config.layers_cache_file_path.clone(),
&manifest, &manifest,
&config_layer, &config_layer,
&ctrd_client, &ctrd_client,
) )
.await?; .await?;
// Find the last layer with an /etc/* file, respecting whiteouts.
let mut passwd = String::new();
let mut group = String::new();
// Nydus/guest_pull doesn't make available passwd/group files from layers properly.
// See issue https://github.com/kata-containers/kata-containers/issues/11162
if !config.settings.cluster_config.guest_pull {
for layer in &image_layers {
if layer.passwd == WHITEOUT_MARKER {
passwd = String::new();
} else if !layer.passwd.is_empty() {
passwd = layer.passwd.clone();
}
if layer.group == WHITEOUT_MARKER {
group = String::new();
} else if !layer.group.is_empty() {
group = layer.group.clone();
}
}
} else {
info!("Guest pull is enabled, skipping passwd/group file parsing");
}
Ok(Container { Ok(Container {
image: image_str, image: image_str,
config_layer, config_layer,
image_layers, image_layers,
passwd,
group,
}) })
} }
} }
@ -265,7 +291,7 @@ pub async fn get_image_layers(
|| layer_media_type.eq("application/vnd.oci.image.layer.v1.tar+gzip") || layer_media_type.eq("application/vnd.oci.image.layer.v1.tar+gzip")
{ {
if layer_index < config_layer.rootfs.diff_ids.len() { if layer_index < config_layer.rootfs.diff_ids.len() {
let (verity_hash, passwd) = get_verity_and_users( let (verity_hash, passwd, group) = get_verity_and_users(
layers_cache_file_path.clone(), layers_cache_file_path.clone(),
layer["digest"].as_str().unwrap(), layer["digest"].as_str().unwrap(),
client, client,
@ -276,6 +302,7 @@ pub async fn get_image_layers(
diff_id: config_layer.rootfs.diff_ids[layer_index].clone(), diff_id: config_layer.rootfs.diff_ids[layer_index].clone(),
verity_hash, verity_hash,
passwd, passwd,
group,
}; };
layersVec.push(imageLayer); layersVec.push(imageLayer);
} else { } else {
@ -293,7 +320,7 @@ async fn get_verity_and_users(
layer_digest: &str, layer_digest: &str,
client: &containerd_client::Client, client: &containerd_client::Client,
diff_id: &str, diff_id: &str,
) -> Result<(String, String)> { ) -> Result<(String, String, String)> {
let temp_dir = tempfile::tempdir_in(".")?; let temp_dir = tempfile::tempdir_in(".")?;
let base_dir = temp_dir.path(); let base_dir = temp_dir.path();
// Use file names supported by both Linux and Windows. // Use file names supported by both Linux and Windows.
@ -306,13 +333,12 @@ async fn get_verity_and_users(
let mut verity_hash = "".to_string(); let mut verity_hash = "".to_string();
let mut passwd = "".to_string(); let mut passwd = "".to_string();
let mut group = "".to_string();
let mut error_message = "".to_string(); let mut error_message = "".to_string();
let mut error = false; let mut error = false;
if let Some(path) = layers_cache_file_path.as_ref() { if let Some(path) = layers_cache_file_path.as_ref() {
let res = read_verity_and_users_from_store(path, diff_id)?; (verity_hash, passwd, group) = read_verity_and_users_from_store(path, diff_id)?;
verity_hash = res.0;
passwd = res.1;
info!("Using cache file"); info!("Using cache file");
info!("dm-verity root hash: {verity_hash}"); info!("dm-verity root hash: {verity_hash}");
} }
@ -338,10 +364,15 @@ async fn get_verity_and_users(
error = true; error = true;
} }
Ok(res) => { Ok(res) => {
verity_hash = res.0; (verity_hash, passwd, group) = res;
passwd = res.1;
if let Some(path) = layers_cache_file_path.as_ref() { if let Some(path) = layers_cache_file_path.as_ref() {
add_verity_and_users_to_store(path, diff_id, &verity_hash, &passwd)?; add_verity_and_users_to_store(
path,
diff_id,
&verity_hash,
&passwd,
&group,
)?;
} }
info!("dm-verity root hash: {verity_hash}"); info!("dm-verity root hash: {verity_hash}");
} }
@ -356,7 +387,7 @@ async fn get_verity_and_users(
} }
bail!(error_message); bail!(error_message);
} }
Ok((verity_hash, passwd)) Ok((verity_hash, passwd, group))
} }
async fn create_decompressed_layer_file( async fn create_decompressed_layer_file(

View File

@ -109,8 +109,12 @@ impl yaml::K8sResource for ReplicaSet {
false false
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields(process, &self.spec.template.spec.securityContext); yaml::get_process_fields(
process,
&self.spec.template.spec.securityContext,
must_check_passwd,
);
} }
fn get_sysctls(&self) -> Vec<pod::Sysctl> { fn get_sysctls(&self) -> Vec<pod::Sysctl> {

View File

@ -111,8 +111,12 @@ impl yaml::K8sResource for ReplicationController {
false false
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields(process, &self.spec.template.spec.securityContext); yaml::get_process_fields(
process,
&self.spec.template.spec.securityContext,
must_check_passwd,
);
} }
fn get_sysctls(&self) -> Vec<pod::Sysctl> { fn get_sysctls(&self) -> Vec<pod::Sysctl> {

View File

@ -193,8 +193,12 @@ impl yaml::K8sResource for StatefulSet {
.or_else(|| Some(String::new())) .or_else(|| Some(String::new()))
} }
fn get_process_fields(&self, process: &mut policy::KataProcess) { fn get_process_fields(&self, process: &mut policy::KataProcess, must_check_passwd: &mut bool) {
yaml::get_process_fields(process, &self.spec.template.spec.securityContext); yaml::get_process_fields(
process,
&self.spec.template.spec.securityContext,
must_check_passwd,
);
} }
fn get_sysctls(&self) -> Vec<pod::Sysctl> { fn get_sysctls(&self) -> Vec<pod::Sysctl> {

View File

@ -96,7 +96,11 @@ pub trait K8sResource {
None None
} }
fn get_process_fields(&self, _process: &mut policy::KataProcess) { fn get_process_fields(
&self,
_process: &mut policy::KataProcess,
_must_check_passwd: &mut bool,
) {
// No need to implement support for securityContext or similar fields // No need to implement support for securityContext or similar fields
// for some of the K8s resource types. // for some of the K8s resource types.
} }
@ -386,10 +390,36 @@ fn handle_unused_field(path: &str, silent_unsupported_fields: bool) {
pub fn get_process_fields( pub fn get_process_fields(
process: &mut policy::KataProcess, process: &mut policy::KataProcess,
security_context: &Option<pod::PodSecurityContext>, security_context: &Option<pod::PodSecurityContext>,
must_check_passwd: &mut bool,
) { ) {
if let Some(context) = security_context { if let Some(context) = security_context {
if let Some(uid) = context.runAsUser { if let Some(uid) = context.runAsUser {
process.User.UID = uid.try_into().unwrap(); process.User.UID = uid.try_into().unwrap();
// Changing the UID can break the GID mapping
// if a /etc/passwd file is present.
// The proper GID is determined, in order of preference:
// 1. the securityContext runAsGroup field (applied last in code)
// 2. lacking an explicit runAsGroup, /etc/passwd
// (parsed in policy::get_container_process())
// 3. lacking an /etc/passwd, 0 (unwrap_or)
//
// This behavior comes from the containerd runtime implementation:
// WithUser https://github.com/containerd/containerd/blob/main/pkg/oci/spec_opts.go#L592
//
// We can't parse the /etc/passwd file here because
// we are in the resource context. Defer execution to outside
// the resource context, in policy::get_container_process()
// IFF the UID is changed by the resource securityContext but not the GID.
*must_check_passwd = true;
}
if let Some(gid) = context.runAsGroup {
process.User.GID = gid.try_into().unwrap();
*must_check_passwd = false;
}
if let Some(allow) = context.allowPrivilegeEscalation {
process.NoNewPrivileges = !allow
} }
} }
} }

View File

@ -197,4 +197,9 @@ mod tests {
async fn test_state_exec_process() { async fn test_state_exec_process() {
runtests("state/execprocess").await; runtests("state/execprocess").await;
} }
#[tokio::test]
async fn test_create_container_security_context() {
runtests("createcontainer/security_context").await;
}
} }

View File

@ -65,7 +65,8 @@
"SelinuxLabel": "", "SelinuxLabel": "",
"User": { "User": {
"Username": "", "Username": "",
"UID": 65535 "UID": 65535,
"GID": 65535
}, },
"Args": [ "Args": [
"/pause" "/pause"
@ -197,7 +198,8 @@
"SelinuxLabel": "", "SelinuxLabel": "",
"User": { "User": {
"Username": "", "Username": "",
"UID": 65535 "UID": 65535,
"GID": 65535
}, },
"Args": [ "Args": [
"/pause" "/pause"
@ -263,4 +265,4 @@
} }
} }
} }
] ]

View File

@ -65,7 +65,8 @@
"SelinuxLabel": "", "SelinuxLabel": "",
"User": { "User": {
"Username": "", "Username": "",
"UID": 65535 "UID": 65535,
"GID": 65535
}, },
"Args": [ "Args": [
"/pause" "/pause"
@ -197,7 +198,8 @@
"SelinuxLabel": "", "SelinuxLabel": "",
"User": { "User": {
"Username": "", "Username": "",
"UID": 65535 "UID": 65535,
"GID": 65535
}, },
"Args": [ "Args": [
"/pause" "/pause"
@ -325,7 +327,8 @@
"SelinuxLabel": "", "SelinuxLabel": "",
"User": { "User": {
"Username": "", "Username": "",
"UID": 65535 "UID": 65535,
"GID": 65535
}, },
"Args": [ "Args": [
"/pause" "/pause"
@ -457,7 +460,8 @@
"SelinuxLabel": "", "SelinuxLabel": "",
"User": { "User": {
"Username": "", "Username": "",
"UID": 65535 "UID": 65535,
"GID": 65535
}, },
"Args": [ "Args": [
"/pause" "/pause"
@ -523,4 +527,4 @@
} }
} }
} }
] ]

View File

@ -0,0 +1,12 @@
apiVersion: v1
kind: Pod
metadata:
name: dummy
spec:
runtimeClassName: kata-cc-isolation
securityContext:
runAsUser: 65534
runAsGroup: 65534
containers:
- name: dummy
image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48

View File

@ -0,0 +1,737 @@
[
{
"description": "Correct User for security context",
"allowed": true,
"request": {
"type": "CreateContainer",
"OCI": {
"Annotations": {
"io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"io.katacontainers.pkg.oci.container_type": "pod_sandbox",
"io.kubernetes.cri.container-type": "sandbox",
"io.kubernetes.cri.sandbox-cpu-period": "100000",
"io.kubernetes.cri.sandbox-cpu-quota": "0",
"io.kubernetes.cri.sandbox-cpu-shares": "2",
"io.kubernetes.cri.sandbox-id": "a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"io.kubernetes.cri.sandbox-log-directory": "/var/log/pods/kata-containers-k8s-tests_dummy_fd055c20-d44c-4fc5-aa90-283f629201af",
"io.kubernetes.cri.sandbox-memory": "0",
"io.kubernetes.cri.sandbox-name": "dummy",
"io.kubernetes.cri.sandbox-namespace": "kata-containers-k8s-tests",
"io.kubernetes.cri.sandbox-uid": "fd055c20-d44c-4fc5-aa90-283f629201af",
"nerdctl/network-namespace": "/var/run/netns/cni-50720768-bd65-bf4b-6185-5d5a2adf5305"
},
"Hooks": null,
"Hostname": "dummy",
"Linux": {
"CgroupsPath": "kubepods-besteffort-podfd055c20_d44c_4fc5_aa90_283f629201af.slice:cri-containerd:a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"Devices": [],
"GIDMappings": [],
"IntelRdt": null,
"MaskedPaths": [
"/proc/acpi",
"/proc/asound",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
"/sys/devices/virtual/powercap",
"/proc/scsi"
],
"MountLabel": "",
"Namespaces": [
{
"Path": "",
"Type": "ipc"
},
{
"Path": "",
"Type": "uts"
},
{
"Path": "",
"Type": "mount"
}
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
],
"Resources": {
"BlockIO": null,
"CPU": {
"Cpus": "",
"Mems": "",
"Period": 0,
"Quota": 0,
"RealtimePeriod": 0,
"RealtimeRuntime": 0,
"Shares": 2
},
"Devices": [],
"HugepageLimits": [],
"Memory": null,
"Network": null,
"Pids": null
},
"RootfsPropagation": "",
"Seccomp": null,
"Sysctl": {},
"UIDMappings": []
},
"Mounts": [
{
"destination": "/proc",
"options": [
"nosuid",
"noexec",
"nodev"
],
"source": "proc",
"type_": "proc"
},
{
"destination": "/dev",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
],
"source": "tmpfs",
"type_": "tmpfs"
},
{
"destination": "/dev/pts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
],
"source": "devpts",
"type_": "devpts"
},
{
"destination": "/dev/mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
],
"source": "mqueue",
"type_": "mqueue"
},
{
"destination": "/sys",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
],
"source": "sysfs",
"type_": "sysfs"
},
{
"destination": "/dev/shm",
"options": [
"rbind"
],
"source": "/run/kata-containers/sandbox/shm",
"type_": "bind"
},
{
"destination": "/etc/resolv.conf",
"options": [
"rbind",
"ro",
"nosuid",
"nodev",
"noexec"
],
"source": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4-8f7f27d37e8af290-resolv.conf",
"type_": "bind"
}
],
"Process": {
"ApparmorProfile": "",
"Args": [
"/pause"
],
"Capabilities": {
"Ambient": [],
"Bounding": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"Effective": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"Inheritable": [],
"Permitted": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
]
},
"ConsoleSize": null,
"Cwd": "/",
"Env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
],
"NoNewPrivileges": true,
"OOMScoreAdj": -998,
"Rlimits": [],
"SelinuxLabel": "",
"Terminal": false,
"User": {
"GID": 65534,
"UID": 65534,
"Username": ""
}
},
"Root": {
"Path": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4/rootfs",
"Readonly": true
},
"Solaris": null,
"Version": "1.1.0",
"Windows": null
}
}
},
{
"description": "Incorrect User.UID for security context",
"allowed": false,
"request": {
"type": "CreateContainer",
"OCI": {
"Annotations": {
"io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"io.katacontainers.pkg.oci.container_type": "pod_sandbox",
"io.kubernetes.cri.container-type": "sandbox",
"io.kubernetes.cri.sandbox-cpu-period": "100000",
"io.kubernetes.cri.sandbox-cpu-quota": "0",
"io.kubernetes.cri.sandbox-cpu-shares": "2",
"io.kubernetes.cri.sandbox-id": "a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"io.kubernetes.cri.sandbox-log-directory": "/var/log/pods/kata-containers-k8s-tests_dummy_fd055c20-d44c-4fc5-aa90-283f629201af",
"io.kubernetes.cri.sandbox-memory": "0",
"io.kubernetes.cri.sandbox-name": "dummy",
"io.kubernetes.cri.sandbox-namespace": "kata-containers-k8s-tests",
"io.kubernetes.cri.sandbox-uid": "fd055c20-d44c-4fc5-aa90-283f629201af",
"nerdctl/network-namespace": "/var/run/netns/cni-50720768-bd65-bf4b-6185-5d5a2adf5305"
},
"Hooks": null,
"Hostname": "dummy",
"Linux": {
"CgroupsPath": "kubepods-besteffort-podfd055c20_d44c_4fc5_aa90_283f629201af.slice:cri-containerd:a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"Devices": [],
"GIDMappings": [],
"IntelRdt": null,
"MaskedPaths": [
"/proc/acpi",
"/proc/asound",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
"/sys/devices/virtual/powercap",
"/proc/scsi"
],
"MountLabel": "",
"Namespaces": [
{
"Path": "",
"Type": "ipc"
},
{
"Path": "",
"Type": "uts"
},
{
"Path": "",
"Type": "mount"
}
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
],
"Resources": {
"BlockIO": null,
"CPU": {
"Cpus": "",
"Mems": "",
"Period": 0,
"Quota": 0,
"RealtimePeriod": 0,
"RealtimeRuntime": 0,
"Shares": 2
},
"Devices": [],
"HugepageLimits": [],
"Memory": null,
"Network": null,
"Pids": null
},
"RootfsPropagation": "",
"Seccomp": null,
"Sysctl": {},
"UIDMappings": []
},
"Mounts": [
{
"destination": "/proc",
"options": [
"nosuid",
"noexec",
"nodev"
],
"source": "proc",
"type_": "proc"
},
{
"destination": "/dev",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
],
"source": "tmpfs",
"type_": "tmpfs"
},
{
"destination": "/dev/pts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
],
"source": "devpts",
"type_": "devpts"
},
{
"destination": "/dev/mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
],
"source": "mqueue",
"type_": "mqueue"
},
{
"destination": "/sys",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
],
"source": "sysfs",
"type_": "sysfs"
},
{
"destination": "/dev/shm",
"options": [
"rbind"
],
"source": "/run/kata-containers/sandbox/shm",
"type_": "bind"
},
{
"destination": "/etc/resolv.conf",
"options": [
"rbind",
"ro",
"nosuid",
"nodev",
"noexec"
],
"source": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4-8f7f27d37e8af290-resolv.conf",
"type_": "bind"
}
],
"Process": {
"ApparmorProfile": "",
"Args": [
"/pause"
],
"Capabilities": {
"Ambient": [],
"Bounding": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"Effective": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"Inheritable": [],
"Permitted": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
]
},
"ConsoleSize": null,
"Cwd": "/",
"Env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
],
"NoNewPrivileges": true,
"OOMScoreAdj": -998,
"Rlimits": [],
"SelinuxLabel": "",
"Terminal": false,
"User": {
"GID": 65534,
"UID": 0,
"Username": ""
}
},
"Root": {
"Path": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4/rootfs",
"Readonly": true
},
"Solaris": null,
"Version": "1.1.0",
"Windows": null
}
}
},
{
"description": "Incorrect User.GID for security context",
"allowed": false,
"request": {
"type": "CreateContainer",
"OCI": {
"Annotations": {
"io.katacontainers.pkg.oci.bundle_path": "/run/containerd/io.containerd.runtime.v2.task/k8s.io/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"io.katacontainers.pkg.oci.container_type": "pod_sandbox",
"io.kubernetes.cri.container-type": "sandbox",
"io.kubernetes.cri.sandbox-cpu-period": "100000",
"io.kubernetes.cri.sandbox-cpu-quota": "0",
"io.kubernetes.cri.sandbox-cpu-shares": "2",
"io.kubernetes.cri.sandbox-id": "a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"io.kubernetes.cri.sandbox-log-directory": "/var/log/pods/kata-containers-k8s-tests_dummy_fd055c20-d44c-4fc5-aa90-283f629201af",
"io.kubernetes.cri.sandbox-memory": "0",
"io.kubernetes.cri.sandbox-name": "dummy",
"io.kubernetes.cri.sandbox-namespace": "kata-containers-k8s-tests",
"io.kubernetes.cri.sandbox-uid": "fd055c20-d44c-4fc5-aa90-283f629201af",
"nerdctl/network-namespace": "/var/run/netns/cni-50720768-bd65-bf4b-6185-5d5a2adf5305"
},
"Hooks": null,
"Hostname": "dummy",
"Linux": {
"CgroupsPath": "kubepods-besteffort-podfd055c20_d44c_4fc5_aa90_283f629201af.slice:cri-containerd:a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4",
"Devices": [],
"GIDMappings": [],
"IntelRdt": null,
"MaskedPaths": [
"/proc/acpi",
"/proc/asound",
"/proc/kcore",
"/proc/keys",
"/proc/latency_stats",
"/proc/timer_list",
"/proc/timer_stats",
"/proc/sched_debug",
"/sys/firmware",
"/sys/devices/virtual/powercap",
"/proc/scsi"
],
"MountLabel": "",
"Namespaces": [
{
"Path": "",
"Type": "ipc"
},
{
"Path": "",
"Type": "uts"
},
{
"Path": "",
"Type": "mount"
}
],
"ReadonlyPaths": [
"/proc/bus",
"/proc/fs",
"/proc/irq",
"/proc/sys",
"/proc/sysrq-trigger"
],
"Resources": {
"BlockIO": null,
"CPU": {
"Cpus": "",
"Mems": "",
"Period": 0,
"Quota": 0,
"RealtimePeriod": 0,
"RealtimeRuntime": 0,
"Shares": 2
},
"Devices": [],
"HugepageLimits": [],
"Memory": null,
"Network": null,
"Pids": null
},
"RootfsPropagation": "",
"Seccomp": null,
"Sysctl": {},
"UIDMappings": []
},
"Mounts": [
{
"destination": "/proc",
"options": [
"nosuid",
"noexec",
"nodev"
],
"source": "proc",
"type_": "proc"
},
{
"destination": "/dev",
"options": [
"nosuid",
"strictatime",
"mode=755",
"size=65536k"
],
"source": "tmpfs",
"type_": "tmpfs"
},
{
"destination": "/dev/pts",
"options": [
"nosuid",
"noexec",
"newinstance",
"ptmxmode=0666",
"mode=0620",
"gid=5"
],
"source": "devpts",
"type_": "devpts"
},
{
"destination": "/dev/mqueue",
"options": [
"nosuid",
"noexec",
"nodev"
],
"source": "mqueue",
"type_": "mqueue"
},
{
"destination": "/sys",
"options": [
"nosuid",
"noexec",
"nodev",
"ro"
],
"source": "sysfs",
"type_": "sysfs"
},
{
"destination": "/dev/shm",
"options": [
"rbind"
],
"source": "/run/kata-containers/sandbox/shm",
"type_": "bind"
},
{
"destination": "/etc/resolv.conf",
"options": [
"rbind",
"ro",
"nosuid",
"nodev",
"noexec"
],
"source": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4-8f7f27d37e8af290-resolv.conf",
"type_": "bind"
}
],
"Process": {
"ApparmorProfile": "",
"Args": [
"/pause"
],
"Capabilities": {
"Ambient": [],
"Bounding": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"Effective": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
],
"Inheritable": [],
"Permitted": [
"CAP_CHOWN",
"CAP_DAC_OVERRIDE",
"CAP_FSETID",
"CAP_FOWNER",
"CAP_MKNOD",
"CAP_NET_RAW",
"CAP_SETGID",
"CAP_SETUID",
"CAP_SETFCAP",
"CAP_SETPCAP",
"CAP_NET_BIND_SERVICE",
"CAP_SYS_CHROOT",
"CAP_KILL",
"CAP_AUDIT_WRITE"
]
},
"ConsoleSize": null,
"Cwd": "/",
"Env": [
"PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
],
"NoNewPrivileges": true,
"OOMScoreAdj": -998,
"Rlimits": [],
"SelinuxLabel": "",
"Terminal": false,
"User": {
"GID": 0,
"UID": 65534,
"Username": ""
}
},
"Root": {
"Path": "/run/kata-containers/shared/containers/a10abe57d2a2e47c30d5bd2427170e019fddc587a59d173544d87842f1905da4/rootfs",
"Readonly": true
},
"Solaris": null,
"Version": "1.1.0",
"Windows": null
}
}
}
]

View File

@ -2,7 +2,9 @@
{ {
"description": "sysctls listed in yaml or settings", "description": "sysctls listed in yaml or settings",
"allowed": true, "allowed": true,
"state": {"sandbox_name": "policy-redis-deployment-6674f9448-xjrzf"}, "state": {
"sandbox_name": "policy-redis-deployment-6674f9448-xjrzf"
},
"request": { "request": {
"type": "CreateContainer", "type": "CreateContainer",
"OCI": { "OCI": {
@ -238,7 +240,7 @@
"AdditionalGids": [ "AdditionalGids": [
0 0
], ],
"GID": 0, "GID": 65535,
"UID": 65535, "UID": 65535,
"Username": "" "Username": ""
} }
@ -278,7 +280,9 @@
{ {
"description": "sysctl not listed in yaml or settings", "description": "sysctl not listed in yaml or settings",
"allowed": false, "allowed": false,
"state": {"sandbox_name": "policy-redis-deployment-6674f9448-xjrzf"}, "state": {
"sandbox_name": "policy-redis-deployment-6674f9448-xjrzf"
},
"request": { "request": {
"type": "CreateContainer", "type": "CreateContainer",
"OCI": { "OCI": {
@ -514,7 +518,7 @@
"AdditionalGids": [ "AdditionalGids": [
0 0
], ],
"GID": 0, "GID": 65535,
"UID": 65535, "UID": 65535,
"Username": "" "Username": ""
} }
@ -551,4 +555,4 @@
"string_user": null "string_user": null
} }
} }
] ]

View File

@ -233,7 +233,7 @@
"AdditionalGids": [ "AdditionalGids": [
0 0
], ],
"GID": 0, "GID": 65535,
"UID": 65535, "UID": 65535,
"Username": "" "Username": ""
} }
@ -279,4 +279,4 @@
"timeout": 0 "timeout": 0
} }
} }
] ]

View File

@ -0,0 +1,103 @@
#!/usr/bin/env bats
#
# Copyright (c) 2024 Microsoft.
#
# SPDX-License-Identifier: Apache-2.0
#
load "${BATS_TEST_DIRNAME}/../../common.bash"
load "${BATS_TEST_DIRNAME}/tests_common.sh"
# Per-test setup: resolve the YAML paths used by all test cases and, on the
# first test only, run genpolicy for the three deployment variants.
setup() {
    auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled."
    get_pod_config_dir
    deployment_name="policy-redis-deployment"
    pod_sc_deployment_yaml="${pod_config_dir}/k8s-pod-sc-deployment.yaml"
    pod_sc_nobodyupdate_deployment_yaml="${pod_config_dir}/k8s-pod-sc-nobodyupdate-deployment.yaml"
    pod_sc_layered_deployment_yaml="${pod_config_dir}/k8s-layered-sc-deployment.yaml"
    # Save some time by executing genpolicy a single time.
    # NOTE: bats invokes setup() before every test case, so the
    # BATS_TEST_NUMBER guard keeps the expensive policy generation to one run.
    if [ "${BATS_TEST_NUMBER}" == "1" ]; then
        # Add an appropriate policy to the correct YAML file.
        policy_settings_dir="$(create_tmp_policy_settings_dir "${pod_config_dir}")"
        add_requests_to_policy_settings "${policy_settings_dir}" "ReadStreamRequest"
        auto_generate_policy "${policy_settings_dir}" "${pod_sc_deployment_yaml}"
        auto_generate_policy "${policy_settings_dir}" "${pod_sc_nobodyupdate_deployment_yaml}"
        auto_generate_policy "${policy_settings_dir}" "${pod_sc_layered_deployment_yaml}"
    fi
    # Start each test case with a copy of the correct yaml file.
    # Failure tests mutate this copy with yq, leaving the original intact.
    incorrect_deployment_yaml="${pod_config_dir}/k8s-layered-sc-deployment-incorrect.yaml"
    cp "${pod_sc_layered_deployment_yaml}" "${incorrect_deployment_yaml}"
}
# Positive case: a pod-level securityContext (runAsUser/runAsGroup) must be
# accepted by the auto-generated policy.
@test "Successful sc deployment with auto-generated policy and container image volumes" {
    # Initiate deployment
    kubectl apply -f "${pod_sc_deployment_yaml}"
    # Wait for the deployment to be created
    cmd="kubectl rollout status --timeout=1s deployment/${deployment_name} | grep 'successfully rolled out'"
    info "Waiting for: ${cmd}"
    waitForProcess "${wait_time}" "${sleep_time}" "${cmd}"
}
# Positive case: a securityContext that selects a different valid user
# (runAsUser only, GID resolved elsewhere) must also be accepted.
@test "Successful sc deployment with security context choosing another valid user" {
    # Initiate deployment
    kubectl apply -f "${pod_sc_nobodyupdate_deployment_yaml}"
    # Wait for the deployment to be created
    cmd="kubectl rollout status --timeout=1s deployment/${deployment_name} | grep 'successfully rolled out'"
    info "Waiting for: ${cmd}"
    waitForProcess "${wait_time}" "${sleep_time}" "${cmd}"
}
# Positive case: layered securityContext (pod-level plus container-level
# overrides in the YAML) must be accepted by the auto-generated policy.
@test "Successful layered sc deployment with auto-generated policy and container image volumes" {
    # Initiate deployment
    kubectl apply -f "${pod_sc_layered_deployment_yaml}"
    # Wait for the deployment to be created
    cmd="kubectl rollout status --timeout=1s deployment/${deployment_name} | grep 'successfully rolled out'"
    info "Waiting for: ${cmd}"
    waitForProcess "${wait_time}" "${sleep_time}" "${cmd}"
}
# Shared helper for the negative tests: deploy the (mutated) incorrect YAML
# and wait for the agent policy to block its CreateContainerRequest.
test_deployment_policy_error() {
    # Initiate deployment
    kubectl apply -f "${incorrect_deployment_yaml}"
    # Wait for the deployment pod to fail
    wait_for_blocked_request "CreateContainerRequest" "${deployment_name}"
}
# Negative case: the policy was generated from a YAML with a non-zero
# runAsGroup, so changing the deployed GID to 0 must be rejected.
@test "Policy failure: unexpected GID = 0 for layered securityContext deployment" {
    # Change the pod GID to 0 after the policy has been generated using a different
    # runAsGroup value. The policy would use GID = 0 by default, if there weren't
    # a different runAsGroup value in the YAML file.
    yq -i \
        '.spec.template.spec.securityContext.runAsGroup = 0' \
        "${incorrect_deployment_yaml}"
    test_deployment_policy_error
}
# Per-test teardown: dump debugging information, then remove the deployment,
# the temporary policy settings, and the mutated YAML copy.
teardown() {
    auto_generate_policy_enabled || skip "Auto-generated policy tests are disabled."
    # Pod debugging information. Don't print the "Message:" line because it contains a truncated policy log.
    info "Pod ${deployment_name}:"
    kubectl describe pod "${deployment_name}" | grep -v "Message:"
    # Deployment debugging information. The --watch=false argument makes "kubectl rollout status"
    # return instead of waiting for a possibly failed deployment to complete.
    info "Deployment ${deployment_name}:"
    kubectl describe deployment "${deployment_name}"
    kubectl rollout status deployment/${deployment_name} --watch=false
    # Clean-up
    kubectl delete deployment "${deployment_name}"
    # NOTE(review): policy_settings_dir is only assigned in setup() when
    # BATS_TEST_NUMBER == 1, and bats runs each test in a fresh process, so
    # later teardowns pass an empty argument here — confirm that
    # delete_tmp_policy_settings_dir tolerates "".
    delete_tmp_policy_settings_dir "${policy_settings_dir}"
    rm -f "${incorrect_deployment_yaml}"
}

View File

@ -74,6 +74,7 @@ else
"k8s-pod-quota.bats" \ "k8s-pod-quota.bats" \
"k8s-policy-hard-coded.bats" \ "k8s-policy-hard-coded.bats" \
"k8s-policy-deployment.bats" \ "k8s-policy-deployment.bats" \
"k8s-policy-deployment-sc.bats" \
"k8s-policy-job.bats" \ "k8s-policy-job.bats" \
"k8s-policy-logs.bats" \ "k8s-policy-logs.bats" \
"k8s-policy-pod.bats" \ "k8s-policy-pod.bats" \

View File

@ -0,0 +1,41 @@
#
# Copyright (c) 2024 Microsoft
#
# SPDX-License-Identifier: Apache-2.0
#
apiVersion: apps/v1
kind: Deployment
metadata:
name: policy-redis-deployment
labels:
app: policyredis
spec:
selector:
matchLabels:
app: policyredis
role: master
tier: backend
replicas: 1
template:
metadata:
labels:
app: policyredis
role: master
tier: backend
spec:
terminationGracePeriodSeconds: 0
runtimeClassName: kata
securityContext:
runAsUser: 2000
runAsGroup: 2000
containers:
- name: master
image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48
securityContext:
runAsUser: 3000
resources:
requests:
cpu: 100m
memory: 100Mi
ports:
- containerPort: 6379

View File

@ -0,0 +1,39 @@
#
# Copyright (c) 2024 Microsoft
#
# SPDX-License-Identifier: Apache-2.0
#
apiVersion: apps/v1
kind: Deployment
metadata:
name: policy-redis-deployment
labels:
app: policyredis
spec:
selector:
matchLabels:
app: policyredis
role: master
tier: backend
replicas: 1
template:
metadata:
labels:
app: policyredis
role: master
tier: backend
spec:
terminationGracePeriodSeconds: 0
runtimeClassName: kata
securityContext:
runAsUser: 2000
runAsGroup: 2000
containers:
- name: master
image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48
resources:
requests:
cpu: 100m
memory: 100Mi
ports:
- containerPort: 6379

View File

@ -0,0 +1,38 @@
#
# Copyright (c) 2024 Microsoft
#
# SPDX-License-Identifier: Apache-2.0
#
apiVersion: apps/v1
kind: Deployment
metadata:
name: policy-redis-deployment
labels:
app: policyredis
spec:
selector:
matchLabels:
app: policyredis
role: master
tier: backend
replicas: 1
template:
metadata:
labels:
app: policyredis
role: master
tier: backend
spec:
terminationGracePeriodSeconds: 0
runtimeClassName: kata
securityContext:
runAsUser: 65534
containers:
- name: master
image: quay.io/opstree/redis@sha256:2642c7b07713df6897fa88cbe6db85170690cf3650018ceb2ab16cfa0b4f8d48
resources:
requests:
cpu: 100m
memory: 100Mi
ports:
- containerPort: 6379

View File

@ -116,6 +116,15 @@ adapt_common_policy_settings_for_cbl_mariner() {
true true
} }
# Adapt common policy settings for guest-pull hosts by forcing
# cluster_config.guest_pull = true in genpolicy-settings.json.
# See issue https://github.com/kata-containers/kata-containers/issues/11162
adapt_common_policy_settings_for_guest_pull() {
    local settings_dir=$1

    info "Adapting common policy settings for guest-pull environment"

    # Rewrite the settings through a unique temp file instead of a hard-coded
    # ./temp.json: this avoids collisions between concurrent test runs and
    # does not leave residue in the current directory when jq fails.
    local temp_file
    temp_file=$(mktemp)
    if jq '.cluster_config.guest_pull = true' "${settings_dir}/genpolicy-settings.json" > "${temp_file}"; then
        sudo mv "${temp_file}" "${settings_dir}/genpolicy-settings.json"
    else
        rm -f "${temp_file}"
        return 1
    fi
}
# adapt common policy settings for various platforms # adapt common policy settings for various platforms
adapt_common_policy_settings() { adapt_common_policy_settings() {
local settings_dir=$1 local settings_dir=$1
@ -143,6 +152,12 @@ adapt_common_policy_settings() {
adapt_common_policy_settings_for_cbl_mariner "${settings_dir}" adapt_common_policy_settings_for_cbl_mariner "${settings_dir}"
;; ;;
esac esac
case "${PULL_TYPE}" in
"guest-pull")
adapt_common_policy_settings_for_guest_pull "${settings_dir}"
;;
esac
} }
# If auto-generated policy testing is enabled, make a copy of the genpolicy settings, # If auto-generated policy testing is enabled, make a copy of the genpolicy settings,