mirror of
https://github.com/kata-containers/kata-containers.git
synced 2026-07-01 22:50:54 +00:00
Merge pull request #13214 from fidencio/topic/block-volume-readonly-propagation
runtime(-rs): Propagate host block device read-only flag to the VMM
This commit is contained in:
@@ -12,7 +12,7 @@ use async_trait::async_trait;
|
||||
use hypervisor::{
|
||||
device::{
|
||||
device_manager::{do_handle_device, get_block_device_info, DeviceManager},
|
||||
util::{get_host_path, DEVICE_TYPE_CHAR},
|
||||
util::{get_host_path, DEVICE_TYPE_BLOCK, DEVICE_TYPE_CHAR},
|
||||
DeviceConfig, DeviceType,
|
||||
},
|
||||
utils::uses_native_ccw_bus,
|
||||
@@ -41,7 +41,7 @@ use crate::{
|
||||
resource_persist::ResourceState,
|
||||
rootfs::{RootFsResource, Rootfs},
|
||||
share_fs::{self, sandbox_bind_mounts::SandboxBindMounts, NydusShareFs, ShareFs},
|
||||
volume::{Volume, VolumeResource},
|
||||
volume::{utils::is_block_device_readonly, Volume, VolumeResource},
|
||||
ResourceConfig, ResourceUpdateOp,
|
||||
};
|
||||
|
||||
@@ -535,9 +535,21 @@ impl ResourceManagerInner {
|
||||
match d.typ() {
|
||||
LinuxDeviceType::B => {
|
||||
let blkdev_info = get_block_device_info(&self.device_manager).await;
|
||||
// Read-only intent comes from the cgroup device access rule.
|
||||
// Also honor the host device's own read-only flag (BLKROGET):
|
||||
// block-mode volumes frequently carry no read-only signal in
|
||||
// the OCI spec, so the device flag is the only reliable
|
||||
// source. Either signal being positive marks it read-only.
|
||||
let is_readonly = device_cgroup_access_is_readonly(
|
||||
linux,
|
||||
LinuxDeviceType::B,
|
||||
d.major(),
|
||||
d.minor(),
|
||||
) || block_device_node_is_readonly(d.major(), d.minor());
|
||||
let dev_info = DeviceConfig::BlockCfg(BlockConfig {
|
||||
major: d.major(),
|
||||
minor: d.minor(),
|
||||
is_readonly,
|
||||
driver_option: blkdev_info.block_device_driver,
|
||||
blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
|
||||
num_queues: blkdev_info.num_queues,
|
||||
@@ -1199,3 +1211,164 @@ async fn resolve_physical_endpoint_pci_paths(
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Derive a device's read-only intent from the cgroup device access rules.
|
||||
///
|
||||
/// Block-mode volumes (e.g. Kubernetes volumeDevices) are passed as device
|
||||
/// nodes in `spec.Linux.Devices` and carry no mount "ro" option; their
|
||||
/// read-only intent is expressed solely through the cgroup device access in
|
||||
/// `spec.Linux.Resources.Devices` ("rm" = read+mknod, no write, for read-only;
|
||||
/// "rwm" for read-write).
|
||||
///
|
||||
/// The allow rule that exactly matches the device (type and exact major/minor)
|
||||
/// decides: the device is read-only when that rule grants access without the
|
||||
/// write ("w") bit. Wildcard rules (no major/minor) describe broad device
|
||||
/// classes and are ignored so they cannot override a specific device's access.
|
||||
/// If no exact rule matches, the device is left read-write.
|
||||
fn device_cgroup_access_is_readonly(
|
||||
linux: &Linux,
|
||||
dev_type: LinuxDeviceType,
|
||||
major: i64,
|
||||
minor: i64,
|
||||
) -> bool {
|
||||
let devices = match linux.resources().as_ref().and_then(|r| r.devices().as_ref()) {
|
||||
Some(devices) => devices,
|
||||
None => return false,
|
||||
};
|
||||
|
||||
for r in devices.iter() {
|
||||
if !r.allow() {
|
||||
continue;
|
||||
}
|
||||
let (rule_major, rule_minor) = match (r.major(), r.minor()) {
|
||||
(Some(major), Some(minor)) => (major, minor),
|
||||
_ => continue,
|
||||
};
|
||||
if rule_major != major || rule_minor != minor {
|
||||
continue;
|
||||
}
|
||||
// A specific type must match; `A` (all) and an unset type are wildcards.
|
||||
if let Some(typ) = r.typ() {
|
||||
if typ != LinuxDeviceType::A && typ != dev_type {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return !r.access().as_deref().unwrap_or("").contains('w');
|
||||
}
|
||||
|
||||
false
|
||||
}
|
||||
|
||||
/// block_device_node_is_readonly reports whether the host block device
|
||||
/// identified by major:minor advertises the read-only flag (BLKROGET). This is
|
||||
/// the ground truth for a device's writability: block-mode volumes frequently
|
||||
/// carry no read-only signal in the OCI spec, so the device flag is the only
|
||||
/// reliable source. Any failure is logged and treated as not-read-only so it
|
||||
/// can never flip a positive signal back.
|
||||
fn block_device_node_is_readonly(major: i64, minor: i64) -> bool {
|
||||
let host_path = match get_host_path(DEVICE_TYPE_BLOCK, major, minor) {
|
||||
Ok(path) if !path.is_empty() => path,
|
||||
Ok(_) => return false,
|
||||
Err(e) => {
|
||||
warn!(
|
||||
sl!(),
|
||||
"could not resolve host path for block device {}:{}: {:?}", major, minor, e
|
||||
);
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
is_block_device_readonly(&host_path).unwrap_or_else(|e| {
|
||||
warn!(
|
||||
sl!(),
|
||||
"could not query block device read-only flag for {}: {:?}", host_path, e
|
||||
);
|
||||
false
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::device_cgroup_access_is_readonly;
|
||||
use oci_spec::runtime::{
|
||||
Linux, LinuxBuilder, LinuxDeviceCgroup, LinuxDeviceCgroupBuilder, LinuxDeviceType,
|
||||
LinuxResourcesBuilder,
|
||||
};
|
||||
use rstest::rstest;
|
||||
|
||||
const MAJOR: i64 = 8;
|
||||
const MINOR: i64 = 0;
|
||||
|
||||
fn rule(
|
||||
allow: bool,
|
||||
typ: LinuxDeviceType,
|
||||
major: Option<i64>,
|
||||
minor: Option<i64>,
|
||||
access: &str,
|
||||
) -> LinuxDeviceCgroup {
|
||||
let mut builder = LinuxDeviceCgroupBuilder::default()
|
||||
.allow(allow)
|
||||
.typ(typ)
|
||||
.access(access);
|
||||
if let Some(major) = major {
|
||||
builder = builder.major(major);
|
||||
}
|
||||
if let Some(minor) = minor {
|
||||
builder = builder.minor(minor);
|
||||
}
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
fn linux_with_rules(rules: Vec<LinuxDeviceCgroup>) -> Linux {
|
||||
LinuxBuilder::default()
|
||||
.resources(
|
||||
LinuxResourcesBuilder::default()
|
||||
.devices(rules)
|
||||
.build()
|
||||
.unwrap(),
|
||||
)
|
||||
.build()
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
#[rstest]
|
||||
#[case::no_rules(vec![], false)]
|
||||
#[case::exact_match_rm(vec![rule(true, LinuxDeviceType::B, Some(MAJOR), Some(MINOR), "rm")], true)]
|
||||
#[case::exact_match_r(vec![rule(true, LinuxDeviceType::B, Some(MAJOR), Some(MINOR), "r")], true)]
|
||||
#[case::exact_match_rwm(vec![rule(true, LinuxDeviceType::B, Some(MAJOR), Some(MINOR), "rwm")], false)]
|
||||
#[case::type_all_is_wildcard(vec![rule(true, LinuxDeviceType::A, Some(MAJOR), Some(MINOR), "rm")], true)]
|
||||
#[case::deny_rule_ignored(vec![rule(false, LinuxDeviceType::B, Some(MAJOR), Some(MINOR), "rm")], false)]
|
||||
#[case::wildcard_major_ignored(vec![rule(true, LinuxDeviceType::B, None, Some(MINOR), "rm")], false)]
|
||||
#[case::wildcard_minor_ignored(vec![rule(true, LinuxDeviceType::B, Some(MAJOR), None, "rm")], false)]
|
||||
#[case::type_mismatch_ignored(vec![rule(true, LinuxDeviceType::C, Some(MAJOR), Some(MINOR), "rm")], false)]
|
||||
#[case::different_device_ignored(vec![rule(true, LinuxDeviceType::B, Some(9), Some(1), "rm")], false)]
|
||||
#[case::first_exact_match_wins(
|
||||
vec![
|
||||
rule(true, LinuxDeviceType::B, Some(MAJOR), Some(MINOR), "rm"),
|
||||
rule(true, LinuxDeviceType::B, Some(MAJOR), Some(MINOR), "rwm"),
|
||||
],
|
||||
true
|
||||
)]
|
||||
fn test_device_cgroup_access_is_readonly(
|
||||
#[case] rules: Vec<LinuxDeviceCgroup>,
|
||||
#[case] expected: bool,
|
||||
) {
|
||||
let linux = linux_with_rules(rules);
|
||||
assert_eq!(
|
||||
device_cgroup_access_is_readonly(&linux, LinuxDeviceType::B, MAJOR, MINOR),
|
||||
expected
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_no_resources() {
|
||||
let linux = LinuxBuilder::default().build().unwrap();
|
||||
assert!(!device_cgroup_access_is_readonly(
|
||||
&linux,
|
||||
LinuxDeviceType::B,
|
||||
MAJOR,
|
||||
MINOR
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,7 +5,9 @@
|
||||
//
|
||||
|
||||
use super::Volume;
|
||||
use crate::volume::utils::{handle_block_volume, DEFAULT_VOLUME_FS_TYPE, KATA_MOUNT_BIND_TYPE};
|
||||
use crate::volume::utils::{
|
||||
handle_block_volume, is_block_device_readonly, DEFAULT_VOLUME_FS_TYPE, KATA_MOUNT_BIND_TYPE,
|
||||
};
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use async_trait::async_trait;
|
||||
use hypervisor::{
|
||||
@@ -42,9 +44,25 @@ impl BlockVolume {
|
||||
|
||||
let blkdev_info = get_block_device_info(d).await;
|
||||
let fstat = stat::stat(mnt_src).context(format!("stat {}", mnt_src.display()))?;
|
||||
|
||||
// Honor the host block device's own read-only flag in addition to the
|
||||
// mount-derived intent, so a device marked read-only on the host is
|
||||
// exposed read-only to the guest.
|
||||
let read_only = read_only
|
||||
|| is_block_device_readonly(mnt_src).unwrap_or_else(|e| {
|
||||
warn!(
|
||||
sl!(),
|
||||
"could not query block device read-only flag for {}: {:?}",
|
||||
mnt_src.display(),
|
||||
e
|
||||
);
|
||||
false
|
||||
});
|
||||
|
||||
let block_device_config = BlockConfig {
|
||||
major: stat::major(fstat.st_rdev) as i64,
|
||||
minor: stat::minor(fstat.st_rdev) as i64,
|
||||
is_readonly: read_only,
|
||||
driver_option: blkdev_info.block_device_driver,
|
||||
blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
|
||||
num_queues: blkdev_info.num_queues,
|
||||
|
||||
@@ -18,7 +18,11 @@ use nix::sys::{stat, stat::SFlag};
|
||||
use oci_spec::runtime as oci;
|
||||
use tokio::sync::RwLock;
|
||||
|
||||
use crate::volume::{direct_volumes::KATA_DIRECT_VOLUME_TYPE, utils::handle_block_volume, Volume};
|
||||
use crate::volume::{
|
||||
direct_volumes::KATA_DIRECT_VOLUME_TYPE,
|
||||
utils::{handle_block_volume, is_block_device_readonly},
|
||||
Volume,
|
||||
};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub(crate) struct RawblockVolume {
|
||||
@@ -58,8 +62,25 @@ impl RawblockVolume {
|
||||
));
|
||||
}
|
||||
|
||||
// For a real block device, honor its host read-only flag (BLKROGET) in
|
||||
// addition to the mount-derived intent, so a device marked read-only on
|
||||
// the host is exposed read-only to the guest. (Not applicable to
|
||||
// regular-file backed images.)
|
||||
let read_only = read_only
|
||||
|| (SFlag::from_bits_truncate(fstat.st_mode) == SFlag::S_IFBLK
|
||||
&& is_block_device_readonly(mount_info.device.as_str()).unwrap_or_else(|e| {
|
||||
warn!(
|
||||
sl!(),
|
||||
"could not query block device read-only flag for {}: {:?}",
|
||||
mount_info.device,
|
||||
e
|
||||
);
|
||||
false
|
||||
}));
|
||||
|
||||
let block_config = BlockConfigModern {
|
||||
path_on_host: mount_info.device.clone(),
|
||||
is_readonly: read_only,
|
||||
driver_option: blkdev_info.block_device_driver,
|
||||
blkdev_aio: BlockDeviceAio::new(&blkdev_info.block_device_aio),
|
||||
num_queues: blkdev_info.num_queues,
|
||||
|
||||
@@ -6,6 +6,8 @@
|
||||
|
||||
use std::{
|
||||
fs,
|
||||
fs::OpenOptions,
|
||||
os::unix::{fs::OpenOptionsExt, io::AsRawFd},
|
||||
path::{Path, PathBuf},
|
||||
};
|
||||
|
||||
@@ -26,6 +28,33 @@ use hypervisor::device::DeviceType;
|
||||
pub const DEFAULT_VOLUME_FS_TYPE: &str = "ext4";
|
||||
pub const KATA_MOUNT_BIND_TYPE: &str = "bind";
|
||||
|
||||
// BLKROGET (_IO(0x12, 94)) returns the block device's read-only flag into an
|
||||
// int. It is encoded as an `_IO` ioctl but actually transfers data, so it is a
|
||||
// "bad" ioctl; request_code_none! produces the correct, arch-aware value.
|
||||
nix::ioctl_read_bad!(blkroget, nix::request_code_none!(0x12, 94), libc::c_int);
|
||||
|
||||
/// Query the host block device's read-only flag (BLKROGET). This reflects the
|
||||
/// device's actual writability, which is the ground truth for whether the guest
|
||||
/// should see it read-only: when the host backing is read-only, writes from the
|
||||
/// guest fail at the host anyway, so the device must be exposed read-only. The
|
||||
/// read-only intent for such devices is frequently not carried in the OCI spec
|
||||
/// (no "ro" mount option), so the device flag is the only reliable source.
|
||||
pub(crate) fn is_block_device_readonly<P: AsRef<Path>>(path: P) -> Result<bool> {
|
||||
let path = path.as_ref();
|
||||
let file = OpenOptions::new()
|
||||
.read(true)
|
||||
.custom_flags(libc::O_CLOEXEC | libc::O_NONBLOCK)
|
||||
.open(path)
|
||||
.with_context(|| format!("open {} for readonly probe", path.display()))?;
|
||||
|
||||
let mut ro: libc::c_int = 0;
|
||||
// Safe: file owns a valid fd for the duration of the call and `ro` is a
|
||||
// valid, properly aligned pointer to an initialized int.
|
||||
unsafe { blkroget(file.as_raw_fd(), &mut ro).context("ioctl BLKROGET")? };
|
||||
|
||||
Ok(ro != 0)
|
||||
}
|
||||
|
||||
pub fn get_file_name<P: AsRef<Path>>(src: P) -> Result<String> {
|
||||
let file_name = src
|
||||
.as_ref()
|
||||
|
||||
37
src/runtime/pkg/device/config/config_linux.go
Normal file
37
src/runtime/pkg/device/config/config_linux.go
Normal file
@@ -0,0 +1,37 @@
|
||||
// Copyright (c) 2017-2018 Intel Corporation
|
||||
// Copyright (c) 2018 Huawei Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"golang.org/x/sys/unix"
|
||||
)
|
||||
|
||||
// BlockDeviceIsReadOnly queries the host block device at path for its
|
||||
// read-only flag (BLKROGET). This reflects the device's actual writability,
|
||||
// which is the ground truth for whether the guest should see it read-only:
|
||||
// when the host backing is read-only, writes from the guest fail at the host
|
||||
// anyway, so the device must be exposed read-only (e.g. so the guest does not
|
||||
// attempt a journal replay that cannot succeed). The read-only intent for such
|
||||
// devices is frequently not carried in the OCI spec (no "ro" mount option and
|
||||
// no read-only cgroup device rule), so the device flag is the only reliable
|
||||
// source.
|
||||
func BlockDeviceIsReadOnly(path string) (bool, error) {
|
||||
f, err := os.OpenFile(path, os.O_RDONLY|unix.O_CLOEXEC|unix.O_NONBLOCK, 0)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
ro, err := unix.IoctlGetInt(int(f.Fd()), unix.BLKROGET)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
|
||||
return ro != 0, nil
|
||||
}
|
||||
18
src/runtime/pkg/device/config/config_nonlinux.go
Normal file
18
src/runtime/pkg/device/config/config_nonlinux.go
Normal file
@@ -0,0 +1,18 @@
|
||||
// Copyright (c) 2017-2018 Intel Corporation
|
||||
// Copyright (c) 2018 Huawei Corporation
|
||||
//
|
||||
// SPDX-License-Identifier: Apache-2.0
|
||||
//
|
||||
|
||||
//go:build !linux
|
||||
|
||||
package config
|
||||
|
||||
import "fmt"
|
||||
|
||||
// BlockDeviceIsReadOnly is only meaningful on Linux, where the BLKROGET ioctl
|
||||
// is available. On other platforms it is a no-op stub so the package still
|
||||
// builds (e.g. for the macOS CI), and callers treat the error as "unknown".
|
||||
func BlockDeviceIsReadOnly(path string) (bool, error) {
|
||||
return false, fmt.Errorf("BlockDeviceIsReadOnly is not supported on this platform")
|
||||
}
|
||||
@@ -320,7 +320,62 @@ func checkAnnotationNameIsValid(list []string, name string, prefix string) bool
|
||||
return true
|
||||
}
|
||||
|
||||
func newLinuxDeviceInfo(d specs.LinuxDevice) (*config.DeviceInfo, error) {
|
||||
// deviceCgroupAccessIsReadOnly derives a device's read-only intent from the
|
||||
// cgroup device access rules. Block-mode volumes (e.g. Kubernetes
|
||||
// volumeDevices) are passed as device nodes in spec.Linux.Devices and carry no
|
||||
// mount "ro" option; their read-only intent is expressed solely through the
|
||||
// cgroup device access in spec.Linux.Resources.Devices ("rm" = read+mknod, no
|
||||
// write, for read-only; "rwm" for read-write).
|
||||
//
|
||||
// The allow rule that exactly matches the device (type and exact major/minor)
|
||||
// decides: the device is read-only when that rule grants access without the
|
||||
// write ("w") bit. Wildcard rules (nil major/minor) describe broad device
|
||||
// classes and are ignored so they cannot override a specific device's access.
|
||||
// If no exact rule matches, the device is left read-write, preserving the
|
||||
// previous behavior.
|
||||
func deviceCgroupAccessIsReadOnly(resources *specs.LinuxResources, devType string, major, minor int64) bool {
|
||||
if resources == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
for _, r := range resources.Devices {
|
||||
if !r.Allow {
|
||||
continue
|
||||
}
|
||||
if r.Major == nil || r.Minor == nil {
|
||||
continue
|
||||
}
|
||||
if *r.Major != major || *r.Minor != minor {
|
||||
continue
|
||||
}
|
||||
if r.Type != "" && r.Type != "a" && r.Type != devType {
|
||||
continue
|
||||
}
|
||||
|
||||
return !strings.Contains(r.Access, "w")
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// blockDeviceReadOnlyProbe reports whether the host block device identified by
|
||||
// major:minor advertises the read-only flag (BLKROGET). It is a package
|
||||
// variable so tests can stub the host probe. The default implementation does a
|
||||
// best-effort probe of /dev/block/<major>:<minor> (the canonical sysfs-backed
|
||||
// node that always exists for a registered block device); any failure is logged
|
||||
// and treated as not-read-only so it can never flip a positive signal back.
|
||||
var blockDeviceReadOnlyProbe = func(major, minor int64) bool {
|
||||
path := fmt.Sprintf("/dev/block/%d:%d", major, minor)
|
||||
ro, err := config.BlockDeviceIsReadOnly(path)
|
||||
if err != nil {
|
||||
ociLog.WithError(err).WithField("device", path).
|
||||
Warn("could not query block device read-only flag")
|
||||
return false
|
||||
}
|
||||
return ro
|
||||
}
|
||||
|
||||
func newLinuxDeviceInfo(d specs.LinuxDevice, resources *specs.LinuxResources) (*config.DeviceInfo, error) {
|
||||
allowedDeviceTypes := []string{"c", "b", "u", "p"}
|
||||
|
||||
if !contains(allowedDeviceTypes, d.Type) {
|
||||
@@ -331,11 +386,22 @@ func newLinuxDeviceInfo(d specs.LinuxDevice) (*config.DeviceInfo, error) {
|
||||
return nil, fmt.Errorf("Path cannot be empty for device")
|
||||
}
|
||||
|
||||
// Read-only intent comes from the cgroup device access rule. For block
|
||||
// devices, also honor the host device's own read-only flag (BLKROGET):
|
||||
// block-mode volumes frequently carry no read-only signal in the OCI spec,
|
||||
// so the device flag is the only reliable source. Either signal being
|
||||
// positive marks the device read-only.
|
||||
readOnly := deviceCgroupAccessIsReadOnly(resources, d.Type, d.Major, d.Minor)
|
||||
if !readOnly && d.Type == "b" {
|
||||
readOnly = blockDeviceReadOnlyProbe(d.Major, d.Minor)
|
||||
}
|
||||
|
||||
deviceInfo := config.DeviceInfo{
|
||||
ContainerPath: d.Path,
|
||||
DevType: d.Type,
|
||||
Major: d.Major,
|
||||
Minor: d.Minor,
|
||||
ReadOnly: readOnly,
|
||||
}
|
||||
if d.UID != nil {
|
||||
deviceInfo.UID = *d.UID
|
||||
@@ -359,9 +425,14 @@ func containerDeviceInfos(spec specs.Spec) ([]config.DeviceInfo, error) {
|
||||
return []config.DeviceInfo{}, nil
|
||||
}
|
||||
|
||||
var resources *specs.LinuxResources
|
||||
if spec.Linux != nil {
|
||||
resources = spec.Linux.Resources
|
||||
}
|
||||
|
||||
var devices []config.DeviceInfo
|
||||
for _, d := range ociLinuxDevices {
|
||||
linuxDeviceInfo, err := newLinuxDeviceInfo(d)
|
||||
linuxDeviceInfo, err := newLinuxDeviceInfo(d, resources)
|
||||
if err != nil {
|
||||
return []config.DeviceInfo{}, err
|
||||
}
|
||||
|
||||
@@ -375,6 +375,95 @@ func TestDevicePathEmpty(t *testing.T) {
|
||||
assert.NotNil(t, err, "This test should fail as path cannot be empty for device")
|
||||
}
|
||||
|
||||
func TestDeviceCgroupAccessIsReadOnly(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
i64 := func(v int64) *int64 { return &v }
|
||||
major, minor := int64(8), int64(0)
|
||||
|
||||
rule := func(allow bool, typ string, maj, min *int64, access string) specs.LinuxDeviceCgroup {
|
||||
return specs.LinuxDeviceCgroup{
|
||||
Allow: allow,
|
||||
Type: typ,
|
||||
Major: maj,
|
||||
Minor: min,
|
||||
Access: access,
|
||||
}
|
||||
}
|
||||
|
||||
resources := func(rules ...specs.LinuxDeviceCgroup) *specs.LinuxResources {
|
||||
return &specs.LinuxResources{Devices: rules}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
resources *specs.LinuxResources
|
||||
expected bool
|
||||
}{
|
||||
{"nil resources", nil, false},
|
||||
{"no rules", resources(), false},
|
||||
{"exact match read-only (rm)", resources(rule(true, "b", i64(major), i64(minor), "rm")), true},
|
||||
{"exact match read-only (r)", resources(rule(true, "b", i64(major), i64(minor), "r")), true},
|
||||
{"exact match read-write (rwm)", resources(rule(true, "b", i64(major), i64(minor), "rwm")), false},
|
||||
{"type all (a) read-only", resources(rule(true, "a", i64(major), i64(minor), "rm")), true},
|
||||
{"empty type read-only", resources(rule(true, "", i64(major), i64(minor), "rm")), true},
|
||||
{"deny rule is ignored", resources(rule(false, "b", i64(major), i64(minor), "rm")), false},
|
||||
{"wildcard major is ignored", resources(rule(true, "b", nil, i64(minor), "rm")), false},
|
||||
{"wildcard minor is ignored", resources(rule(true, "b", i64(major), nil, "rm")), false},
|
||||
{"type mismatch is ignored", resources(rule(true, "c", i64(major), i64(minor), "rm")), false},
|
||||
{"different device is ignored", resources(rule(true, "b", i64(9), i64(1), "rm")), false},
|
||||
{
|
||||
"first exact match wins",
|
||||
resources(
|
||||
rule(true, "b", i64(major), i64(minor), "rm"),
|
||||
rule(true, "b", i64(major), i64(minor), "rwm"),
|
||||
),
|
||||
true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
assert.Equal(tt.expected, deviceCgroupAccessIsReadOnly(tt.resources, "b", major, minor), tt.name)
|
||||
}
|
||||
}
|
||||
|
||||
func TestContainerDeviceInfosReadOnly(t *testing.T) {
|
||||
assert := assert.New(t)
|
||||
|
||||
// Stub the host BLKROGET probe so the test is deterministic and does no
|
||||
// real device I/O: only the probe-only device (8:32) reports read-only.
|
||||
savedProbe := blockDeviceReadOnlyProbe
|
||||
defer func() { blockDeviceReadOnlyProbe = savedProbe }()
|
||||
blockDeviceReadOnlyProbe = func(major, minor int64) bool {
|
||||
return major == 8 && minor == 32
|
||||
}
|
||||
|
||||
i64 := func(v int64) *int64 { return &v }
|
||||
|
||||
var ociSpec specs.Spec
|
||||
ociSpec.Linux = &specs.Linux{
|
||||
Devices: []specs.LinuxDevice{
|
||||
{Path: "/dev/cgroup-ro-blk", Type: "b", Major: 8, Minor: 0},
|
||||
{Path: "/dev/rw-blk", Type: "b", Major: 8, Minor: 16},
|
||||
{Path: "/dev/blkroget-ro-blk", Type: "b", Major: 8, Minor: 32},
|
||||
},
|
||||
Resources: &specs.LinuxResources{
|
||||
Devices: []specs.LinuxDeviceCgroup{
|
||||
{Allow: true, Type: "b", Major: i64(8), Minor: i64(0), Access: "rm"},
|
||||
{Allow: true, Type: "b", Major: i64(8), Minor: i64(16), Access: "rwm"},
|
||||
{Allow: true, Type: "b", Major: i64(8), Minor: i64(32), Access: "rwm"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
devices, err := containerDeviceInfos(ociSpec)
|
||||
assert.NoError(err)
|
||||
assert.Len(devices, 3)
|
||||
assert.True(devices[0].ReadOnly, "device with cgroup access \"rm\" should be read-only")
|
||||
assert.False(devices[1].ReadOnly, "device with cgroup access \"rwm\" and writable host device should be read-write")
|
||||
assert.True(devices[2].ReadOnly, "device flagged read-only via BLKROGET should be read-only even with \"rwm\" cgroup access")
|
||||
}
|
||||
|
||||
func TestGetShmSize(t *testing.T) {
|
||||
containerConfig := vc.ContainerConfig{
|
||||
Mounts: []vc.Mount{},
|
||||
|
||||
@@ -814,6 +814,18 @@ func (c *Container) createDeviceInfo(source, destination string, readonly, isBlo
|
||||
var err error
|
||||
|
||||
if stat.Mode&unix.S_IFMT == unix.S_IFBLK {
|
||||
// Honor the host block device's own read-only flag in addition to the
|
||||
// mount-derived intent, so a device marked read-only on the host is
|
||||
// exposed read-only to the guest.
|
||||
if !readonly {
|
||||
if ro, roErr := config.BlockDeviceIsReadOnly(source); roErr != nil {
|
||||
c.Logger().WithError(roErr).WithField("mount-source", source).
|
||||
Warn("could not query block device read-only flag")
|
||||
} else {
|
||||
readonly = ro
|
||||
}
|
||||
}
|
||||
|
||||
di = &config.DeviceInfo{
|
||||
HostPath: source,
|
||||
ContainerPath: destination,
|
||||
|
||||
Reference in New Issue
Block a user