diff --git a/README.md b/README.md index 0d8c5d2b82..431d687de0 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,7 @@ The table below lists the core parts of the project: | [runtime](src/runtime) | core | Main component run by a container manager and providing a containerd shimv2 runtime implementation. | | [agent](src/agent) | core | Management process running inside the virtual machine / POD that sets up the container environment. | | [documentation](docs) | documentation | Documentation common to all components (such as design and install documentation). | +| [libraries](src/libs) | core | Library crates shared by multiple Kata Container components or published to [`crates.io`](https://crates.io/index.html) | | [tests](https://github.com/kata-containers/tests) | tests | Excludes unit tests which live with the main code. | ### Additional components diff --git a/src/libs/logging/Cargo.lock b/src/libs/Cargo.lock similarity index 76% rename from src/libs/logging/Cargo.lock rename to src/libs/Cargo.lock index 78f8feef6d..ad9d8672c9 100644 --- a/src/libs/logging/Cargo.lock +++ b/src/libs/Cargo.lock @@ -1,7 +1,5 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. -version = 3 - [[package]] name = "arc-swap" version = "1.5.0" @@ -41,9 +39,9 @@ dependencies = [ [[package]] name = "crossbeam-channel" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +checksum = "e54ea8bc3fb1ee042f5aace6e3c6e025d3874866da222930f70ce62aceba0bfa" dependencies = [ "cfg-if", "crossbeam-utils", @@ -51,23 +49,30 @@ dependencies = [ [[package]] name = "crossbeam-utils" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +checksum = "cfcae03edb34f947e64acdb1c33ec169824e20657e9ecb61cef6c8c74dcb8120" dependencies = [ "cfg-if", "lazy_static", ] [[package]] -name = "getrandom" -version = "0.2.3" +name = "fastrand" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +checksum = "779d043b6a0b90cc4c0ed7ee380a6504394cee7efd7db050e3774eee387324b2" +dependencies = [ + "instant", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" dependencies = [ "cfg-if", - "libc", - "wasi", ] [[package]] @@ -84,9 +89,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" [[package]] name = "libc" -version = "0.2.112" +version = "0.2.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" +checksum = "eef78b64d87775463c549fbd80e19249ef436ea3bf1de2a1eb7e717ec7fab1e9" [[package]] name = "logging" @@ -125,52 +130,6 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" -[[package]] -name = "ppv-lite86" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" - -[[package]] -name = "rand" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" -dependencies = [ - "libc", - "rand_chacha", - "rand_core", - "rand_hc", -] - -[[package]] -name = "rand_chacha" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" -dependencies = [ - "ppv-lite86", - "rand_core", -] - -[[package]] -name = "rand_core" -version = "0.6.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" -dependencies = [ - "getrandom", -] - -[[package]] -name = "rand_hc" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" -dependencies = [ - "rand_core", -] - [[package]] name = "redox_syscall" version = "0.2.10" @@ -195,17 +154,25 @@ version = "1.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" +[[package]] +name = "safe-path" +version = "0.1.0" +dependencies = [ + "libc", + "tempfile", +] + [[package]] name = "serde" -version = "1.0.131" +version = "1.0.133" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1" +checksum = "97565067517b60e2d1ea8b268e59ce036de907ac523ad83a0475da04e818989a" [[package]] name = "serde_json" -version = "1.0.73" +version = "1.0.75" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5" +checksum = "c059c05b48c5c0067d4b4b2b4f0732dd65feb52daf7e0ea09cd87e7dadc1af79" dependencies = [ "itoa", "ryu", @@ -261,13 +228,13 @@ checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" [[package]] name = "tempfile" -version = "3.2.0" +version = "3.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dac1c663cfc93810f88aed9b8941d48cabf856a1b111c29a40439018d870eb22" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" dependencies = [ "cfg-if", + "fastrand", "libc", - "rand", "redox_syscall", "remove_dir_all", "winapi", @@ -284,19 +251,20 @@ dependencies = [ [[package]] name = "time" -version = "0.1.43" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" dependencies = [ "libc", + "wasi", "winapi", ] [[package]] name = "wasi" -version = "0.10.2+wasi-snapshot-preview1" +version = "0.10.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" [[package]] name = "winapi" diff --git a/src/libs/Cargo.toml b/src/libs/Cargo.toml new file mode 100644 index 0000000000..f4b707a14a --- /dev/null +++ b/src/libs/Cargo.toml @@ -0,0 +1,6 @@ +[workspace] +members = [ + "logging", + "safe-path", +] +resolver = "2" diff --git a/src/libs/README.md b/src/libs/README.md new file mode 100644 index 0000000000..36f2f00d73 --- /dev/null +++ b/src/libs/README.md @@ -0,0 +1,10 @@ +The `src/libs` directory hosts library crates which may be shared by multiple Kata Containers components +or published to [`crates.io`](https://crates.io/index.html). + +### Library Crates +Currently it provides following library crates: + +| Library | Description | +|-|-|-| +| [logging](logging/) | Facilities to setup logging subsystem based slog. | +| [safe-path](safe-path/) | Utilities to safely resolve filesystem paths. | diff --git a/src/libs/safe-path/Cargo.toml b/src/libs/safe-path/Cargo.toml new file mode 100644 index 0000000000..92e0ee9480 --- /dev/null +++ b/src/libs/safe-path/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "safe-path" +version = "0.1.0" +description = "A library to safely handle file system paths for container runtimes" +keywords = ["kata", "container", "path", "securejoin"] +categories = ["parser-implementations", "filesystem"] +authors = ["The Kata Containers community "] +repository = "https://github.com/kata-containers/kata-containers.git" +homepage = "https://katacontainers.io/" +readme = "README.md" +license = "Apache-2.0" +edition = "2018" + +[dependencies] +libc = "0.2.100" + +[dev-dependencies] +tempfile = "3.2.0" diff --git a/src/libs/safe-path/README.md b/src/libs/safe-path/README.md new file mode 100644 index 0000000000..3c08f72656 --- /dev/null +++ b/src/libs/safe-path/README.md @@ -0,0 +1,21 @@ +Safe Path +==================== +[![CI](https://github.com/magiclen/path-absolutize/actions/workflows/ci.yml/badge.svg)](https://github.com/magiclen/path-absolutize/actions/workflows/ci.yml) + +A library to safely handle filesystem paths, typically for container runtimes. + +There are often path related attacks, such as symlink based attacks, TOCTTOU attacks. The `safe-path` crate +provides several functions and utility structures to protect against path resolution related attacks. + +## Support + +**Operating Systems**: +- Linux + +## Reference +- [`filepath-securejoin`](https://github.com/cyphar/filepath-securejoin): secure_join() written in Go. +- [CVE-2021-30465](https://github.com/advisories/GHSA-c3xm-pvg7-gh7r): symlink related TOCTOU flaw in `runC`. + +## License + +This code is licensed under [Apache-2.0](../../../LICENSE). diff --git a/src/libs/safe-path/src/lib.rs b/src/libs/safe-path/src/lib.rs new file mode 100644 index 0000000000..6b853a3c87 --- /dev/null +++ b/src/libs/safe-path/src/lib.rs @@ -0,0 +1,65 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +//! A library to safely handle filesystem paths, typically for container runtimes. +//! +//! Linux [mount namespace](https://man7.org/linux/man-pages/man7/mount_namespaces.7.html) +//! provides isolation of the list of mounts seen by the processes in each +//! [namespace](https://man7.org/linux/man-pages/man7/namespaces.7.html) instance. +//! Thus, the processes in each of the mount namespace instances will see distinct single-directory +//! hierarchies. +//! +//! Containers are used to isolate workloads from the host system. Container on Linux systems +//! depends on the mount namespace to build an isolated root filesystem for each container, +//! thus protect the host and containers from each other. When creating containers, the container +//! runtime needs to setup filesystem mounts for container rootfs/volumes. Configuration for +//! mounts/paths may be indirectly controlled by end users through: +//! - container images +//! - Kubernetes pod specifications +//! - hook command line arguments +//! +//! These volume configuration information may be controlled by end users/malicious attackers, +//! so it must not be trusted by container runtimes. When the container runtime is preparing mount +//! namespace for a container, it must be very careful to validate user input configuration +//! information and ensure data out of the container rootfs directory won't be affected +//! by the container. There are several types of attacks related to container mount namespace: +//! - symlink based attack +//! - Time of check to time of use (TOCTTOU) +//! +//! This crate provides several mechanisms for container runtimes to safely handle filesystem paths +//! when preparing mount namespace for containers. +//! - [scoped_join()](crate::scoped_join()): safely join `unsafe_path` to `root`, and ensure +//! `unsafe_path` is scoped under `root`. +//! - [scoped_resolve()](crate::scoped_resolve()): resolve `unsafe_path` to a relative path, +//! rooted at and constrained by `root`. +//! - [struct PinnedPathBuf](crate::PinnedPathBuf): safe version of `PathBuf` to protect from +//! TOCTTOU style of attacks, which ensures: +//! - the value of [`PinnedPathBuf::as_path()`] never changes. +//! - the path returned by [`PinnedPathBuf::as_path()`] is always a symlink. +//! - the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never changes. +//! - the value of [`PinnedPathBuf::target()`] never changes. +//! - [struct ScopedDirBuilder](crate::ScopedDirBuilder): safe version of `DirBuilder` to protect +//! from symlink race and TOCTTOU style of attacks, which enhances security by: +//! - ensuring the new directories are created under a specified `root` directory. +//! - avoiding symlink race attacks during making directories. +//! - returning a [PinnedPathBuf] for the last level of directory, so it could be used for other +//! operations safely. +//! +//! The work is inspired by: +//! - [`filepath-securejoin`](https://github.com/cyphar/filepath-securejoin): secure_join() written +//! in Go. +//! - [CVE-2021-30465](https://github.com/advisories/GHSA-c3xm-pvg7-gh7r): symlink related TOCTOU +//! flaw in `runC`. + +#![deny(missing_docs)] + +mod pinned_path_buf; +pub use pinned_path_buf::PinnedPathBuf; + +mod scoped_dir_builder; +pub use scoped_dir_builder::ScopedDirBuilder; + +mod scoped_path_resolver; +pub use scoped_path_resolver::{scoped_join, scoped_resolve}; diff --git a/src/libs/safe-path/src/pinned_path_buf.rs b/src/libs/safe-path/src/pinned_path_buf.rs new file mode 100644 index 0000000000..e25b43769f --- /dev/null +++ b/src/libs/safe-path/src/pinned_path_buf.rs @@ -0,0 +1,400 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::ffi::{CString, OsStr}; +use std::fs::{self, File, Metadata, OpenOptions}; +use std::io::{Error, ErrorKind, Result}; +use std::ops::Deref; +use std::os::unix::ffi::OsStrExt; +use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; +use std::path::{Component, Path, PathBuf}; + +use crate::scoped_join; + +/// A safe version of [`PathBuf`] pinned to an underlying filesystem object to protect from +/// `TOCTTOU` style of attacks. +/// +/// A [`PinnedPathBuf`] is a resolved path buffer pinned to an underlying filesystem object, which +/// guarantees: +/// - the value of [`PinnedPathBuf::as_path()`] never changes. +/// - the path returned by [`PinnedPathBuf::as_path()`] is always a symlink. +/// - the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never changes. +/// - the value of [`PinnedPathBuf::target()`] never changes. +/// +/// Note: +/// - Though the filesystem object referenced by the symlink [`PinnedPathBuf::as_path()`] never +/// changes, the value of `fs::read_link(PinnedPathBuf::as_path())` may change due to filesystem +/// operations. +/// - The value of [`PinnedPathBuf::target()`] is a cached version of +/// `fs::read_link(PinnedPathBuf::as_path())` generated when creating the `PinnedPathBuf` object. +/// - It's a sign of possible attacks if `[PinnedPathBuf::target()]` doesn't match +/// `fs::read_link(PinnedPathBuf::as_path())`. +/// - Once the [`PinnedPathBuf`] object gets dropped, the [`Path`] returned by +/// [`PinnedPathBuf::as_path()`] becomes invalid. +/// +/// With normal [`PathBuf`], there's a race window for attackers between time to validate a path and +/// time to use the path. An attacker may maliciously change filesystem object referenced by the +/// path by using symlinks to compose an attack. +/// +/// The [`PinnedPathBuf`] is introduced to protect from such attacks, by using the +/// `/proc/self/fd/xxx` files on Linux. The `/proc/self/fd/xxx` file on Linux is a symlink to the +/// real target corresponding to the process's file descriptor `xxx`. And the target filesystem +/// object referenced by the symlink will be kept stable until the file descriptor has been closed. +/// Combined with `O_PATH`, a safe version of `PathBuf` could be built by: +/// - Generate a safe path from `root` and `path` by using [`crate::scoped_join()`]. +/// - Open the safe path with O_PATH | O_CLOEXEC flags, say the fd number is `fd_num`. +/// - Read the symlink target of `/proc/self/fd/fd_num`. +/// - Compare the symlink target with the safe path, it's safe if these two paths equal. +/// - Use the proc file path as a safe version of [`PathBuf`]. +/// - Close the `fd_num` when dropping the [`PinnedPathBuf`] object. +#[derive(Debug)] +pub struct PinnedPathBuf { + handle: File, + path: PathBuf, + target: PathBuf, +} + +impl PinnedPathBuf { + /// Create a [`PinnedPathBuf`] object from `root` and `path`. + /// + /// The `path` must be a subdirectory of `root`, otherwise error will be returned. + pub fn new, U: AsRef>(root: R, path: U) -> Result { + let path = scoped_join(root, path)?; + Self::from_path(path) + } + + /// Create a `PinnedPathBuf` from `path`. + /// + /// If the resolved value of `path` doesn't equal to `path`, an error will be returned. + pub fn from_path>(orig_path: P) -> Result { + let orig_path = orig_path.as_ref(); + let handle = Self::open_by_path(orig_path)?; + Self::new_from_file(handle, orig_path) + } + + /// Try to clone the [`PinnedPathBuf`] object. + pub fn try_clone(&self) -> Result { + let fd = unsafe { libc::dup(self.path_fd()) }; + if fd < 0 { + Err(Error::last_os_error()) + } else { + Ok(Self { + handle: unsafe { File::from_raw_fd(fd) }, + path: Self::get_proc_path(fd), + target: self.target.clone(), + }) + } + } + + /// Return the underlying file descriptor representing the pinned path. + /// + /// Following operations are supported by the returned `RawFd`: + /// - fchdir + /// - fstat/fstatfs + /// - openat/linkat/fchownat/fstatat/readlinkat/mkdirat/*at + /// - fcntl(F_GETFD, F_SETFD, F_GETFL) + pub fn path_fd(&self) -> RawFd { + self.handle.as_raw_fd() + } + + /// Get the symlink path referring the target filesystem object. + pub fn as_path(&self) -> &Path { + self.path.as_path() + } + + /// Get the cached real path of the target filesystem object. + /// + /// The target path is cached version of `fs::read_link(PinnedPathBuf::as_path())` generated + /// when creating the `PinnedPathBuf` object. On the other hand, the value of + /// `fs::read_link(PinnedPathBuf::as_path())` may change due to underlying filesystem operations. + /// So it's a sign of possible attacks if `PinnedPathBuf::target()` does not match + /// `fs::read_link(PinnedPathBuf::as_path())`. + pub fn target(&self) -> &Path { + &self.target + } + + /// Get [`Metadata`] about the path handle. + pub fn metadata(&self) -> Result { + self.handle.metadata() + } + + /// Open a direct child of the filesystem objected referenced by the `PinnedPathBuf` object. + pub fn open_child(&self, path_comp: &OsStr) -> Result { + let name = Self::prepare_path_component(path_comp)?; + let oflags = libc::O_PATH | libc::O_CLOEXEC; + let res = unsafe { libc::openat(self.path_fd(), name.as_ptr(), oflags, 0) }; + if res < 0 { + Err(Error::last_os_error()) + } else { + let handle = unsafe { File::from_raw_fd(res) }; + Self::new_from_file(handle, self.target.join(path_comp)) + } + } + + /// Create or open a child directory if current object is a directory. + pub fn mkdir(&self, path_comp: &OsStr, mode: libc::mode_t) -> Result { + let path_name = Self::prepare_path_component(path_comp)?; + let res = unsafe { libc::mkdirat(self.handle.as_raw_fd(), path_name.as_ptr(), mode) }; + if res < 0 { + Err(Error::last_os_error()) + } else { + self.open_child(path_comp) + } + } + + /// Open a directory/file by path. + /// + /// Obtain a file descriptor that can be used for two purposes: + /// - indicate a location in the filesystem tree + /// - perform operations that act purely at the file descriptor level + fn open_by_path>(path: P) -> Result { + // When O_PATH is specified in flags, flag bits other than O_CLOEXEC, O_DIRECTORY, and + // O_NOFOLLOW are ignored. + let o_flags = libc::O_PATH | libc::O_CLOEXEC; + OpenOptions::new() + .read(true) + .custom_flags(o_flags) + .open(path.as_ref()) + } + + fn get_proc_path(file: F) -> PathBuf { + PathBuf::from(format!("/proc/self/fd/{}", file.as_raw_fd())) + } + + fn new_from_file>(handle: File, orig_path: P) -> Result { + let path = Self::get_proc_path(handle.as_raw_fd()); + let link_path = fs::read_link(path.as_path())?; + if link_path != orig_path.as_ref() { + Err(Error::new( + ErrorKind::Other, + format!( + "Path changed from {} to {} on open, possible attack", + orig_path.as_ref().display(), + link_path.display() + ), + )) + } else { + Ok(PinnedPathBuf { + handle, + path, + target: link_path, + }) + } + } + + #[inline] + fn prepare_path_component(path_comp: &OsStr) -> Result { + let path = Path::new(path_comp); + let mut comps = path.components(); + let name = comps.next(); + if !matches!(name, Some(Component::Normal(_))) || comps.next().is_some() { + return Err(Error::new( + ErrorKind::Other, + format!("Path component {} is invalid", path_comp.to_string_lossy()), + )); + } + let name = name.unwrap(); + if name.as_os_str() != path_comp { + return Err(Error::new( + ErrorKind::Other, + format!("Path component {} is invalid", path_comp.to_string_lossy()), + )); + } + + CString::new(path_comp.as_bytes()).map_err(|_e| { + Error::new( + ErrorKind::Other, + format!("Path component {} is invalid", path_comp.to_string_lossy()), + ) + }) + } +} + +impl Deref for PinnedPathBuf { + type Target = PathBuf; + + fn deref(&self) -> &Self::Target { + &self.path + } +} + +impl AsRef for PinnedPathBuf { + fn as_ref(&self) -> &Path { + self.path.as_path() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::DirBuilder; + use std::io::Write; + use std::os::unix::fs::{symlink, MetadataExt}; + use std::sync::{Arc, Barrier}; + use std::thread; + + #[test] + fn test_pinned_path_buf() { + // Create a root directory, which itself contains symlinks. + let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); + DirBuilder::new() + .create(rootfs_dir.path().join("b")) + .unwrap(); + symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap(); + let rootfs_path = &rootfs_dir.path().join("a"); + + // Create a file and a symlink to it. + fs::create_dir(rootfs_path.join("symlink_dir")).unwrap(); + symlink("/endpoint", rootfs_path.join("symlink_dir/endpoint")).unwrap(); + fs::write(rootfs_path.join("endpoint"), "test").unwrap(); + + // Pin the target and validate the path/content. + let path = PinnedPathBuf::new(rootfs_path.to_path_buf(), "symlink_dir/endpoint").unwrap(); + assert!(!path.is_dir()); + let path_ref = path.deref(); + let target = fs::read_link(path_ref).unwrap(); + assert_eq!(target, rootfs_path.join("endpoint").canonicalize().unwrap()); + let content = fs::read_to_string(&path).unwrap(); + assert_eq!(&content, "test"); + + // Remove the target file and validate that we could still read data from the pinned path. + fs::remove_file(&target).unwrap(); + fs::read_to_string(&target).unwrap_err(); + let content = fs::read_to_string(&path).unwrap(); + assert_eq!(&content, "test"); + } + + #[test] + fn test_pinned_path_buf_race() { + let root_dir = tempfile::tempdir().expect("failed to create tmpdir"); + let root_path = root_dir.path(); + let barrier = Arc::new(Barrier::new(2)); + + fs::write(root_path.join("a"), b"a").unwrap(); + fs::write(root_path.join("b"), b"b").unwrap(); + fs::write(root_path.join("c"), b"c").unwrap(); + symlink("a", root_path.join("s")).unwrap(); + + let root_path2 = root_path.to_path_buf(); + let barrier2 = barrier.clone(); + let thread = thread::spawn(move || { + // step 1 + barrier2.wait(); + fs::remove_file(root_path2.join("a")).unwrap(); + symlink("b", root_path2.join("a")).unwrap(); + barrier2.wait(); + + // step 2 + barrier2.wait(); + fs::remove_file(root_path2.join("b")).unwrap(); + symlink("c", root_path2.join("b")).unwrap(); + barrier2.wait(); + }); + + let path = scoped_join(&root_path, "s").unwrap(); + let data = fs::read_to_string(&path).unwrap(); + assert_eq!(&data, "a"); + assert!(path.is_file()); + barrier.wait(); + barrier.wait(); + // Verify the target has been redirected. + let data = fs::read_to_string(&path).unwrap(); + assert_eq!(&data, "b"); + PinnedPathBuf::from_path(&path).unwrap_err(); + + let pinned_path = PinnedPathBuf::new(&root_path, "s").unwrap(); + let data = fs::read_to_string(&pinned_path).unwrap(); + assert_eq!(&data, "b"); + + // step2 + barrier.wait(); + barrier.wait(); + // Verify it still points to the old target. + let data = fs::read_to_string(&pinned_path).unwrap(); + assert_eq!(&data, "b"); + + thread.join().unwrap(); + } + + #[test] + fn test_new_pinned_path_buf() { + let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); + let rootfs_path = rootfs_dir.path(); + let path = PinnedPathBuf::from_path(rootfs_path).unwrap(); + let _ = OpenOptions::new().read(true).open(&path).unwrap(); + } + + #[test] + fn test_pinned_path_try_clone() { + let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); + let rootfs_path = rootfs_dir.path(); + let path = PinnedPathBuf::from_path(rootfs_path).unwrap(); + let path2 = path.try_clone().unwrap(); + assert_ne!(path.as_path(), path2.as_path()); + } + + #[test] + fn test_new_pinned_path_buf_from_nonexist_file() { + let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); + let rootfs_path = rootfs_dir.path(); + PinnedPathBuf::new(rootfs_path, "does_not_exist").unwrap_err(); + } + + #[test] + fn test_new_pinned_path_buf_without_read_perm() { + let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); + let rootfs_path = rootfs_dir.path(); + let path = rootfs_path.join("write_only_file"); + + let mut file = OpenOptions::new() + .read(false) + .write(true) + .create(true) + .mode(0o200) + .open(&path) + .unwrap(); + file.write_all(&[0xa5u8]).unwrap(); + let md = fs::metadata(&path).unwrap(); + let umask = unsafe { libc::umask(0022) }; + unsafe { libc::umask(umask) }; + assert_eq!(md.mode() & 0o700, 0o200 & !umask); + PinnedPathBuf::from_path(&path).unwrap(); + } + + #[test] + fn test_pinned_path_buf_path_fd() { + let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); + let rootfs_path = rootfs_dir.path(); + let path = rootfs_path.join("write_only_file"); + + let mut file = OpenOptions::new() + .read(false) + .write(true) + .create(true) + .mode(0o200) + .open(&path) + .unwrap(); + file.write_all(&[0xa5u8]).unwrap(); + let handle = PinnedPathBuf::from_path(&path).unwrap(); + // Check that `fstat()` etc works with the fd returned by `path_fd()`. + let fd = handle.path_fd(); + let mut stat: libc::stat = unsafe { std::mem::zeroed() }; + let res = unsafe { libc::fstat(fd, &mut stat as *mut _) }; + assert_eq!(res, 0); + } + + #[test] + fn test_pinned_path_buf_open_child() { + let rootfs_dir = tempfile::tempdir().expect("failed to create tmpdir"); + let rootfs_path = rootfs_dir.path(); + let path = PinnedPathBuf::from_path(rootfs_path).unwrap(); + + fs::write(path.join("child"), "test").unwrap(); + let path = path.open_child(OsStr::new("child")).unwrap(); + let content = fs::read_to_string(&path).unwrap(); + assert_eq!(&content, "test"); + } +} diff --git a/src/libs/safe-path/src/scoped_dir_builder.rs b/src/libs/safe-path/src/scoped_dir_builder.rs new file mode 100644 index 0000000000..656da1c0f7 --- /dev/null +++ b/src/libs/safe-path/src/scoped_dir_builder.rs @@ -0,0 +1,261 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::{Error, ErrorKind, Result}; +use std::path::Path; + +use crate::{scoped_join, scoped_resolve, PinnedPathBuf}; + +const DIRECTORY_MODE_DEFAULT: u32 = 0o777; +const DIRECTORY_MODE_MASK: u32 = 0o777; + +/// Safe version of `DirBuilder` to protect from TOCTOU style of attacks. +/// +/// The `ScopedDirBuilder` is a counterpart for `DirBuilder`, with safety enhancements of: +/// - ensuring the new directories are created under a specified `root` directory. +/// - ensuring all created directories are still scoped under `root` even under symlink based +/// attacks. +/// - returning a [PinnedPathBuf] for the last level of directory, so it could be used for other +/// operations safely. +#[derive(Debug)] +pub struct ScopedDirBuilder { + root: PinnedPathBuf, + mode: u32, + recursive: bool, +} + +impl ScopedDirBuilder { + /// Create a new instance of `ScopedDirBuilder` with with default mode/security settings. + pub fn new>(root: P) -> Result { + let root = root.as_ref().canonicalize()?; + let root = PinnedPathBuf::from_path(root)?; + if !root.metadata()?.is_dir() { + return Err(Error::new( + ErrorKind::Other, + format!("Invalid root path: {}", root.display()), + )); + } + + Ok(ScopedDirBuilder { + root, + mode: DIRECTORY_MODE_DEFAULT, + recursive: false, + }) + } + + /// Indicates that directories should be created recursively, creating all parent directories. + /// + /// Parents that do not exist are created with the same security and permissions settings. + pub fn recursive(&mut self, recursive: bool) -> &mut Self { + self.recursive = recursive; + self + } + + /// Sets the mode to create new directories with. This option defaults to 0o755. + pub fn mode(&mut self, mode: u32) -> &mut Self { + self.mode = mode & DIRECTORY_MODE_MASK; + self + } + + /// Creates the specified directory with the options configured in this builder. + /// + /// This is a helper to create subdirectory with an absolute path, without stripping off + /// `self.root`. So error will be returned if path does start with `self.root`. + /// It is considered an error if the directory already exists unless recursive mode is enabled. + pub fn create_with_unscoped_path>(&self, path: P) -> Result { + if !path.as_ref().is_absolute() { + return Err(Error::new( + ErrorKind::Other, + format!( + "Expected absolute directory path: {}", + path.as_ref().display() + ), + )); + } + // Partially canonicalize `path` so we can strip the `root` part. + let scoped_path = scoped_join("/", path)?; + let stripped_path = scoped_path.strip_prefix(self.root.target()).map_err(|_| { + Error::new( + ErrorKind::Other, + format!( + "Path {} is not under {}", + scoped_path.display(), + self.root.target().display() + ), + ) + })?; + + self.do_mkdir(&stripped_path) + } + + /// Creates sub-directory with the options configured in this builder. + /// + /// It is considered an error if the directory already exists unless recursive mode is enabled. + pub fn create>(&self, path: P) -> Result { + let path = scoped_resolve(&self.root, path)?; + self.do_mkdir(&path) + } + + fn do_mkdir(&self, path: &Path) -> Result { + assert!(path.is_relative()); + if path.file_name().is_none() { + if !self.recursive { + return Err(Error::new( + ErrorKind::AlreadyExists, + "directory already exists", + )); + } else { + return self.root.try_clone(); + } + } + + // Safe because `path` have at least one level. + let levels = path.iter().count() - 1; + let mut dir = self.root.try_clone()?; + for (idx, comp) in path.iter().enumerate() { + match dir.open_child(comp) { + Ok(v) => { + if !v.metadata()?.is_dir() { + return Err(Error::new( + ErrorKind::Other, + format!("Path {} is not a directory", v.display()), + )); + } else if !self.recursive && idx == levels { + return Err(Error::new( + ErrorKind::AlreadyExists, + "directory already exists", + )); + } + dir = v; + } + Err(_e) => { + if !self.recursive && idx != levels { + return Err(Error::new( + ErrorKind::NotFound, + format!("parent directory does not exist"), + )); + } + dir = dir.mkdir(comp, self.mode)?; + } + } + } + + Ok(dir) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + use std::fs::DirBuilder; + use std::os::unix::fs::{symlink, MetadataExt}; + use tempfile::tempdir; + + #[test] + fn test_scoped_dir_builder() { + // create temporary directory to emulate container rootfs with symlink + let rootfs_dir = tempdir().expect("failed to create tmpdir"); + DirBuilder::new() + .create(rootfs_dir.path().join("b")) + .unwrap(); + symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap(); + let rootfs_path = &rootfs_dir.path().join("a"); + + // root directory doesn't exist + ScopedDirBuilder::new(rootfs_path.join("__does_not_exist__")).unwrap_err(); + ScopedDirBuilder::new("__does_not_exist__").unwrap_err(); + + // root is a file + fs::write(rootfs_path.join("txt"), "test").unwrap(); + ScopedDirBuilder::new(rootfs_path.join("txt")).unwrap_err(); + + let mut builder = ScopedDirBuilder::new(&rootfs_path).unwrap(); + + // file with the same name already exists. + builder + .create_with_unscoped_path(rootfs_path.join("txt")) + .unwrap_err(); + // parent is a file + builder.create("/txt/a").unwrap_err(); + // Not starting with root + builder.create_with_unscoped_path("/txt/a").unwrap_err(); + // creating "." without recursive mode should fail + builder + .create_with_unscoped_path(rootfs_path.join(".")) + .unwrap_err(); + // parent doesn't exist + builder + .create_with_unscoped_path(rootfs_path.join("a/b")) + .unwrap_err(); + builder.create("a/b/c").unwrap_err(); + + let path = builder.create("a").unwrap(); + assert!(rootfs_path.join("a").is_dir()); + assert_eq!(path.target(), rootfs_path.join("a").canonicalize().unwrap()); + + // Creating an existing directory without recursive mode should fail. + builder + .create_with_unscoped_path(rootfs_path.join("a")) + .unwrap_err(); + + // Creating an existing directory with recursive mode should succeed. + builder.recursive(true); + let path = builder + .create_with_unscoped_path(rootfs_path.join("a")) + .unwrap(); + assert_eq!(path.target(), rootfs_path.join("a").canonicalize().unwrap()); + let path = builder.create(".").unwrap(); + assert_eq!(path.target(), rootfs_path.canonicalize().unwrap()); + + let umask = unsafe { libc::umask(0022) }; + unsafe { libc::umask(umask) }; + + builder.mode(0o740); + let path = builder.create("a/b/c/d").unwrap(); + assert_eq!( + path.target(), + rootfs_path.join("a/b/c/d").canonicalize().unwrap() + ); + assert!(rootfs_path.join("a/b/c/d").is_dir()); + assert_eq!( + rootfs_path.join("a").metadata().unwrap().mode() & 0o777, + DIRECTORY_MODE_DEFAULT & !umask, + ); + assert_eq!( + rootfs_path.join("a/b").metadata().unwrap().mode() & 0o777, + 0o740 & !umask + ); + assert_eq!( + rootfs_path.join("a/b/c").metadata().unwrap().mode() & 0o777, + 0o740 & !umask + ); + assert_eq!( + rootfs_path.join("a/b/c/d").metadata().unwrap().mode() & 0o777, + 0o740 & !umask + ); + + // Creating should fail if some components are not directory. + builder.create("txt/e/f").unwrap_err(); + fs::write(rootfs_path.join("a/b/txt"), "test").unwrap(); + builder.create("a/b/txt/h/i").unwrap_err(); + } + + #[test] + fn test_create_root() { + let mut builder = ScopedDirBuilder::new("/").unwrap(); + builder.recursive(true); + builder.create("/").unwrap(); + builder.create(".").unwrap(); + builder.create("..").unwrap(); + builder.create("../../.").unwrap(); + builder.create("").unwrap(); + builder.create_with_unscoped_path("/").unwrap(); + builder.create_with_unscoped_path(".").unwrap(); + builder.create_with_unscoped_path("..").unwrap(); + builder.create_with_unscoped_path("../../.").unwrap(); + builder.create_with_unscoped_path("").unwrap(); + } +} diff --git a/src/libs/safe-path/src/scoped_path_resolver.rs b/src/libs/safe-path/src/scoped_path_resolver.rs new file mode 100644 index 0000000000..8d61111d79 --- /dev/null +++ b/src/libs/safe-path/src/scoped_path_resolver.rs @@ -0,0 +1,324 @@ +// Copyright (c) 2022 Alibaba Cloud +// +// SPDX-License-Identifier: Apache-2.0 +// + +use std::io::{Error, ErrorKind, Result}; +use std::path::{Component, Path, PathBuf}; + +// Follow the same configuration as +// [secure_join](https://github.com/cyphar/filepath-securejoin/blob/master/join.go#L51) +const MAX_SYMLINK_DEPTH: u32 = 255; + +fn do_scoped_resolve, U: AsRef>( + root: R, + unsafe_path: U, +) -> Result<(PathBuf, PathBuf)> { + let root = root.as_ref().canonicalize()?; + + let mut nlinks = 0u32; + let mut curr_path = unsafe_path.as_ref().to_path_buf(); + 'restart: loop { + let mut subpath = PathBuf::new(); + let mut iter = curr_path.components(); + + 'next_comp: while let Some(comp) = iter.next() { + match comp { + // Linux paths don't have prefixes. + Component::Prefix(_) => { + return Err(Error::new( + ErrorKind::Other, + format!("Invalid path prefix in: {}", unsafe_path.as_ref().display()), + )); + } + // `RootDir` should always be the first component, and Path::components() ensures + // that. + Component::RootDir | Component::CurDir => { + continue 'next_comp; + } + Component::ParentDir => { + subpath.pop(); + } + Component::Normal(n) => { + let path = root.join(&subpath).join(n); + if let Ok(v) = path.read_link() { + nlinks += 1; + if nlinks > MAX_SYMLINK_DEPTH { + return Err(Error::new( + ErrorKind::Other, + format!( + "Too many levels of symlinks: {}", + unsafe_path.as_ref().display() + ), + )); + } + curr_path = if v.is_absolute() { + v.join(iter.as_path()) + } else { + subpath.join(v).join(iter.as_path()) + }; + continue 'restart; + } else { + subpath.push(n); + } + } + } + } + + return Ok((root, subpath)); + } +} + +/// Resolve `unsafe_path` to a relative path, rooted at and constrained by `root`. +/// +/// The `scoped_resolve()` function assumes `root` exists and is an absolute path. It processes +/// each path component in `unsafe_path` as below: +/// - assume it's not a symlink and output if the component doesn't exist yet. +/// - ignore if it's "/" or ".". +/// - go to parent directory but constrained by `root` if it's "..". +/// - recursively resolve to the real path if it's a symlink. All symlink resolutions will be +/// constrained by `root`. +/// - otherwise output the path component. +/// +/// # Arguments +/// - `root`: the absolute path to constrain the symlink resolution. +/// - `unsafe_path`: the path to resolve. +/// +/// Note that the guarantees provided by this function only apply if the path components in the +/// returned PathBuf are not modified (in other words are not replaced with symlinks on the +/// filesystem) after this function has returned. You may use [crate::PinnedPathBuf] to protect +/// from such TOCTOU attacks. +pub fn scoped_resolve, U: AsRef>(root: R, unsafe_path: U) -> Result { + do_scoped_resolve(root, unsafe_path).map(|(_root, path)| path) +} + +/// Safely join `unsafe_path` to `root`, and ensure `unsafe_path` is scoped under `root`. +/// +/// The `scoped_join()` function assumes `root` exists and is an absolute path. It safely joins the +/// two given paths and ensures: +/// - The returned path is guaranteed to be scoped inside `root`. +/// - Any symbolic links in the path are evaluated with the given `root` treated as the root of the +/// filesystem, similar to a chroot. +/// +/// It's modelled after [secure_join](https://github.com/cyphar/filepath-securejoin), but only +/// for Linux systems. +/// +/// # Arguments +/// - `root`: the absolute path to scope the symlink evaluation. +/// - `unsafe_path`: the path to evaluated and joint with `root`. It is unsafe since it may try to +/// escape from the `root` by using "../" or symlinks. +/// +/// # Security +/// On success return, the `scoped_join()` function guarantees that: +/// - The resulting PathBuf must be a child path of `root` and will not contain any symlink path +/// components (they will all get expanded). +/// - When expanding symlinks, all symlink path components must be resolved relative to the provided +/// `root`. In particular, this can be considered a userspace implementation of how chroot(2) +/// operates on file paths. +/// - Non-existent path components are unaffected. +/// +/// Note that the guarantees provided by this function only apply if the path components in the +/// returned string are not modified (in other words are not replaced with symlinks on the +/// filesystem) after this function has returned. You may use [crate::PinnedPathBuf] to protect +/// from such TOCTTOU attacks. +pub fn scoped_join, U: AsRef>(root: R, unsafe_path: U) -> Result { + do_scoped_resolve(root, unsafe_path).map(|(root, path)| root.join(path)) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::fs::DirBuilder; + use std::os::unix::fs; + use tempfile::tempdir; + + #[allow(dead_code)] + #[derive(Debug)] + struct TestData<'a> { + name: &'a str, + rootfs: &'a Path, + unsafe_path: &'a str, + result: &'a str, + } + + fn exec_tests(tests: &[TestData]) { + for (i, t) in tests.iter().enumerate() { + // Create a string containing details of the test + let msg = format!("test[{}]: {:?}", i, t); + let result = scoped_resolve(t.rootfs, t.unsafe_path).unwrap(); + let msg = format!("{}, result: {:?}", msg, result); + + // Perform the checks + assert_eq!(&result, Path::new(t.result), "{}", msg); + } + } + + #[test] + fn test_scoped_resolve() { + // create temporary directory to emulate container rootfs with symlink + let rootfs_dir = tempdir().expect("failed to create tmpdir"); + DirBuilder::new() + .create(rootfs_dir.path().join("b")) + .unwrap(); + fs::symlink(rootfs_dir.path().join("b"), rootfs_dir.path().join("a")).unwrap(); + let rootfs_path = &rootfs_dir.path().join("a"); + + let tests = [ + TestData { + name: "normal path", + rootfs: rootfs_path, + unsafe_path: "a/b/c", + result: "a/b/c", + }, + TestData { + name: "path with .. at beginning", + rootfs: rootfs_path, + unsafe_path: "../../../a/b/c", + result: "a/b/c", + }, + TestData { + name: "path with complex .. pattern", + rootfs: rootfs_path, + unsafe_path: "../../../a/../../b/../../c", + result: "c", + }, + TestData { + name: "path with .. in middle", + rootfs: rootfs_path, + unsafe_path: "/usr/bin/../../bin/ls", + result: "bin/ls", + }, + TestData { + name: "path with . and ..", + rootfs: rootfs_path, + unsafe_path: "/usr/./bin/../../bin/./ls", + result: "bin/ls", + }, + TestData { + name: "path with . at end", + rootfs: rootfs_path, + unsafe_path: "/usr/./bin/../../bin/./ls/.", + result: "bin/ls", + }, + TestData { + name: "path try to escape by ..", + rootfs: rootfs_path, + unsafe_path: "/usr/./bin/../../../../bin/./ls/../ls", + result: "bin/ls", + }, + TestData { + name: "path with .. at the end", + rootfs: rootfs_path, + unsafe_path: "/usr/./bin/../../bin/./ls/..", + result: "bin", + }, + TestData { + name: "path ..", + rootfs: rootfs_path, + unsafe_path: "..", + result: "", + }, + TestData { + name: "path .", + rootfs: rootfs_path, + unsafe_path: ".", + result: "", + }, + TestData { + name: "path /", + rootfs: rootfs_path, + unsafe_path: "/", + result: "", + }, + TestData { + name: "empty path", + rootfs: rootfs_path, + unsafe_path: "", + result: "", + }, + ]; + + exec_tests(&tests); + } + + #[test] + fn test_scoped_resolve_invalid() { + scoped_resolve("./root_is_not_absolute_path", ".").unwrap_err(); + scoped_resolve("C:", ".").unwrap_err(); + scoped_resolve(r#"\\server\test"#, ".").unwrap_err(); + scoped_resolve(r#"http://localhost/test"#, ".").unwrap_err(); + // Chinese Unicode characters + scoped_resolve(r#"您好"#, ".").unwrap_err(); + } + + #[test] + fn test_scoped_resolve_symlink() { + // create temporary directory to emulate container rootfs with symlink + let rootfs_dir = tempdir().expect("failed to create tmpdir"); + let rootfs_path = &rootfs_dir.path(); + std::fs::create_dir(rootfs_path.join("symlink_dir")).unwrap(); + + fs::symlink("../../../", rootfs_path.join("1")).unwrap(); + let tests = [TestData { + name: "relative symlink beyond root", + rootfs: rootfs_path, + unsafe_path: "1", + result: "", + }]; + exec_tests(&tests); + + fs::symlink("/dddd", rootfs_path.join("2")).unwrap(); + let tests = [TestData { + name: "abs symlink pointing to non-exist directory", + rootfs: rootfs_path, + unsafe_path: "2", + result: "dddd", + }]; + exec_tests(&tests); + + fs::symlink("/", rootfs_path.join("3")).unwrap(); + let tests = [TestData { + name: "abs symlink pointing to /", + rootfs: rootfs_path, + unsafe_path: "3", + result: "", + }]; + exec_tests(&tests); + + fs::symlink("usr/bin/../bin/ls", rootfs_path.join("4")).unwrap(); + let tests = [TestData { + name: "symlink with one ..", + rootfs: rootfs_path, + unsafe_path: "4", + result: "usr/bin/ls", + }]; + exec_tests(&tests); + + fs::symlink("usr/bin/../../bin/ls", rootfs_path.join("5")).unwrap(); + let tests = [TestData { + name: "symlink with two ..", + rootfs: rootfs_path, + unsafe_path: "5", + result: "bin/ls", + }]; + exec_tests(&tests); + + fs::symlink( + "../usr/bin/../../../bin/ls", + rootfs_path.join("symlink_dir/6"), + ) + .unwrap(); + let tests = [TestData { + name: "symlink try to escape", + rootfs: rootfs_path, + unsafe_path: "symlink_dir/6", + result: "bin/ls", + }]; + exec_tests(&tests); + + // Detect symlink loop. + fs::symlink("/endpoint_b", rootfs_path.join("endpoint_a")).unwrap(); + fs::symlink("/endpoint_a", rootfs_path.join("endpoint_b")).unwrap(); + scoped_join(rootfs_path, "endpoint_a").unwrap_err(); + } +}