mirror of
https://github.com/linuxkit/linuxkit.git
synced 2025-07-19 17:26:28 +00:00
projects: add shiftfs project
Signed-off-by: Tycho Andersen <tycho@docker.com>
This commit is contained in:
parent
fd00f19c59
commit
d29b2a909c
@ -23,6 +23,8 @@ If you want to create a project, please submit a pull request to create a new di
|
|||||||
- [kernel-config](kernel-config/) an experiment on how to manage kernel config
|
- [kernel-config](kernel-config/) an experiment on how to manage kernel config
|
||||||
- [IMA-namespace](ima-namespace/) patches for supporting per-mount-namespace
|
- [IMA-namespace](ima-namespace/) patches for supporting per-mount-namespace
|
||||||
IMA policies
|
IMA policies
|
||||||
|
- [shiftfs](shiftfs/) is a filesystem for mapping mountpoints across user
|
||||||
|
namespaces
|
||||||
|
|
||||||
## Current projects not yet documented
|
## Current projects not yet documented
|
||||||
- VMWare support (VMWare)
|
- VMWare support (VMWare)
|
||||||
|
73
projects/shiftfs/Dockerfile
Normal file
73
projects/shiftfs/Dockerfile
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
FROM linuxkit/kernel-compile:1b396c221af673757703258159ddc8539843b02b@sha256:6b32d205bfc6407568324337b707d195d027328dbfec554428ea93e7b0a8299b AS kernel-build
|
||||||
|
|
||||||
|
ARG KERNEL_VERSION
|
||||||
|
ARG KERNEL_SERIES
|
||||||
|
ARG DEBUG
|
||||||
|
|
||||||
|
ENV KERNEL_SOURCE=https://www.kernel.org/pub/linux/kernel/v4.x/linux-${KERNEL_VERSION}.tar.xz
|
||||||
|
|
||||||
|
RUN curl -fsSL -o linux-${KERNEL_VERSION}.tar.xz ${KERNEL_SOURCE}
|
||||||
|
|
||||||
|
# Unpack the downloaded tarball and move the tree to /linux. The archive is read
# directly with -f rather than through `cat |`, because the default /bin/sh does
# not propagate failures from the upstream side of a pipeline.
RUN tar --absolute-names -xJf linux-${KERNEL_VERSION}.tar.xz && mv /linux-${KERNEL_VERSION} /linux
|
||||||
|
|
||||||
|
COPY kernel_config-${KERNEL_SERIES} /linux/arch/x86/configs/x86_64_defconfig
|
||||||
|
COPY kernel_config.debug /linux/debug_config
|
||||||
|
|
||||||
|
# Debug builds only (DEBUG non-empty): turn off CONFIG_PANIC_ON_OOPS in the
# defconfig and append the debug options copied to /linux/debug_config.
# The two steps are chained with && so a failing sed aborts the build instead
# of being silently followed by the append.
RUN if [ -n "${DEBUG}" ]; then \
        sed -i 's/CONFIG_PANIC_ON_OOPS=y/# CONFIG_PANIC_ON_OOPS is not set/' /linux/arch/x86/configs/x86_64_defconfig && \
        cat /linux/debug_config >> /linux/arch/x86/configs/x86_64_defconfig; \
    fi
|
||||||
|
|
||||||
|
# Apply local patches
|
||||||
|
COPY patches-${KERNEL_SERIES} /patches
|
||||||
|
WORKDIR /linux
|
||||||
|
# Apply every /patches/*.patch to the kernel tree in glob order; `set -e`
# stops the build at the first patch that does not apply.
RUN set -e && for p in /patches/*.patch; do \
        echo "Applying $p"; \
        patch -p1 < "$p"; \
    done
|
||||||
|
|
||||||
|
RUN mkdir /out
|
||||||
|
|
||||||
|
# Kernel
|
||||||
|
# Configure and build the kernel, then stage bzImage and System.map in /out.
# vmlinux is staged only for debug builds (DEBUG non-empty). The explicit `if`
# keeps non-debug builds succeeding while letting a failed `cp vmlinux` fail
# the build, which the previous `... || true` form masked.
RUN make defconfig && \
    make oldconfig && \
    make -j "$(getconf _NPROCESSORS_ONLN)" KCFLAGS="-fno-pie" && \
    cp arch/x86_64/boot/bzImage /out/kernel && \
    cp System.map /out && \
    if [ -n "${DEBUG}" ]; then cp vmlinux /out; fi
|
||||||
|
|
||||||
|
# Modules
|
||||||
|
RUN make INSTALL_MOD_PATH=/tmp/kernel-modules modules_install && \
|
||||||
|
( DVER=$(basename $(find /tmp/kernel-modules/lib/modules/ -mindepth 1 -maxdepth 1)) && \
|
||||||
|
cd /tmp/kernel-modules/lib/modules/$DVER && \
|
||||||
|
rm build source && \
|
||||||
|
ln -s /usr/src/linux-headers-$DVER build ) && \
|
||||||
|
( cd /tmp/kernel-modules && tar cf /out/kernel.tar lib )
|
||||||
|
|
||||||
|
# Headers (userspace API)
|
||||||
|
RUN mkdir -p /tmp/kernel-headers/usr && \
|
||||||
|
make INSTALL_HDR_PATH=/tmp/kernel-headers/usr headers_install && \
|
||||||
|
( cd /tmp/kernel-headers && tar cf /out/kernel-headers.tar usr )
|
||||||
|
|
||||||
|
# Headers (kernel development)
|
||||||
|
RUN DVER=$(basename $(find /tmp/kernel-modules/lib/modules/ -mindepth 1 -maxdepth 1)) && \
|
||||||
|
dir=/tmp/usr/src/linux-headers-$DVER && \
|
||||||
|
mkdir -p $dir && \
|
||||||
|
cp /linux/.config $dir && \
|
||||||
|
cp /linux/Module.symvers $dir && \
|
||||||
|
find . -path './include/*' -prune -o \
|
||||||
|
-path './arch/*/include' -prune -o \
|
||||||
|
-path './scripts/*' -prune -o \
|
||||||
|
-type f \( -name 'Makefile*' -o -name 'Kconfig*' -o -name 'Kbuild*' -o \
|
||||||
|
-name '*.lds' -o -name '*.pl' -o -name '*.sh' \) | \
|
||||||
|
tar cf - -T - | (cd $dir; tar xf -) && \
|
||||||
|
( cd /tmp && tar cf /out/kernel-dev.tar usr/src )
|
||||||
|
|
||||||
|
# Record where the kernel source came from. The URL is passed as a %s argument
# so that any '%' or backslash in it is not interpreted as a printf directive.
RUN printf "KERNEL_SOURCE=%s\n" "${KERNEL_SOURCE}" > /out/kernel-source-info
|
||||||
|
|
||||||
|
|
||||||
|
FROM scratch
|
||||||
|
ENTRYPOINT []
|
||||||
|
CMD []
|
||||||
|
WORKDIR /
|
||||||
|
COPY --from=kernel-build /out/* /
|
66
projects/shiftfs/Makefile
Normal file
66
projects/shiftfs/Makefile
Normal file
@ -0,0 +1,66 @@
|
|||||||
|
# This builds the supported LinuxKit kernels. Kernels are wrapped up
|
||||||
|
# in a minimal toybox container, which contains the bzImage, a tar
|
||||||
|
# ball with modules and the kernel source.
|
||||||
|
#
|
||||||
|
# Each kernel is pushed to hub twice, once as
|
||||||
|
# linuxkit/kernel:<kernel>.<major>.<minor>-<hash> and once as
|
||||||
|
# linuxkit/kernel:<kernel>.<major>.x. The <hash> is the git tree hash
|
||||||
|
# of the current directory. The build will only rebuild the kernel
|
||||||
|
# image if the git tree hash changed.
|
||||||
|
|
||||||
|
# Git tree hash of this directory. Override to force build
|
||||||
|
HASH?=$(shell git ls-tree HEAD -- ../$(notdir $(CURDIR)) | awk '{print $$3}')
|
||||||
|
# Name and Org on Hub
|
||||||
|
ORG?=linuxkitprojects
|
||||||
|
IMAGE:=kernel-shiftfs
|
||||||
|
|
||||||
|
# Aggregate targets never produce files of the same name, so declare them
# phony; `build` is included alongside `push` and `sign`.
.PHONY: build check tag push sign
|
||||||
|
# Targets:
|
||||||
|
# build: builds all kernels
|
||||||
|
# push: pushes all tagged kernel images to hub
|
||||||
|
# sign: sign and push all kernel images to hub
|
||||||
|
build:
|
||||||
|
push:
|
||||||
|
sign:
|
||||||
|
|
||||||
|
# A template for defining kernel build
|
||||||
|
# Arguments:
|
||||||
|
# $1: Full kernel version, e.g., 4.9.22
|
||||||
|
# $2: Kernel "series", e.g., 4.9.x
|
||||||
|
# $3: Build a debug kernel (used as suffix for image)
|
||||||
|
# This defines targets like:
|
||||||
|
# build_4.9.x, push_4.9.x and sign_4.9.x and adds them as dependencies
|
||||||
|
# to the global targets
|
||||||
|
# Set $3 to "_dbg", to build debug kernels. This defines targets like
|
||||||
|
# build_4.9.x_dbg and adds "_dbg" to the hub image name.
|
||||||
|
define kernel
|
||||||
|
build_$(2)$(3): Dockerfile Makefile $(wildcard patches-$(2)/*) kernel_config-$(2) kernel_config.debug
|
||||||
|
docker pull $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) || \
|
||||||
|
docker build \
|
||||||
|
--build-arg KERNEL_VERSION=$(1) \
|
||||||
|
--build-arg KERNEL_SERIES=$(2) \
|
||||||
|
--build-arg DEBUG=$(3) \
|
||||||
|
--no-cache -t $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) .
|
||||||
|
|
||||||
|
push_$(2)$(3): build_$(2)$(3)
|
||||||
|
docker pull $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) || \
|
||||||
|
(docker push $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) && \
|
||||||
|
docker tag $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) $(ORG)/$(IMAGE):$(2)$(3) && \
|
||||||
|
docker push $(ORG)/$(IMAGE):$(2)$(3))
|
||||||
|
|
||||||
|
sign_$(2)$(3): build_$(2)$(3)
|
||||||
|
DOCKER_CONTENT_TRUST=1 docker pull $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) || \
|
||||||
|
(DOCKER_CONTENT_TRUST=1 docker push $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) && \
|
||||||
|
docker tag $(ORG)/$(IMAGE):$(1)$(3)-$(HASH) $(ORG)/$(IMAGE):$(2)$(3) && \
|
||||||
|
DOCKER_CONTENT_TRUST=1 docker push $(ORG)/$(IMAGE):$(2)$(3))
|
||||||
|
|
||||||
|
build: build_$(2)$(3)
|
||||||
|
push: push_$(2)$(3)
|
||||||
|
sign: sign_$(2)$(3)
|
||||||
|
endef
|
||||||
|
|
||||||
|
#
|
||||||
|
# Build Targets
|
||||||
|
# Debug targets only for latest stable and LTS stable
|
||||||
|
#
|
||||||
|
$(eval $(call kernel,4.11.4,4.11.x))
|
30
projects/shiftfs/README.md
Normal file
30
projects/shiftfs/README.md
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
## shiftfs
|
||||||
|
|
||||||
|
Shiftfs is a virtual filesystem for mapping mountpoints across user namespaces.
|
||||||
|
The idea is that it would be useful for dockerds spawning containers: they can
|
||||||
|
keep filesystems on the host disk in terms of real root, but mount the
|
||||||
|
container roots via shiftfs, allowing containers to share a particular
|
||||||
|
filesystem with different uid maps, while not having to uidshift every file on
|
||||||
|
disk (and thus destroying some of the sharing properties).
|
||||||
|
|
||||||
|
The version included here is the v2 version of shiftfs, using the superblock's
|
||||||
|
user namespace instead of mountopts to figure out mappings. Thus, an extra step
|
||||||
|
of "marking" mounts is needed. For example:
|
||||||
|
|
||||||
|
# mkdir source
|
||||||
|
# touch source/foo # a root owned file
|
||||||
|
# mount -t shiftfs -o mark source source
|
||||||
|
# chmod 777 source
|
||||||
|
|
||||||
|
Now, let's make a user namespace:
|
||||||
|
|
||||||
|
# setuid 1000 unshare -rm
|
||||||
|
# cat /proc/self/uidmap
|
||||||
|
0 1000 1
|
||||||
|
# mkdir dest
|
||||||
|
# mount -t shiftfs source dest
|
||||||
|
# stat dest/foo | grep Uid
|
||||||
|
Access: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/ root)
|
||||||
|
|
||||||
|
And thanks to the magic of shiftfs, the file is root owned in the user
|
||||||
|
namespace.
|
3857
projects/shiftfs/kernel_config-4.11.x
Normal file
3857
projects/shiftfs/kernel_config-4.11.x
Normal file
File diff suppressed because it is too large
Load Diff
26
projects/shiftfs/kernel_config.debug
Normal file
26
projects/shiftfs/kernel_config.debug
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
|
||||||
|
|
||||||
|
## LinuxKit DEBUG OPTIONS ##
|
||||||
|
|
||||||
|
CONFIG_LOCKDEP=y
|
||||||
|
CONFIG_FRAME_POINTER=y
|
||||||
|
CONFIG_LOCKUP_DETECTOR=y
|
||||||
|
CONFIG_DETECT_HUNG_TASK=y
|
||||||
|
CONFIG_DEBUG_TIMEKEEPING=y
|
||||||
|
CONFIG_DEBUG_RT_MUTEXES=y
|
||||||
|
CONFIG_DEBUG_SPINLOCK=y
|
||||||
|
CONFIG_DEBUG_MUTEXES=y
|
||||||
|
CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y
|
||||||
|
CONFIG_DEBUG_LOCK_ALLOC=y
|
||||||
|
CONFIG_PROVE_LOCKING=y
|
||||||
|
CONFIG_LOCK_STAT=y
|
||||||
|
CONFIG_DEBUG_ATOMIC_SLEEP=y
|
||||||
|
CONFIG_DEBUG_LIST=y
|
||||||
|
CONFIG_DEBUG_NOTIFIERS=y
|
||||||
|
CONFIG_PROVE_RCU=y
|
||||||
|
CONFIG_RCU_TRACE=y
|
||||||
|
CONFIG_KGDB=y
|
||||||
|
CONFIG_KGDB_SERIAL_CONSOLE=y
|
||||||
|
CONFIG_KGDBOC=y
|
||||||
|
CONFIG_DEBUG_RODATA_TEST=y
|
||||||
|
CONFIG_DEBUG_WX=y
|
@ -0,0 +1,929 @@
|
|||||||
|
From bec86f3997034944e349e947808dc1766f79767d Mon Sep 17 00:00:00 2001
|
||||||
|
From: James Bottomley <James.Bottomley@HansenPartnership.com>
|
||||||
|
Date: Fri, 14 Apr 2017 14:22:01 -0600
|
||||||
|
Subject: [PATCH 1/2] shiftfs: uid/gid shifting filesystem (s_user_ns version)
|
||||||
|
|
||||||
|
This allows any subtree to be uid/gid shifted and bound elsewhere. It
|
||||||
|
does this by operating similarly to overlayfs. Its primary use is for
|
||||||
|
shifting the underlying uids of filesystems used to support
|
||||||
|
unprivileged (uid shifted) containers. The usual use case here is
|
||||||
|
that the container is operating with an uid shifted unprivileged root
|
||||||
|
but sometimes needs to make use of or work with a filesystem image
|
||||||
|
that has root at real uid 0.
|
||||||
|
|
||||||
|
The mechanism is to allow any subordinate mount namespace to mount a
|
||||||
|
shiftfs filesystem (by marking it FS_USERNS_MOUNT) but only allowing
|
||||||
|
it to mount marked subtrees (using the -o mark option as root). Once
|
||||||
|
mounted, the subtree is mapped via the super block user namespace so
|
||||||
|
that the interior ids of the mounting user namespace are the ids
|
||||||
|
written to the filesystem.
|
||||||
|
|
||||||
|
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
v1 - based on original shiftfs with uid mappings now done via s_user_ns
|
||||||
|
v2 - fix revalidation of dentries
|
||||||
|
add inode aliasing
|
||||||
|
---
|
||||||
|
fs/Kconfig | 8 +
|
||||||
|
fs/Makefile | 1 +
|
||||||
|
fs/shiftfs.c | 847 +++++++++++++++++++++++++++++++++++++++++++++
|
||||||
|
include/uapi/linux/magic.h | 2 +
|
||||||
|
4 files changed, 858 insertions(+)
|
||||||
|
|
||||||
|
diff --git a/fs/Kconfig b/fs/Kconfig
|
||||||
|
index b0e42b6a96b9..a66dff8d4256 100644
|
||||||
|
--- a/fs/Kconfig
|
||||||
|
+++ b/fs/Kconfig
|
||||||
|
@@ -106,6 +106,14 @@ source "fs/autofs4/Kconfig"
|
||||||
|
source "fs/fuse/Kconfig"
|
||||||
|
source "fs/overlayfs/Kconfig"
|
||||||
|
|
||||||
|
+config SHIFT_FS
|
||||||
|
+ tristate "UID/GID shifting overlay filesystem for containers"
|
||||||
|
+ help
|
||||||
|
+ This filesystem can overlay any mounted filesystem and shift
|
||||||
|
+ the uid/gid the files appear at. The idea is that
|
||||||
|
+ unprivileged containers can use this to mount root volumes
|
||||||
|
+ using this technique.
|
||||||
|
+
|
||||||
|
menu "Caches"
|
||||||
|
|
||||||
|
source "fs/fscache/Kconfig"
|
||||||
|
diff --git a/fs/Makefile b/fs/Makefile
|
||||||
|
index 7bbaca9c67b1..2aa3ad47a286 100644
|
||||||
|
--- a/fs/Makefile
|
||||||
|
+++ b/fs/Makefile
|
||||||
|
@@ -128,3 +128,4 @@ obj-y += exofs/ # Multiple modules
|
||||||
|
obj-$(CONFIG_CEPH_FS) += ceph/
|
||||||
|
obj-$(CONFIG_PSTORE) += pstore/
|
||||||
|
obj-$(CONFIG_EFIVAR_FS) += efivarfs/
|
||||||
|
+obj-$(CONFIG_SHIFT_FS) += shiftfs.o
|
||||||
|
diff --git a/fs/shiftfs.c b/fs/shiftfs.c
|
||||||
|
new file mode 100644
|
||||||
|
index 000000000000..ea8ac57b3ce1
|
||||||
|
--- /dev/null
|
||||||
|
+++ b/fs/shiftfs.c
|
||||||
|
@@ -0,0 +1,847 @@
|
||||||
|
+#include <linux/cred.h>
|
||||||
|
+#include <linux/mount.h>
|
||||||
|
+#include <linux/file.h>
|
||||||
|
+#include <linux/fs.h>
|
||||||
|
+#include <linux/namei.h>
|
||||||
|
+#include <linux/module.h>
|
||||||
|
+#include <linux/kernel.h>
|
||||||
|
+#include <linux/magic.h>
|
||||||
|
+#include <linux/parser.h>
|
||||||
|
+#include <linux/seq_file.h>
|
||||||
|
+#include <linux/statfs.h>
|
||||||
|
+#include <linux/slab.h>
|
||||||
|
+#include <linux/user_namespace.h>
|
||||||
|
+#include <linux/uidgid.h>
|
||||||
|
+#include <linux/xattr.h>
|
||||||
|
+
|
||||||
|
+struct shiftfs_super_info {
|
||||||
|
+ struct vfsmount *mnt;
|
||||||
|
+ struct user_namespace *userns;
|
||||||
|
+ bool mark;
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static struct inode *shiftfs_new_inode(struct super_block *sb, umode_t mode,
|
||||||
|
+ struct dentry *dentry);
|
||||||
|
+
|
||||||
|
+enum {
|
||||||
|
+ OPT_MARK,
|
||||||
|
+ OPT_LAST,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+/* global filesystem options */
|
||||||
|
+static const match_table_t tokens = {
|
||||||
|
+ { OPT_MARK, "mark" },
|
||||||
|
+ { OPT_LAST, NULL }
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static const struct cred *shiftfs_get_up_creds(struct super_block *sb)
|
||||||
|
+{
|
||||||
|
+ struct shiftfs_super_info *ssi = sb->s_fs_info;
|
||||||
|
+ struct cred *cred = prepare_creds();
|
||||||
|
+
|
||||||
|
+ if (!cred)
|
||||||
|
+ return NULL;
|
||||||
|
+
|
||||||
|
+ cred->fsuid = KUIDT_INIT(from_kuid(sb->s_user_ns, cred->fsuid));
|
||||||
|
+ cred->fsgid = KGIDT_INIT(from_kgid(sb->s_user_ns, cred->fsgid));
|
||||||
|
+ put_user_ns(cred->user_ns);
|
||||||
|
+ cred->user_ns = get_user_ns(ssi->userns);
|
||||||
|
+
|
||||||
|
+ return cred;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static const struct cred *shiftfs_new_creds(const struct cred **newcred,
|
||||||
|
+ struct super_block *sb)
|
||||||
|
+{
|
||||||
|
+ const struct cred *cred = shiftfs_get_up_creds(sb);
|
||||||
|
+
|
||||||
|
+ *newcred = cred;
|
||||||
|
+
|
||||||
|
+ if (cred)
|
||||||
|
+ cred = override_creds(cred);
|
||||||
|
+ else
|
||||||
|
+ printk(KERN_ERR "shiftfs: Credential override failed: no memory\n");
|
||||||
|
+
|
||||||
|
+ return cred;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void shiftfs_old_creds(const struct cred *oldcred,
|
||||||
|
+ const struct cred **newcred)
|
||||||
|
+{
|
||||||
|
+ if (!*newcred)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ revert_creds(oldcred);
|
||||||
|
+ put_cred(*newcred);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_parse_options(struct shiftfs_super_info *ssi, char *options)
|
||||||
|
+{
|
||||||
|
+ char *p;
|
||||||
|
+ substring_t args[MAX_OPT_ARGS];
|
||||||
|
+
|
||||||
|
+ ssi->mark = false;
|
||||||
|
+
|
||||||
|
+ while ((p = strsep(&options, ",")) != NULL) {
|
||||||
|
+ int token;
|
||||||
|
+
|
||||||
|
+ if (!*p)
|
||||||
|
+ continue;
|
||||||
|
+
|
||||||
|
+ token = match_token(p, tokens, args);
|
||||||
|
+ switch (token) {
|
||||||
|
+ case OPT_MARK:
|
||||||
|
+ ssi->mark = true;
|
||||||
|
+ break;
|
||||||
|
+ default:
|
||||||
|
+ return -EINVAL;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ return 0;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void shiftfs_d_release(struct dentry *dentry)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+
|
||||||
|
+ dput(real);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct dentry *shiftfs_d_real(struct dentry *dentry,
|
||||||
|
+ const struct inode *inode,
|
||||||
|
+ unsigned int flags)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+
|
||||||
|
+ if (unlikely(real->d_flags & DCACHE_OP_REAL))
|
||||||
|
+ return real->d_op->d_real(real, real->d_inode, flags);
|
||||||
|
+
|
||||||
|
+ return real;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_d_weak_revalidate(struct dentry *dentry, unsigned int flags)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+
|
||||||
|
+ if (d_unhashed(real))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ if (!(real->d_flags & DCACHE_OP_WEAK_REVALIDATE))
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ return real->d_op->d_weak_revalidate(real, flags);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+ struct inode *reali = d_inode(real), *inode = d_inode(dentry);
|
||||||
|
+ int ret;
|
||||||
|
+
|
||||||
|
+ if (d_unhashed(real))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * inode state of underlying changed from positive to negative
|
||||||
|
+ * or vice versa; force a lookup to update our view
|
||||||
|
+ */
|
||||||
|
+ if (d_is_negative(real) != d_is_negative(dentry))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ /*
|
||||||
|
+ * non dir link count is > 1 and our inode is currently not in
|
||||||
|
+ * the inode hash => need to drop and reget our dentry to make
|
||||||
|
+ * sure we're aliasing it correctly.
|
||||||
|
+ */
|
||||||
|
+ if (reali &&!S_ISDIR(reali->i_mode) && reali->i_nlink > 1 &&
|
||||||
|
+ (!inode || inode_unhashed(inode)))
|
||||||
|
+ return 0;
|
||||||
|
+
|
||||||
|
+ if (!(real->d_flags & DCACHE_OP_REVALIDATE))
|
||||||
|
+ return 1;
|
||||||
|
+
|
||||||
|
+ ret = real->d_op->d_revalidate(real, flags);
|
||||||
|
+
|
||||||
|
+ if (ret == 0 && !(flags & LOOKUP_RCU))
|
||||||
|
+ d_invalidate(real);
|
||||||
|
+
|
||||||
|
+ return ret;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static const struct dentry_operations shiftfs_dentry_ops = {
|
||||||
|
+ .d_release = shiftfs_d_release,
|
||||||
|
+ .d_real = shiftfs_d_real,
|
||||||
|
+ .d_revalidate = shiftfs_d_revalidate,
|
||||||
|
+ .d_weak_revalidate = shiftfs_d_weak_revalidate,
|
||||||
|
+};
|
||||||
|
+
|
||||||
|
+static int shiftfs_readlink(struct dentry *dentry, char __user *data,
|
||||||
|
+ int flags)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+ const struct inode_operations *iop = real->d_inode->i_op;
|
||||||
|
+
|
||||||
|
+ if (iop->readlink)
|
||||||
|
+ return iop->readlink(real, data, flags);
|
||||||
|
+
|
||||||
|
+ return -EINVAL;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static const char *shiftfs_get_link(struct dentry *dentry, struct inode *inode,
|
||||||
|
+ struct delayed_call *done)
|
||||||
|
+{
|
||||||
|
+ if (dentry) {
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+ struct inode *reali = real->d_inode;
|
||||||
|
+ const struct inode_operations *iop = reali->i_op;
|
||||||
|
+ const char *res = ERR_PTR(-EPERM);
|
||||||
|
+
|
||||||
|
+ if (iop->get_link)
|
||||||
|
+ res = iop->get_link(real, reali, done);
|
||||||
|
+
|
||||||
|
+ return res;
|
||||||
|
+ } else {
|
||||||
|
+ /* RCU lookup not supported */
|
||||||
|
+ return ERR_PTR(-ECHILD);
|
||||||
|
+ }
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_setxattr(struct dentry *dentry, struct inode *inode,
|
||||||
|
+ const char *name, const void *value,
|
||||||
|
+ size_t size, int flags)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+ int err = -EOPNOTSUPP;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb);
|
||||||
|
+ err = vfs_setxattr(real, name, value, size, flags);
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_xattr_get(const struct xattr_handler *handler,
|
||||||
|
+ struct dentry *dentry, struct inode *inode,
|
||||||
|
+ const char *name, void *value, size_t size)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+ int err;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb);
|
||||||
|
+ err = vfs_getxattr(real, name, value, size);
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static ssize_t shiftfs_listxattr(struct dentry *dentry, char *list,
|
||||||
|
+ size_t size)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+ int err;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb);
|
||||||
|
+ err = vfs_listxattr(real, list, size);
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_removexattr(struct dentry *dentry, const char *name)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dentry->d_fsdata;
|
||||||
|
+ int err;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb);
|
||||||
|
+ err = vfs_removexattr(real, name);
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_xattr_set(const struct xattr_handler *handler,
|
||||||
|
+ struct dentry *dentry, struct inode *inode,
|
||||||
|
+ const char *name, const void *value, size_t size,
|
||||||
|
+ int flags)
|
||||||
|
+{
|
||||||
|
+ if (!value)
|
||||||
|
+ return shiftfs_removexattr(dentry, name);
|
||||||
|
+ return shiftfs_setxattr(dentry, inode, name, value, size, flags);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static void shiftfs_fill_inode(struct inode *inode, struct dentry *dentry)
|
||||||
|
+{
|
||||||
|
+ struct inode *reali;
|
||||||
|
+
|
||||||
|
+ if (!dentry)
|
||||||
|
+ return;
|
||||||
|
+
|
||||||
|
+ reali = dentry->d_inode;
|
||||||
|
+
|
||||||
|
+ if (!reali->i_op->get_link)
|
||||||
|
+ inode->i_opflags |= IOP_NOFOLLOW;
|
||||||
|
+
|
||||||
|
+ inode->i_mapping = reali->i_mapping;
|
||||||
|
+ inode->i_private = dentry;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_make_object(struct inode *dir, struct dentry *dentry,
|
||||||
|
+ umode_t mode, const char *symlink,
|
||||||
|
+ struct dentry *hardlink, bool excl)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dir->i_private, *new = dentry->d_fsdata,
|
||||||
|
+ *realhardlink = NULL;
|
||||||
|
+ struct inode *reali = real->d_inode, *newi;
|
||||||
|
+ const struct inode_operations *iop = reali->i_op;
|
||||||
|
+ int err;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+ bool op_ok = false;
|
||||||
|
+
|
||||||
|
+ if (hardlink) {
|
||||||
|
+ realhardlink = hardlink->d_fsdata;
|
||||||
|
+ op_ok = iop->link;
|
||||||
|
+ } else {
|
||||||
|
+ switch (mode & S_IFMT) {
|
||||||
|
+ case S_IFDIR:
|
||||||
|
+ op_ok = iop->mkdir;
|
||||||
|
+ break;
|
||||||
|
+ case S_IFREG:
|
||||||
|
+ op_ok = iop->create;
|
||||||
|
+ break;
|
||||||
|
+ case S_IFLNK:
|
||||||
|
+ op_ok = iop->symlink;
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+ if (!op_ok)
|
||||||
|
+ return -EINVAL;
|
||||||
|
+
|
||||||
|
+
|
||||||
|
+ newi = shiftfs_new_inode(dentry->d_sb, mode, realhardlink);
|
||||||
|
+ if (!newi)
|
||||||
|
+ return -ENOMEM;
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb);
|
||||||
|
+
|
||||||
|
+ inode_lock_nested(reali, I_MUTEX_PARENT);
|
||||||
|
+
|
||||||
|
+ err = -EINVAL; /* shut gcc up about uninit var */
|
||||||
|
+ if (hardlink) {
|
||||||
|
+ err = vfs_link(realhardlink, reali, new, NULL);
|
||||||
|
+ } else {
|
||||||
|
+ switch (mode & S_IFMT) {
|
||||||
|
+ case S_IFDIR:
|
||||||
|
+ err = vfs_mkdir(reali, new, mode);
|
||||||
|
+ break;
|
||||||
|
+ case S_IFREG:
|
||||||
|
+ err = vfs_create(reali, new, mode, excl);
|
||||||
|
+ break;
|
||||||
|
+ case S_IFLNK:
|
||||||
|
+ err = vfs_symlink(reali, new, symlink);
|
||||||
|
+ }
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+
|
||||||
|
+ if (err)
|
||||||
|
+ goto out_dput;
|
||||||
|
+
|
||||||
|
+ if (!hardlink)
|
||||||
|
+ shiftfs_fill_inode(newi, new);
|
||||||
|
+ else if (inode_unhashed(newi) && !S_ISDIR(newi->i_mode))
|
||||||
|
+ /*
|
||||||
|
+ * although dentry and hardlink now each point to
|
||||||
|
+ * newi, the link count was 1 when they were created,
|
||||||
|
+ * so insert into the inode cache now that the link
|
||||||
|
+ * count has gone above one.
|
||||||
|
+ */
|
||||||
|
+ __insert_inode_hash(newi, (unsigned long)d_inode(new));
|
||||||
|
+
|
||||||
|
+ d_instantiate(dentry, newi);
|
||||||
|
+
|
||||||
|
+ new = NULL;
|
||||||
|
+ newi = NULL;
|
||||||
|
+
|
||||||
|
+ out_dput:
|
||||||
|
+ dput(new);
|
||||||
|
+ iput(newi);
|
||||||
|
+ inode_unlock(reali);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_create(struct inode *dir, struct dentry *dentry,
|
||||||
|
+ umode_t mode, bool excl)
|
||||||
|
+{
|
||||||
|
+ mode |= S_IFREG;
|
||||||
|
+
|
||||||
|
+ return shiftfs_make_object(dir, dentry, mode, NULL, NULL, excl);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_mkdir(struct inode *dir, struct dentry *dentry,
|
||||||
|
+ umode_t mode)
|
||||||
|
+{
|
||||||
|
+ mode |= S_IFDIR;
|
||||||
|
+
|
||||||
|
+ return shiftfs_make_object(dir, dentry, mode, NULL, NULL, false);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_link(struct dentry *hardlink, struct inode *dir,
|
||||||
|
+ struct dentry *dentry)
|
||||||
|
+{
|
||||||
|
+ return shiftfs_make_object(dir, dentry, 0, NULL, hardlink, false);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||||
|
+ const char *symlink)
|
||||||
|
+{
|
||||||
|
+ return shiftfs_make_object(dir, dentry, S_IFLNK, symlink, NULL, false);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_rm(struct inode *dir, struct dentry *dentry, bool rmdir)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dir->i_private, *new = dentry->d_fsdata;
|
||||||
|
+ struct inode *reali = real->d_inode;
|
||||||
|
+ int err;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ inode_lock_nested(reali, I_MUTEX_PARENT);
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb);
|
||||||
|
+
|
||||||
|
+ if (rmdir)
|
||||||
|
+ err = vfs_rmdir(reali, new);
|
||||||
|
+ else
|
||||||
|
+ err = vfs_unlink(reali, new, NULL);
|
||||||
|
+
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+ inode_unlock(reali);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||||
|
+{
|
||||||
|
+ return shiftfs_rm(dir, dentry, false);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_rmdir(struct inode *dir, struct dentry *dentry)
|
||||||
|
+{
|
||||||
|
+ return shiftfs_rm(dir, dentry, true);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_rename(struct inode *olddir, struct dentry *old,
|
||||||
|
+ struct inode *newdir, struct dentry *new,
|
||||||
|
+ unsigned int flags)
|
||||||
|
+{
|
||||||
|
+ struct dentry *rodd = olddir->i_private, *rndd = newdir->i_private,
|
||||||
|
+ *realold = old->d_fsdata,
|
||||||
|
+ *realnew = new->d_fsdata, *trap;
|
||||||
|
+ struct inode *realolddir = rodd->d_inode, *realnewdir = rndd->d_inode;
|
||||||
|
+ int err = -EINVAL;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ trap = lock_rename(rndd, rodd);
|
||||||
|
+
|
||||||
|
+ if (trap == realold || trap == realnew)
|
||||||
|
+ goto out_unlock;
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, old->d_sb);
|
||||||
|
+
|
||||||
|
+ err = vfs_rename(realolddir, realold, realnewdir,
|
||||||
|
+ realnew, NULL, flags);
|
||||||
|
+
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+
|
||||||
|
+ out_unlock:
|
||||||
|
+ unlock_rename(rndd, rodd);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static struct dentry *shiftfs_lookup(struct inode *dir, struct dentry *dentry,
|
||||||
|
+ unsigned int flags)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = dir->i_private, *new;
|
||||||
|
+ struct inode *reali = real->d_inode, *newi;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ inode_lock(reali);
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, dentry->d_sb);
|
||||||
|
+ new = lookup_one_len(dentry->d_name.name, real, dentry->d_name.len);
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+ inode_unlock(reali);
|
||||||
|
+
|
||||||
|
+ if (IS_ERR(new))
|
||||||
|
+ return new;
|
||||||
|
+
|
||||||
|
+ dentry->d_fsdata = new;
|
||||||
|
+
|
||||||
|
+ newi = NULL;
|
||||||
|
+ if (!new->d_inode)
|
||||||
|
+ goto out;
|
||||||
|
+
|
||||||
|
+ newi = shiftfs_new_inode(dentry->d_sb, new->d_inode->i_mode, new);
|
||||||
|
+ if (!newi) {
|
||||||
|
+ dput(new);
|
||||||
|
+ return ERR_PTR(-ENOMEM);
|
||||||
|
+ }
|
||||||
|
+
|
||||||
|
+ out:
|
||||||
|
+ return d_splice_alias(newi, dentry);
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_permission(struct inode *inode, int mask)
|
||||||
|
+{
|
||||||
|
+ struct dentry *real = inode->i_private;
|
||||||
|
+ struct inode *reali = real->d_inode;
|
||||||
|
+ const struct inode_operations *iop = reali->i_op;
|
||||||
|
+ int err;
|
||||||
|
+ const struct cred *oldcred, *newcred;
|
||||||
|
+
|
||||||
|
+ if (mask & MAY_NOT_BLOCK)
|
||||||
|
+ return -ECHILD;
|
||||||
|
+
|
||||||
|
+ oldcred = shiftfs_new_creds(&newcred, inode->i_sb);
|
||||||
|
+ if (iop->permission)
|
||||||
|
+ err = iop->permission(reali, mask);
|
||||||
|
+ else
|
||||||
|
+ err = generic_permission(reali, mask);
|
||||||
|
+ shiftfs_old_creds(oldcred, &newcred);
|
||||||
|
+
|
||||||
|
+ return err;
|
||||||
|
+}
|
||||||
|
+
|
||||||
|
+/*
+ * Change attributes on a shiftfs dentry.
+ *
+ * The request is applied to the underlying dentry (dentry->d_fsdata)
+ * through its ->setattr hook, or simple_setattr() when it has none,
+ * using a copy of @attr whose uid/gid have been run through
+ * from_kuid()/from_kgid() against the superblock's user namespace.
+ * The lower inode is locked around the call and the call runs between
+ * shiftfs_new_creds() and shiftfs_old_creds().
+ *
+ * Returns 0 on success or the error from the lower setattr.
+ */
+static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct dentry *lower_dentry = dentry->d_fsdata;
+	struct inode *lower = lower_dentry->d_inode;
+	const struct inode_operations *lower_ops = lower->i_op;
+	const struct cred *saved, *shifted;
+	struct iattr lower_attr;
+	int ret;
+
+	/* work on a copy: the caller's attr is later given to setattr_copy() as is */
+	lower_attr = *attr;
+	lower_attr.ia_uid = KUIDT_INIT(from_kuid(sb->s_user_ns, attr->ia_uid));
+	lower_attr.ia_gid = KGIDT_INIT(from_kgid(sb->s_user_ns, attr->ia_gid));
+
+	saved = shiftfs_new_creds(&shifted, sb);
+	inode_lock(lower);
+	ret = lower_ops->setattr ? lower_ops->setattr(lower_dentry, &lower_attr)
+				 : simple_setattr(lower_dentry, &lower_attr);
+	inode_unlock(lower);
+	shiftfs_old_creds(saved, &shifted);
+
+	if (ret)
+		return ret;
+
+	/* the lower setattr succeeded: mirror the change on our own inode */
+	setattr_copy(d_inode(dentry), attr);
+	return 0;
+}
|
||||||
|
+
|
||||||
|
+static int shiftfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+			   struct kstat *stat)
+{
+	struct inode *inode = dentry->d_inode;
+	struct dentry *real = inode->i_private;
+	struct inode *reali = real->d_inode;
+	const struct inode_operations *iop = reali->i_op;
+	int err = 0;
+
+	mnt = dentry->d_sb->s_fs_info;
+
+	if (iop->getattr)
+		err = iop->getattr(mnt, real, stat);
+	else
+		generic_fillattr(reali, stat);
+
+	/*
+	 * NOTE(review): the lines above are deliberately left exactly as
+	 * they were because the follow-up patch in this series rewrites
+	 * them and needs them as context.  Be aware that "mnt" is loaded
+	 * from s_fs_info, which shiftfs_fill_super() sets to the
+	 * shiftfs_super_info, not to a vfsmount (shiftfs_statfs() uses
+	 * ssi->mnt for the same purpose).
+	 */
+	if (err)
+		return err;
+
+	/*
+	 * The stat data was filled in from the underlying inode; rebuild
+	 * its uid/gid as kuid/kgid values of this superblock's user
+	 * namespace from the raw lower ids.
+	 */
+	stat->uid = make_kuid(inode->i_sb->s_user_ns, __kuid_val(stat->uid));
+	stat->gid = make_kgid(inode->i_sb->s_user_ns, __kgid_val(stat->gid));
+	return 0;
+}
|
||||||
|
+
|
||||||
|
+/* Inode operations installed on every shiftfs inode by shiftfs_new_inode(). */
+static const struct inode_operations shiftfs_inode_ops = {
+	/* name resolution and attributes */
+	.lookup		= shiftfs_lookup,
+	.permission	= shiftfs_permission,
+	.getattr	= shiftfs_getattr,
+	.setattr	= shiftfs_setattr,
+	.listxattr	= shiftfs_listxattr,
+
+	/* symbolic links */
+	.symlink	= shiftfs_symlink,
+	.get_link	= shiftfs_get_link,
+	.readlink	= shiftfs_readlink,
+
+	/* creating, linking, renaming and removing entries */
+	.create		= shiftfs_create,
+	.mkdir		= shiftfs_mkdir,
+	.mknod		= NULL, /* no special files currently */
+	.link		= shiftfs_link,
+	.rename		= shiftfs_rename,
+	.unlink		= shiftfs_unlink,
+	.rmdir		= shiftfs_rmdir,
+};
|
||||||
|
+
|
||||||
|
+/*
+ * Match callback handed to iget5_locked() by shiftfs_new_inode().
+ *
+ * @inode is a shiftfs inode already in the hash, @data is the
+ * underlying dentry being looked up.  Returns non-zero only when
+ * @inode's underlying dentry has an inode and it is the same inode
+ * that @data refers to.
+ */
+static int shiftfs_test(struct inode *inode, void *data)
+{
+	struct dentry *cached = inode->i_private;
+	struct dentry *wanted = data;
+	struct inode *cached_lower = d_inode(cached);
+	struct inode *wanted_lower = d_inode(wanted);
+
+	if (!cached_lower)
+		return 0;
+
+	return cached_lower == wanted_lower;
+}
|
||||||
|
+
|
||||||
|
+/*
+ * Initialisation callback handed to iget5_locked() by
+ * shiftfs_new_inode(): binds the freshly allocated @inode to the
+ * underlying dentry passed in @data via shiftfs_fill_inode().
+ * Always returns 0.
+ */
+static int shiftfs_set(struct inode *inode, void *data)
+{
+	struct dentry *lower_dentry = data;
+
+	shiftfs_fill_inode(inode, lower_dentry);
+	return 0;
+}
|
||||||
|
+
|
||||||
|
+/*
+ * Get a shiftfs inode for the underlying @dentry (which may be NULL).
+ *
+ * @sb:     shiftfs superblock the inode belongs to
+ * @mode:   mode of the object; only the file type bits (S_IFMT) are kept
+ * @dentry: underlying dentry, or NULL
+ *
+ * Returns the inode, or NULL when none could be allocated.  When the
+ * inode hash already holds an inode for the same underlying inode,
+ * that existing inode is returned as is.
+ */
+static struct inode *shiftfs_new_inode(struct super_block *sb, umode_t mode,
+				       struct dentry *dentry)
+{
+	struct inode *lower = NULL;
+	struct inode *inode;
+	bool hashed = false;
+
+	if (dentry)
+		lower = d_inode(dentry);
+
+	/*
+	 * Only inodes that may have aliases go into the inode hash: the
+	 * underlying object must not be a directory and must have a
+	 * link count above one.  Revalidation checks the underlying
+	 * link count and forces a fresh lookup when it has been raised,
+	 * which keeps this accurate.
+	 *
+	 * An inode is not unhashed when the link count drops again, so
+	 * the hash holds every possible alias plus a few extras.
+	 */
+	if (lower && !S_ISDIR(lower->i_mode))
+		hashed = ACCESS_ONCE(lower->i_nlink) > 1;
+
+	if (!hashed) {
+		inode = new_inode(sb);
+	} else {
+		inode = iget5_locked(sb, (unsigned long)lower, shiftfs_test,
+				     shiftfs_set, dentry);
+		/* found in the hash and already set up: nothing more to do */
+		if (inode && !(inode->i_state & I_NEW))
+			return inode;
+	}
+
+	if (!inode)
+		return NULL;
+
+	/*
+	 * Our own inode is vestigial: lookups, getattr and permission
+	 * checks all go to the underlying inode, so everything the user
+	 * sees comes from there.  Only the file type is recorded here.
+	 */
+	inode->i_ino = get_next_ino();
+	inode->i_mode = mode & S_IFMT;
+	inode->i_flags |= S_NOATIME | S_NOCMTIME;
+	inode->i_op = &shiftfs_inode_ops;
+
+	/*
+	 * A hashed inode was already filled in by shiftfs_set() and is
+	 * still locked as new; an unhashed one is filled in here.
+	 */
+	if (hashed)
+		unlock_new_inode(inode);
+	else
+		shiftfs_fill_inode(inode, dentry);
+
+	return inode;
+}
|
||||||
|
+
|
||||||
|
+/*
+ * ->show_options: emit "mark" when the superblock was mounted with the
+ * mark option set; nothing otherwise.  Always returns 0.
+ */
+static int shiftfs_show_options(struct seq_file *m, struct dentry *dentry)
+{
+	struct shiftfs_super_info *ssi = dentry->d_sb->s_fs_info;
+
+	if (!ssi->mark)
+		return 0;
+
+	seq_show_option(m, "mark", NULL);
+	return 0;
+}
|
||||||
|
+
|
||||||
|
+/*
+ * ->statfs: report the statistics of the underlying filesystem.
+ *
+ * vfs_statfs() is run on the underlying root (the shiftfs root's
+ * d_fsdata on the mount saved in the superblock info); on success only
+ * f_type is replaced with this superblock's own magic.
+ *
+ * Returns 0 on success or the error from vfs_statfs().
+ */
+static int shiftfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+	struct super_block *sb = dentry->d_sb;
+	struct shiftfs_super_info *ssi = sb->s_fs_info;
+	struct path lower_root = {
+		.mnt = ssi->mnt,
+		.dentry = sb->s_root->d_fsdata,
+	};
+	int ret;
+
+	ret = vfs_statfs(&lower_root, buf);
+	if (!ret)
+		buf->f_type = sb->s_magic;
+
+	return ret;
+}
|
||||||
|
+
|
||||||
|
+/*
+ * ->put_super: drop the mount and user namespace references held in
+ * the superblock info, then free the info itself.
+ */
+static void shiftfs_put_super(struct super_block *sb)
+{
+	struct shiftfs_super_info *info = sb->s_fs_info;
+
+	mntput(info->mnt);
+	put_user_ns(info->userns);
+
+	kfree(info);
+}
|
||||||
|
+
|
||||||
|
+/*
+ * The single xattr handler of shiftfs.  It is registered with an empty
+ * prefix, i.e. it is not tied to one particular xattr namespace.
+ */
+static const struct xattr_handler shiftfs_xattr_handler = {
+	.prefix	= "",
+	.set	= shiftfs_xattr_set,
+	.get	= shiftfs_xattr_get,
+};
|
||||||
|
+
|
||||||
|
+/* NULL-terminated handler table installed as sb->s_xattr by shiftfs_fill_super(). */
+const struct xattr_handler *shiftfs_xattr_handlers[] = {
+	&shiftfs_xattr_handler,
+	NULL,
+};
|
||||||
|
+
|
||||||
|
+/* Superblock operations installed as sb->s_op by shiftfs_fill_super(). */
+static const struct super_operations shiftfs_super_ops = {
+	.statfs		= shiftfs_statfs,
+	.show_options	= shiftfs_show_options,
+	.put_super	= shiftfs_put_super,
+};
|
||||||
|
+
|
||||||
|
+/*
+ * Argument bundle passed from shiftfs_mount() to shiftfs_fill_super().
+ * shiftfs_mount() initialises it positionally, so the member order
+ * must not change.
+ */
+struct shiftfs_data {
+	void		*data;	/* raw mount options, handed to shiftfs_parse_options() */
+	const char	*path;	/* path to shift: the dev_name given to mount */
+};
|
||||||
|
+
|
||||||
|
+/*
+ * Set up the superblock of a new shiftfs mount.
+ *
+ * @sb:       superblock to fill in
+ * @raw_data: struct shiftfs_data carrying the mount options and the
+ *            path to be shifted
+ * @silent:   unused
+ *
+ * With the "mark" option the path is recorded as a mount point that
+ * may later be shifted; this needs CAP_SYS_ADMIN.  Without it the path
+ * must be inside an existing shiftfs mark mount, and the caller needs
+ * CAP_SYS_ADMIN in its own user namespace.
+ *
+ * Returns 0 on success or a negative errno.  On failure every
+ * reference and allocation taken here has been released again.
+ */
+static int shiftfs_fill_super(struct super_block *sb, void *raw_data,
+			      int silent)
+{
+	struct shiftfs_data *data = raw_data;
+	char *name = kstrdup(data->path, GFP_KERNEL);
+	int err = -ENOMEM;
+	struct shiftfs_super_info *ssi = NULL;
+	struct path path;
+	struct dentry *dentry;
+
+	if (!name)
+		goto out;
+
+	ssi = kzalloc(sizeof(*ssi), GFP_KERNEL);
+	if (!ssi)
+		goto out;
+
+	err = shiftfs_parse_options(ssi, data->data);
+	if (err)
+		goto out;
+
+	/*
+	 * err is 0 at this point, so it has to be set *after* option
+	 * parsing: otherwise a failed capability check below would
+	 * leave through "out" and report success for a superblock that
+	 * was never set up.
+	 */
+	err = -EPERM;
+
+	/* to mark a mount point, must be real root */
+	if (ssi->mark && !capable(CAP_SYS_ADMIN))
+		goto out;
+
+	/* else to mount a mark, must be userns admin */
+	if (!ssi->mark && !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
+		goto out;
+
+	err = kern_path(name, LOOKUP_FOLLOW, &path);
+	if (err)
+		goto out;
+
+	/* default error for the checks on the looked-up path below */
+	err = -EPERM;
+
+	if (!S_ISDIR(path.dentry->d_inode->i_mode)) {
+		err = -ENOTDIR;
+		goto out_put;
+	}
+
+	sb->s_stack_depth = path.dentry->d_sb->s_stack_depth + 1;
+	if (sb->s_stack_depth > FILESYSTEM_MAX_STACK_DEPTH) {
+		printk(KERN_ERR "shiftfs: maximum stacking depth exceeded\n");
+		err = -EINVAL;
+		goto out_put;
+	}
+
+	if (ssi->mark) {
+		/*
+		 * this part is visible unshifted, so make sure no
+		 * executables that could be used to give suid
+		 * privileges
+		 */
+		/* OR the flag in so flags already set on sb are kept */
+		sb->s_iflags |= SB_I_NOEXEC;
+		/* the references obtained by kern_path() move to ssi/dentry */
+		ssi->mnt = path.mnt;
+		dentry = path.dentry;
+	} else {
+		struct shiftfs_super_info *mp_ssi;
+
+		/*
+		 * this leg executes if we're admin capable in
+		 * the namespace, so be very careful
+		 */
+		if (path.dentry->d_sb->s_magic != SHIFTFS_MAGIC)
+			goto out_put;
+		mp_ssi = path.dentry->d_sb->s_fs_info;
+		if (!mp_ssi->mark)
+			goto out_put;
+		/* take our own references on the marked mount's lower objects */
+		ssi->mnt = mntget(mp_ssi->mnt);
+		dentry = dget(path.dentry->d_fsdata);
+		path_put(&path);
+	}
+	ssi->userns = get_user_ns(dentry->d_sb->s_user_ns);
+	sb->s_fs_info = ssi;
+	sb->s_magic = SHIFTFS_MAGIC;
+	sb->s_op = &shiftfs_super_ops;
+	sb->s_xattr = shiftfs_xattr_handlers;
+	sb->s_d_op = &shiftfs_dentry_ops;
+	sb->s_root = d_make_root(shiftfs_new_inode(sb, S_IFDIR, dentry));
+	if (!sb->s_root) {
+		/* inode or root dentry allocation failed */
+		err = -ENOMEM;
+		goto out_release;
+	}
+	sb->s_root->d_fsdata = dentry;
+
+	/* the path string is only needed for the lookup above */
+	kfree(name);
+	return 0;
+
+ out_release:
+	/*
+	 * s_root was never set, so superblock teardown will not call
+	 * ->put_super(); drop by hand what was taken above.  In both
+	 * legs ssi->mnt and dentry each hold exactly one reference.
+	 */
+	sb->s_fs_info = NULL;
+	put_user_ns(ssi->userns);
+	mntput(ssi->mnt);
+	dput(dentry);
+	goto out;
+ out_put:
+	path_put(&path);
+ out:
+	kfree(name);
+	kfree(ssi);
+	return err;
+}
|
||||||
|
+
|
||||||
|
+/*
+ * ->mount: bundle the mount options and the path given as dev_name
+ * into a struct shiftfs_data and let mount_nodev() build the
+ * superblock through shiftfs_fill_super().
+ */
+static struct dentry *shiftfs_mount(struct file_system_type *fs_type,
+				    int flags, const char *dev_name, void *data)
+{
+	struct shiftfs_data args = {
+		.data = data,
+		.path = dev_name,
+	};
+
+	return mount_nodev(fs_type, flags, &args, shiftfs_fill_super);
+}
|
||||||
|
+
|
||||||
|
+/* Filesystem type registered by shiftfs_init() under the name "shiftfs". */
+static struct file_system_type shiftfs_type = {
+	.name		= "shiftfs",
+	.owner		= THIS_MODULE,
+	.fs_flags	= FS_USERNS_MOUNT,
+	.mount		= shiftfs_mount,
+	.kill_sb	= kill_anon_super,
+};
|
||||||
|
+
|
||||||
|
+/* Module entry point: register the filesystem type; returns register_filesystem()'s result. */
+static int __init shiftfs_init(void)
+{
+	int ret = register_filesystem(&shiftfs_type);
+
+	return ret;
+}
|
||||||
|
+
|
||||||
|
+/* Module exit point: unregister the filesystem type registered by shiftfs_init(). */
+static void __exit shiftfs_exit(void)
+{
+	unregister_filesystem(&shiftfs_type);
+}
|
||||||
|
+
|
||||||
|
+/* module entry/exit hooks */
+module_init(shiftfs_init)
+module_exit(shiftfs_exit)
+
+/* module metadata */
+MODULE_DESCRIPTION("uid/gid shifting bind filesystem");
+MODULE_AUTHOR("James Bottomley");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS_FS("shiftfs");
|
||||||
|
diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
|
||||||
|
index e230af2e6855..a2fdb01a1a4e 100644
|
||||||
|
--- a/include/uapi/linux/magic.h
|
||||||
|
+++ b/include/uapi/linux/magic.h
|
||||||
|
@@ -85,4 +85,6 @@
|
||||||
|
#define BALLOON_KVM_MAGIC 0x13661366
|
||||||
|
#define ZSMALLOC_MAGIC 0x58295829
|
||||||
|
|
||||||
|
+#define SHIFTFS_MAGIC 0x6a656a62
|
||||||
|
+
|
||||||
|
#endif /* __LINUX_MAGIC_H__ */
|
||||||
|
--
|
||||||
|
2.11.0
|
||||||
|
|
@ -0,0 +1,44 @@
|
|||||||
|
From b0eb5c2b15df95ddec67436766f613aa7dd031be Mon Sep 17 00:00:00 2001
|
||||||
|
From: Tycho Andersen <tycho@docker.com>
|
||||||
|
Date: Fri, 14 Apr 2017 15:37:31 -0600
|
||||||
|
Subject: [PATCH 2/2] shiftfs: update to compile with a528d35e8bfcc
|
||||||
|
|
||||||
|
Signed-off-by: Tycho Andersen <tycho@docker.com>
|
||||||
|
---
|
||||||
|
fs/shiftfs.c | 12 +++++++-----
|
||||||
|
1 file changed, 7 insertions(+), 5 deletions(-)
|
||||||
|
|
||||||
|
diff --git a/fs/shiftfs.c b/fs/shiftfs.c
|
||||||
|
index ea8ac57b3ce1..fbe336ca0aa1 100644
|
||||||
|
--- a/fs/shiftfs.c
|
||||||
|
+++ b/fs/shiftfs.c
|
||||||
|
@@ -545,19 +545,21 @@ static int shiftfs_setattr(struct dentry *dentry, struct iattr *attr)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
-static int shiftfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||||
|
- struct kstat *stat)
|
||||||
|
+static int shiftfs_getattr(const struct path *path, struct kstat *stat,
|
||||||
|
+ u32 request_mask, unsigned int flags)
|
||||||
|
{
|
||||||
|
- struct inode *inode = dentry->d_inode;
|
||||||
|
+ struct inode *inode = d_inode(path->dentry);
|
||||||
|
struct dentry *real = inode->i_private;
|
||||||
|
struct inode *reali = real->d_inode;
|
||||||
|
const struct inode_operations *iop = reali->i_op;
|
||||||
|
+ struct path realpath;
|
||||||
|
int err = 0;
|
||||||
|
|
||||||
|
- mnt = dentry->d_sb->s_fs_info;
|
||||||
|
+	realpath.mnt = ((struct shiftfs_super_info *)path->dentry->d_sb->s_fs_info)->mnt; /* s_fs_info is the ssi, not a vfsmount */
|
||||||
|
+ realpath.dentry = real;
|
||||||
|
|
||||||
|
if (iop->getattr)
|
||||||
|
- err = iop->getattr(mnt, real, stat);
|
||||||
|
+ err = iop->getattr(&realpath, stat, request_mask, flags);
|
||||||
|
else
|
||||||
|
generic_fillattr(reali, stat);
|
||||||
|
|
||||||
|
--
|
||||||
|
2.11.0
|
||||||
|
|
55
projects/shiftfs/shiftfs.yml
Normal file
55
projects/shiftfs/shiftfs.yml
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
kernel:
|
||||||
|
image: "linuxkitprojects/kernel-shiftfs:4.11.4-881a041fc14bd95814cf140b5e98d97dd65160b5"
|
||||||
|
cmdline: "console=ttyS0 console=tty0 page_poison=1"
|
||||||
|
init:
|
||||||
|
- linuxkit/init:2599bcd5013ce5962aa155ee8929c26160de13bd
|
||||||
|
- linuxkit/runc:3a4e6cbf15470f62501b019b55e1caac5ee7689f
|
||||||
|
- linuxkit/containerd:b50181bc6e0084e5fcd6b6ad3cf433c4f66cae5a
|
||||||
|
- linuxkit/ca-certificates:75cf419fb58770884c3464eb687ec8dfc704169d
|
||||||
|
onboot:
|
||||||
|
- name: sysctl
|
||||||
|
image: "linuxkit/sysctl:3aa6bc663c2849ef239be7d941d3eaf3e6fcc018"
|
||||||
|
- name: binfmt
|
||||||
|
image: "linuxkit/binfmt:8ac5535f57f0c6f5fe88317b9d22a7677093c765"
|
||||||
|
- name: dhcpcd
|
||||||
|
image: "linuxkit/dhcpcd:7d2b8aaaf20c24ad7d11a5ea2ea5b4a80dc966f1"
|
||||||
|
command: ["/sbin/dhcpcd", "--nobackground", "-f", "/dhcpcd.conf", "-1"]
|
||||||
|
services:
|
||||||
|
- name: getty
|
||||||
|
image: "linuxkit/getty:ef9d667af71089326419fb08e9cc9d567cf15748"
|
||||||
|
env:
|
||||||
|
- INSECURE=true
|
||||||
|
- name: rngd
|
||||||
|
image: "linuxkit/rngd:1fa4de44c961bb5075647181891a3e7e7ba51c31"
|
||||||
|
- name: nginx
|
||||||
|
image: "nginx:alpine"
|
||||||
|
capabilities:
|
||||||
|
- CAP_NET_BIND_SERVICE
|
||||||
|
- CAP_CHOWN
|
||||||
|
- CAP_SETUID
|
||||||
|
- CAP_SETGID
|
||||||
|
- CAP_DAC_OVERRIDE
|
||||||
|
files:
|
||||||
|
- path: etc/containerd/config.toml
|
||||||
|
contents: |
|
||||||
|
state = "/run/containerd"
|
||||||
|
root = "/var/lib/containerd"
|
||||||
|
snapshotter = "overlay"
|
||||||
|
subreaper = false
|
||||||
|
|
||||||
|
[grpc]
|
||||||
|
address = "/run/containerd/containerd.sock"
|
||||||
|
uid = 0
|
||||||
|
gid = 0
|
||||||
|
|
||||||
|
[debug]
|
||||||
|
address = "/run/containerd/debug.sock"
|
||||||
|
level = "info"
|
||||||
|
|
||||||
|
[metrics]
|
||||||
|
address = ":13337"
|
||||||
|
trust:
|
||||||
|
org:
|
||||||
|
- linuxkit
|
||||||
|
image:
|
||||||
|
- nginx:alpine
|
Loading…
Reference in New Issue
Block a user