Backport transparent binfmt_misc to 4.4

Since we released this in the beta channel, and it is a nice feature that our users love,
backport it to 4.4 so we don't have to revert it or conditionally behave differently.

These are the upstream Linux commits:
- 9a08c352d05305ca7651540c3b107da1e4e1f40b fs: add filp_clone_open API
- 948b701a607f123df92ed29084413e5dd8cda2ed binfmt_misc: add persistent opened binary handler for containers

Signed-off-by: Justin Cormack <justin.cormack@docker.com>
This commit is contained in:
Justin Cormack 2017-01-11 14:28:42 +00:00
parent 26e24a760a
commit 3bc7060843
2 changed files with 196 additions and 0 deletions

View File

@ -0,0 +1,64 @@
From 9a08c352d05305ca7651540c3b107da1e4e1f40b Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Wed, 17 Feb 2016 16:49:38 -0800
Subject: [PATCH] fs: add filp_clone_open API
I need an API that allows me to obtain a clone of the current file
pointer to pass in to an exec handler. I've labelled this as an
internal API because I can't see how it would be useful outside of the
fs subsystem. The use case will be a persistent binfmt_misc handler.
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Acked-by: Jan Kara <jack@suse.cz>
---
fs/internal.h | 1 +
fs/open.c | 20 ++++++++++++++++++++
2 files changed, 21 insertions(+)
diff --git a/fs/internal.h b/fs/internal.h
index b71deee..c8ca0c9 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -108,6 +108,7 @@ extern long do_handle_open(int mountdirfd,
struct file_handle __user *ufh, int open_flag);
extern int open_check_o_direct(struct file *f);
extern int vfs_open(const struct path *, struct file *, const struct cred *);
+extern struct file *filp_clone_open(struct file *);
/*
* inode.c
diff --git a/fs/open.c b/fs/open.c
index 17cb6b1..bfe6f2b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1002,6 +1002,26 @@ struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(file_open_root);
+struct file *filp_clone_open(struct file *oldfile) /* returns a newly opened struct file for oldfile's path, or an ERR_PTR on failure */
+{
+ struct file *file;
+ int retval;
+
+ file = get_empty_filp();
+ if (IS_ERR(file))
+ return file; /* propagate the error pointer from get_empty_filp() unchanged */
+
+ file->f_flags = oldfile->f_flags; /* the clone carries the same open flags as the original */
+ retval = vfs_open(&oldfile->f_path, file, oldfile->f_cred); /* open the same path with the original file's credentials */
+ if (retval) {
+ put_filp(file); /* vfs_open() failed: release the file obtained from get_empty_filp() */
+ return ERR_PTR(retval);
+ }
+
+ return file;
+}
+EXPORT_SYMBOL(filp_clone_open);
+
long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
{
struct open_flags op;
--
1.9.1

View File

@ -0,0 +1,132 @@
From 948b701a607f123df92ed29084413e5dd8cda2ed Mon Sep 17 00:00:00 2001
From: James Bottomley <James.Bottomley@HansenPartnership.com>
Date: Wed, 17 Feb 2016 16:51:16 -0800
Subject: [PATCH] binfmt_misc: add persistent opened binary handler for
containers
This patch adds a new flag 'F' to the binfmt handlers. If you pass in
'F' the binary that runs the emulation will be opened immediately and
in future, will be cloned from the open file.
The net effect is that the handler survives both changeroots and mount
namespace changes, making it easy to work with foreign architecture
containers without contaminating the container image with the
emulator.
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>
Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
---
fs/binfmt_misc.c | 41 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 39 insertions(+), 2 deletions(-)
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 3a3ced7..8a108c4 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -26,6 +26,8 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
+#include "internal.h"
+
#ifdef DEBUG
# define USE_DEBUG 1
#else
@@ -43,6 +45,7 @@ enum {
#define MISC_FMT_PRESERVE_ARGV0 (1 << 31)
#define MISC_FMT_OPEN_BINARY (1 << 30)
#define MISC_FMT_CREDENTIALS (1 << 29)
+#define MISC_FMT_OPEN_FILE (1 << 28)
typedef struct {
struct list_head list;
@@ -54,6 +57,7 @@ enum {
char *interpreter; /* filename of interpreter */
char *name;
struct dentry *dentry;
+ struct file *interp_file;
} Node;
static DEFINE_RWLOCK(entries_lock);
@@ -201,7 +205,13 @@ static int load_misc_binary(struct linux_binprm *bprm)
if (retval < 0)
goto error;
- interp_file = open_exec(iname);
+ if (fmt->flags & MISC_FMT_OPEN_FILE && fmt->interp_file) {
+ interp_file = filp_clone_open(fmt->interp_file);
+ if (!IS_ERR(interp_file))
+ deny_write_access(interp_file);
+ } else {
+ interp_file = open_exec(iname);
+ }
retval = PTR_ERR(interp_file);
if (IS_ERR(interp_file))
goto error;
@@ -285,6 +295,11 @@ static char *check_special_flags(char *sfs, Node *e)
e->flags |= (MISC_FMT_CREDENTIALS |
MISC_FMT_OPEN_BINARY);
break;
+ case 'F':
+ pr_debug("register: flag: F: open interpreter file now\n");
+ p++;
+ e->flags |= MISC_FMT_OPEN_FILE;
+ break;
default:
cont = 0;
}
@@ -543,6 +558,8 @@ static void entry_status(Node *e, char *page)
*dp++ = 'O';
if (e->flags & MISC_FMT_CREDENTIALS)
*dp++ = 'C';
+ if (e->flags & MISC_FMT_OPEN_FILE)
+ *dp++ = 'F';
*dp++ = '\n';
if (!test_bit(Magic, &e->flags)) {
@@ -590,6 +607,11 @@ static void kill_node(Node *e)
}
write_unlock(&entries_lock);
+ if ((e->flags & MISC_FMT_OPEN_FILE) && e->interp_file) {
+ filp_close(e->interp_file, NULL);
+ e->interp_file = NULL;
+ }
+
if (dentry) {
drop_nlink(d_inode(dentry));
d_drop(dentry);
@@ -698,6 +720,21 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
goto out2;
}
+ if (e->flags & MISC_FMT_OPEN_FILE) {
+ struct file *f;
+
+ f = open_exec(e->interpreter);
+ if (IS_ERR(f)) {
+ err = PTR_ERR(f);
+ pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
+ simple_release_fs(&bm_mnt, &entry_count);
+ iput(inode);
+ inode = NULL;
+ goto out2;
+ }
+ e->interp_file = f;
+ }
+
e->dentry = dget(dentry);
inode->i_private = e;
inode->i_fop = &bm_entry_operations;
@@ -716,7 +753,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
if (err) {
kfree(e);
- return -EINVAL;
+ return err;
}
return count;
}
--
1.9.1