#include <fcntl.h>
#include <linux/limits.h>
#include <stdlib.h>
+#include <stdio.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/mman.h>
unsigned long mountflags;
const char *optstr;
int error;
+ bool inner;
};
struct avl_tree mounts;
return ret;
}
-static int do_mount(const char *root, const char *source, const char *target, const char *filesystemtype,
- unsigned long orig_mountflags, const char *optstr, int error)
+static int do_mount(const char *root, const char *orig_source, const char *target, const char *filesystemtype,
+ unsigned long orig_mountflags, const char *optstr, int error, bool inner)
{
struct stat s;
char new[PATH_MAX];
+ char *source = (char *)orig_source;
int fd;
bool is_bind = (orig_mountflags & MS_BIND);
bool is_mask = (source == (void *)(-1));
return error;
}
+ if (!is_mask && orig_source && inner) {
+ if (asprintf(&source, "%s%s", root, orig_source) < 0)
+ return ENOMEM;
+ }
+
snprintf(new, sizeof(new), "%s%s", root, target?target:source);
if (is_mask) {
if (error)
ERROR("failed to mount -B %s %s: %m\n", source, new);
+ if (inner)
+ free(source);
+
return error;
}
mountflags |= MS_REMOUNT;
if (error)
ERROR("failed to mount %s %s: %m\n", source, new);
+ if (inner)
+ free(source);
+
return error;
}
DEBUG("mount %s%s %s (%s)\n", (mountflags & MS_BIND)?"-B ":"", source, new,
(mountflags & MS_RDONLY)?"ro":"rw");
+ if (inner)
+ free(source);
+
return 0;
}
-int add_mount(const char *source, const char *target, const char *filesystemtype,
- unsigned long mountflags, const char *optstr, int error)
+static int _add_mount(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, const char *optstr, int error, bool inner)
{
assert(target != NULL);
m->mountflags = mountflags;
m->error = error;
+ m->inner = inner;
avl_insert(&mounts, &m->avl);
DEBUG("adding mount %s %s bind(%d) ro(%d) err(%d)\n", (m->source == (void*)(-1))?"mask":m->source, m->target,
return 0;
}
+int add_mount(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, const char *optstr, int error)
+{
+ return _add_mount(source, target, filesystemtype, mountflags, optstr, error, false); /* inner = false: do_mount() uses the source path as given, without prefixing the jail root */
+}
+
+int add_mount_inner(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, const char *optstr, int error)
+{
+ return _add_mount(source, target, filesystemtype, mountflags, optstr, error, true); /* inner = true: do_mount() prefixes a non-NULL, non-mask source with the jail root */
+}
+
int add_mount_bind(const char *path, int readonly, int error)
{
unsigned long mountflags = MS_BIND;
add_mount_bind(l->path, 1, -1);
avl_for_each_element(&mounts, m, avl)
- if (do_mount(jailroot, m->source, m->target, m->filesystemtype, m->mountflags, m->optstr, m->error))
+ if (do_mount(jailroot, m->source, m->target, m->filesystemtype, m->mountflags, m->optstr, m->error, m->inner))
return -1;
return 0;
int mkdir_p(char *dir, mode_t mask);
int add_mount(const char *source, const char *target, const char *filesystemtype,
unsigned long mountflags, const char *optstr, int error);
+int add_mount_inner(const char *source, const char *target, const char *filesystemtype,
+ unsigned long mountflags, const char *optstr, int error);
int add_mount_bind(const char *path, int readonly, int error);
int parseOCImount(struct blob_attr *msg);
int add_path_and_deps(const char *path, int readonly, int error, int lib);
add_mount(NULL, "/dev", "tmpfs", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "size=1M", -1);
add_mount(NULL, "/dev/pts", "devpts", MS_NOATIME | MS_NOEXEC | MS_NOSUID, "newinstance,ptmxmode=0666,mode=0620,gid=5", 0);
- if (opts.procfs || jsonfile)
- add_mount("proc", "/proc", "proc", MS_RDONLY | MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL, -1);
+ if (opts.procfs || jsonfile) {
+ add_mount("proc", "/proc", "proc", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID, NULL, -1);
+
+ /*
+ * hack to make /proc/sys/net read-write while the rest of /proc/sys is read-only
+ * which cannot be expressed with OCI spec, but happens to be very useful.
+ * Only apply it if '/proc/sys' is not already listed as mount, maskedPath or
+ * readonlyPath.
+ * If not running in a new network namespace, only make /proc/sys read-only.
+ * If running in a new network namespace, temporarily stash (ie. mount-bind)
+ * /proc/sys/net into (totally unrelated, but surely existing) /proc/self/net.
+ * Then we mount-bind /proc/sys read-only and then mount-move /proc/self/net into
+ * /proc/sys/net.
+ * This works because mounts are executed in incrementing strcmp() order and
+ * /proc/self/net appears there before /proc/sys/net and hence the operation
+ * succeeds as the bind-mount of /proc/self/net is performed first and then
+ * move-mount of /proc/sys/net follows because 'e' precedes 'y' in the ASCII
+ * table (and in the alphabet).
+ */
+ if (!add_mount(NULL, "/proc/sys", NULL, MS_BIND | MS_RDONLY, NULL, -1))
+ if (opts.namespace & CLONE_NEWNET)
+ if (!add_mount_inner("/proc/self/net", "/proc/sys/net", NULL, MS_MOVE, NULL, -1))
+ add_mount_inner("/proc/sys/net", "/proc/self/net", NULL, MS_BIND, NULL, -1);
+ }
if (opts.sysfs || jsonfile)
add_mount("sysfs", "/sys", "sysfs", MS_NOATIME | MS_NODEV | MS_NOEXEC | MS_NOSUID | MS_RDONLY, NULL, -1);
if (jsonfile)
add_mount("shm", "/dev/shm", "tmpfs", MS_NOSUID | MS_NOEXEC | MS_NODEV, "mode=1777", -1);
+
}
if (pipe(&pipes[0]) < 0 || pipe(&pipes[2]) < 0)