From 71e75f401133510b91555ae7a457191a972b48ba Mon Sep 17 00:00:00 2001
From: Daniel Golle
Date: Sun, 12 Jul 2020 23:47:52 +0100
Subject: [PATCH] jail: refactor mount support to cover OCI spec

Extend existing support for bind-mounts to allow arbitrary mounts
defined in OCI spec.

Signed-off-by: Daniel Golle
---
Review notes (changes relative to the initial submission):
 * do_mount(): return -1 if the initial bind mount fails (like the removed
   _mount_bind() did) and pass MS_REC on to the initial bind mount so that
   "rbind" is recursive.
 * add_mount(): actually store a copy of optstr.
 * parseOCImountopts(): "bind" no longer resets previously parsed flags,
   "nolazytime" is recognized, allocations are checked (the check of
   mount_data tested the wrong pointer), empty option strings are skipped
   and the temporary list of options is freed.
 * parseOCImount(): free mount_data after it was copied by add_mount().
 * free_opts(): the flag is named 'parent' as argv, envp and the seccomp
   filter are freed when it is set.
 * document the new functions.

 jail/fs.c   | 335 +++++++++++++++++++++++++++++++++++++++++++++++++---
 jail/fs.h   |   9 +-
 jail/jail.c | 187 ++++++----------------------------
 3 files changed, 361 insertions(+), 170 deletions(-)

diff --git a/jail/fs.c b/jail/fs.c
index 8cc47d3..3f090dd 100644
--- a/jail/fs.c
+++ b/jail/fs.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2015 John Crispin
  * Copyright (C) 2015 Etienne Champetier
+ * Copyright (C) 2020 Daniel Golle
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License version 2.1
@@ -24,9 +25,12 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
+#include
+#include
 
 #include "elf.h"
 #include "fs.h"
@@ -34,42 +38,347 @@
 #include "log.h"
 
 struct mount {
-	struct avl_node avl;
-	const char *path;
-	int readonly;
-	int error;
+	struct avl_node avl;
+	const char *source;
+	const char *target;
+	const char *filesystemtype;
+	unsigned long mountflags;
+	const char *optstr;
+	int error;
 };
 
 struct avl_tree mounts;
 
-int add_mount(const char *path, int readonly, int error)
+/*
+ * Create directory 'dir' including all missing parent directories.
+ * 'dir' is modified in place while recursing, hence it has to be writable.
+ * Returns 0 on success or if the directory already exists, non-zero if
+ * creating 'dir' or one of its parents failed.
+ */
+int mkdir_p(char *dir, mode_t mask)
 {
-	assert(path != NULL);
+	char *l = strrchr(dir, '/');
+	int ret;
+
+	if (!l)
+		return 0;
+
+	*l = '\0';
+
+	if (mkdir_p(dir, mask))
+		return -1;
+
+	*l = '/';
+
+	ret = mkdir(dir, mask);
+	if (ret && errno == EEXIST)
+		return 0;
+
+	if (ret)
+		ERROR("mkdir(%s, %d) failed: %m\n", dir, mask);
+
+	return ret;
+}
+
+/*
+ * Carry out a single mount below 'root'. The mount point is 'target', or
+ * 'source' if no target was given, and gets created if it doesn't exist.
+ * Bind mounts are done in two steps: the bind mount itself followed by a
+ * remount which applies the remaining mount flags.
+ * Returns 0 on success, 'error' if stat() on the source of a bind mount
+ * failed and -1 on any other failure.
+ */
+static int do_mount(const char *root, const char *source, const char *target, const char *filesystemtype,
+		    unsigned long orig_mountflags, const char *optstr, int error)
+{
+	struct stat s;
+	char new[PATH_MAX];
+	int fd;
+	bool is_bind = (orig_mountflags & MS_BIND);
+	unsigned long mountflags = orig_mountflags;
+
+	if (is_bind && stat(source, &s)) {
+		ERROR("stat(%s) failed: %m\n", source);
+		return error;
+	}
+
+	snprintf(new, sizeof(new), "%s%s", root, target?target:source);
+
+	if (!is_bind || S_ISDIR(s.st_mode)) {
+		mkdir_p(new, 0755);
+	} else {
+		mkdir_p(dirname(new), 0755);
+		snprintf(new, sizeof(new), "%s%s", root, target?target:source);
+		fd = creat(new, 0644);
+		if (fd == -1) {
+			ERROR("creat(%s) failed: %m\n", new);
+			return -1;
+		}
+		close(fd);
+	}
 
-	if (avl_find(&mounts, path))
+	if (mountflags & MS_BIND) {
+		if (mount(source, new, filesystemtype, MS_BIND | (mountflags & MS_REC), optstr)) {
+			ERROR("failed to mount -B %s %s: %m\n", source, new);
+			return -1;
+		}
+		mountflags |= MS_REMOUNT;
+	}
+
+	if (mount(source, new, filesystemtype, mountflags, optstr)) {
+		ERROR("failed to mount %s %s: %m\n", source, new);
+		return -1;
+	}
+
+	DEBUG("mount %s%s %s (%s)\n", (mountflags & MS_BIND)?"-B ":"", source, new,
+	      (mountflags & MS_RDONLY)?"ro":"rw");
+
+	return 0;
+}
+
+/*
+ * Add a mount to the list of mounts carried out by mount_all().
+ * All strings passed are copied.
+ * Returns 0 on success or 1 if there already is a mount for 'target'.
+ */
+int add_mount(const char *source, const char *target, const char *filesystemtype,
+	      unsigned long mountflags, const char *optstr, int error)
+{
+	assert(target != NULL);
+
+	if (avl_find(&mounts, target))
 		return 1;
 
 	struct mount *m;
 	m = calloc(1, sizeof(struct mount));
 	assert(m != NULL);
-	m->avl.key = m->path = strdup(path);
-	m->readonly = readonly;
+	m->avl.key = m->target = strdup(target);
+	if (source)
+		m->source = strdup(source);
+	if (filesystemtype)
+		m->filesystemtype = strdup(filesystemtype);
+	if (optstr)
+		m->optstr = strdup(optstr);
+	m->mountflags = mountflags;
 	m->error = error;
 
 	avl_insert(&mounts, &m->avl);
-	DEBUG("adding mount %s ro(%d) err(%d)\n", m->path, m->readonly, m->error != 0);
+	DEBUG("adding mount %s %s bind(%d) ro(%d) err(%d)\n", m->source, m->target,
+		!!(m->mountflags & MS_BIND), !!(m->mountflags & MS_RDONLY), m->error != 0);
+
 	return 0;
 }
 
+/* Add a bind mount of 'path' onto the same path inside the jail */
+int add_mount_bind(const char *path, int readonly, int error)
+{
+	unsigned long mountflags = MS_BIND;
+
+	if (readonly)
+		mountflags |= MS_RDONLY;
+
+	return add_mount(path, path, NULL, mountflags, NULL, error);
+}
+
+
+enum {
+	OCI_MOUNT_SOURCE,
+	OCI_MOUNT_DESTINATION,
+	OCI_MOUNT_TYPE,
+	OCI_MOUNT_OPTIONS,
+	__OCI_MOUNT_MAX,
+};
+
+static const struct blobmsg_policy oci_mount_policy[] = {
+	[OCI_MOUNT_SOURCE] = { "source", BLOBMSG_TYPE_STRING },
+	[OCI_MOUNT_DESTINATION] = { "destination", BLOBMSG_TYPE_STRING },
+	[OCI_MOUNT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+	[OCI_MOUNT_OPTIONS] = { "options", BLOBMSG_TYPE_ARRAY },
+};
+
+struct mount_opt {
+	struct list_head list;
+	char *optstr;
+};
+
+/*
+ * Translate the "options" array of an OCI mount into mount(2) flags and a
+ * comma-separated string of filesystem-specific options.
+ * '*mount_data' is only set if filesystem-specific options were found, it is
+ * allocated here and has to be freed by the caller.
+ * '*error' is set to 0 if the "nofail" option is present.
+ * Returns 0 on success or ENOMEM if memory allocation failed.
+ */
+static int parseOCImountopts(struct blob_attr *msg, unsigned long *mount_flags, char **mount_data, int *error)
+{
+	struct blob_attr *cur;
+	int rem;
+	int ret = 0;
+	unsigned long mf = 0;
+	char *tmp;
+	struct list_head fsopts = LIST_HEAD_INIT(fsopts);
+	size_t len = 0;
+	struct mount_opt *opt, *tmpopt;
+
+	blobmsg_for_each_attr(cur, msg, rem) {
+		tmp = blobmsg_get_string(cur);
+		if (!strcmp("ro", tmp))
+			mf |= MS_RDONLY;
+		else if (!strcmp("rw", tmp))
+			mf &= ~MS_RDONLY;
+		else if (!strcmp("bind", tmp))
+			mf |= MS_BIND;
+		else if (!strcmp("rbind", tmp))
+			mf |= MS_BIND | MS_REC;
+		else if (!strcmp("sync", tmp))
+			mf |= MS_SYNCHRONOUS;
+		else if (!strcmp("async", tmp))
+			mf &= ~MS_SYNCHRONOUS;
+		else if (!strcmp("atime", tmp))
+			mf &= ~MS_NOATIME;
+		else if (!strcmp("noatime", tmp))
+			mf |= MS_NOATIME;
+		else if (!strcmp("defaults", tmp))
+			mf = 0; /* rw, suid, dev, exec, auto, nouser, and async */
+		else if (!strcmp("dev", tmp))
+			mf &= ~MS_NODEV;
+		else if (!strcmp("nodev", tmp))
+			mf |= MS_NODEV;
+		else if (!strcmp("diratime", tmp))
+			mf &= ~MS_NODIRATIME;
+		else if (!strcmp("nodiratime", tmp))
+			mf |= MS_NODIRATIME;
+		else if (!strcmp("dirsync", tmp))
+			mf |= MS_DIRSYNC;
+		else if (!strcmp("exec", tmp))
+			mf &= ~MS_NOEXEC;
+		else if (!strcmp("noexec", tmp))
+			mf |= MS_NOEXEC;
+		else if (!strcmp("mand", tmp))
+			mf |= MS_MANDLOCK;
+		else if (!strcmp("nomand", tmp))
+			mf &= ~MS_MANDLOCK;
+		else if (!strcmp("relatime", tmp))
+			mf |= MS_RELATIME;
+		else if (!strcmp("norelatime", tmp))
+			mf &= ~MS_RELATIME;
+		else if (!strcmp("strictatime", tmp))
+			mf |= MS_STRICTATIME;
+		else if (!strcmp("nostrictatime", tmp))
+			mf &= ~MS_STRICTATIME;
+		else if (!strcmp("lazytime", tmp))
+			mf |= MS_LAZYTIME;
+		else if (!strcmp("nolazytime", tmp))
+			mf &= ~MS_LAZYTIME;
+		else if (!strcmp("suid", tmp))
+			mf &= ~MS_NOSUID;
+		else if (!strcmp("nosuid", tmp))
+			mf |= MS_NOSUID;
+		else if (!strcmp("remount", tmp))
+			mf |= MS_REMOUNT;
+		else if(!strcmp("nofail", tmp))
+			*error = 0;
+		else if (!strcmp("auto", tmp) ||
+			 !strcmp("noauto", tmp) ||
+			 !strcmp("user", tmp) ||
+			 !strcmp("group", tmp) ||
+			 !strcmp("_netdev", tmp))
+			DEBUG("ignoring built-in mount option %s\n", tmp);
+		else if (*tmp) {
+			/* filesystem-specific free-form option, empty strings are skipped */
+			opt = calloc(1, sizeof(*opt));
+			if (!opt) {
+				ret = ENOMEM;
+				goto out;
+			}
+			opt->optstr = tmp;
+			list_add_tail(&opt->list, &fsopts);
+		}
+	};
+
+	*mount_flags = mf;
+
+	list_for_each_entry(opt, &fsopts, list) {
+		if (len)
+			++len;
+
+		len += strlen(opt->optstr);
+	};
+
+	if (!len)
+		goto out;
+
+	*mount_data = calloc(len + 1, sizeof(char));
+	if (!*mount_data) {
+		ret = ENOMEM;
+		goto out;
+	}
+
+	len = 0;
+	list_for_each_entry(opt, &fsopts, list) {
+		if (len)
+			strcat(*mount_data, ",");
+
+		strcat(*mount_data, opt->optstr);
+		++len;
+	};
+
+	DEBUG("mount flags(%08lx) fsopts(\"%s\")\n", mf, *mount_data?:"");
+
+out:
+	/* option strings point into the blob, only the list nodes are freed */
+	list_for_each_entry_safe(opt, tmpopt, &fsopts, list) {
+		list_del(&opt->list);
+		free(opt);
+	}
+
+	return ret;
+};
+
+/*
+ * Parse a single OCI mount object and add it to the list of mounts.
+ * Returns 0 on success, EINVAL if "destination" is missing or the error
+ * returned by parseOCImountopts().
+ */
+int parseOCImount(struct blob_attr *msg)
+{
+	struct blob_attr *tb[__OCI_MOUNT_MAX];
+	unsigned long mount_flags = 0;
+	char *mount_data = NULL;
+	int ret, err = -1;
+
+	blobmsg_parse(oci_mount_policy, __OCI_MOUNT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+	if (!tb[OCI_MOUNT_DESTINATION])
+		return EINVAL;
+
+	if (tb[OCI_MOUNT_OPTIONS]) {
+		ret = parseOCImountopts(tb[OCI_MOUNT_OPTIONS], &mount_flags, &mount_data, &err);
+		if (ret)
+			return ret;
+	}
+
+	add_mount(tb[OCI_MOUNT_SOURCE] ? blobmsg_get_string(tb[OCI_MOUNT_SOURCE]) : NULL,
+		  blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]),
+		  tb[OCI_MOUNT_TYPE] ? blobmsg_get_string(tb[OCI_MOUNT_TYPE]) : NULL,
+		  mount_flags, mount_data, err);
+
+	/* add_mount() keeps its own copy of the option string */
+	free(mount_data);
+
+	return 0;
+};
+
+
 int mount_all(const char *jailroot) {
 	struct library *l;
 	struct mount *m;
 
 	avl_for_each_element(&libraries, l, avl)
-		add_mount(l->path, 1, -1);
+		add_mount_bind(l->path, 1, -1);
 
 	avl_for_each_element(&mounts, m, avl)
-		if (mount_bind(jailroot, m->path, m->readonly, m->error))
+		if (do_mount(jailroot, m->source, m->target, m->filesystemtype, m->mountflags, m->optstr, m->error))
 			return -1;
 
 	return 0;
@@ -119,7 +428,7 @@ int add_path_and_deps(const char *path, int readonly, int error, int lib)
 		fd = open(path, O_RDONLY|O_CLOEXEC);
 		if (fd == -1)
 			return error;
-		add_mount(path, readonly, error);
+		add_mount_bind(path, readonly, error);
 	} else {
 		if (avl_find(&libraries, path))
 			return 0;
diff --git a/jail/fs.h b/jail/fs.h
index 343335f..e7283a3 100644
--- a/jail/fs.h
+++ b/jail/fs.h
@@ -13,7 +13,14 @@
 #ifndef _JAIL_FS_H_
 #define _JAIL_FS_H_
 
-int add_mount(const char *path, int readonly, int error);
+#include
+#include
+
+int mkdir_p(char *dir, mode_t mask);
+int add_mount(const char *source, const char *target, const char *filesystemtype,
+	      unsigned long mountflags, const char *optstr, int error);
+int add_mount_bind(const char *path, int readonly, int error);
+int parseOCImount(struct blob_attr *msg);
 int add_path_and_deps(const char *path, int readonly, int error, int lib);
 int mount_all(const char *jailroot);
 void mount_list_init(void);
diff --git a/jail/jail.c b/jail/jail.c
index d63c467..e8abd8a 100644
--- a/jail/jail.c
+++ b/jail/jail.c
@@ -25,7 +25,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -119,15 +118,11 @@ static void free_hooklist(struct hook_execvpe *hooklist)
 	}
 }
 
-static void free_opts(bool all) {
+static void free_opts(bool parent) {
 	char **tmp;
 
-	free(opts.hostname);
-	free(opts.cwd);
-	free(opts.extroot);
-	free(opts.uidmap);
-	free(opts.gidmap);
-	if (all) {
+	/* we need to keep argv, envp and seccomp filter in child */
+	if (parent) {
 		if (opts.ociseccomp) {
 			free(opts.ociseccomp->filter);
 			free(opts.ociseccomp);
@@ -146,6 +141,11 @@ static void free_opts(bool all) {
 		free(opts.envp);
 	};
 
+	free(opts.hostname);
+	free(opts.cwd);
+	free(opts.extroot);
+	free(opts.uidmap);
+	free(opts.gidmap);
 	free_hooklist(opts.hooks.createRuntime);
 	free_hooklist(opts.hooks.createContainer);
 	free_hooklist(opts.hooks.startContainer);
@@ -163,85 +163,6 @@ static char child_stack[STACK_SIZE];
 
 int console_fd;
 
-static int mkdir_p(char *dir, mode_t mask)
-{
-	char *l = strrchr(dir, '/');
-	int ret;
-
-	if (!l)
-		return 0;
-
-	*l = '\0';
-
-	if (mkdir_p(dir, mask))
-		return -1;
-
-	*l = '/';
-
-	ret = mkdir(dir, mask);
-	if (ret && errno == EEXIST)
-		return 0;
-
-	if (ret)
-		ERROR("mkdir(%s, %d) failed: %m\n", dir, mask);
-
-	return ret;
-}
-
-static int _mount_bind(const char *root, const char *path, const char *target, int readonly, int strict, int error)
-{
-	struct stat s;
-	char new[PATH_MAX];
-	int fd;
-	int remount_flags = MS_BIND | MS_REMOUNT;
-
-	if (stat(path, &s)) {
-		ERROR("stat(%s) failed: %m\n", path);
-		return error;
-	}
-
-	snprintf(new, sizeof(new), "%s%s", root, target?target:path);
-
-	if (S_ISDIR(s.st_mode)) {
-		mkdir_p(new, 0755);
-	} else {
-		mkdir_p(dirname(new), 0755);
-		snprintf(new, sizeof(new), "%s%s", root, target?target:path);
-		fd = creat(new, 0644);
-		if (fd == -1) {
-			ERROR("creat(%s) failed: %m\n", new);
-			return -1;
-		}
-		close(fd);
-	}
-
-	if (mount(path, new, NULL, MS_BIND, NULL)) {
-		ERROR("failed to mount -B %s %s: %m\n", path, new);
-		return -1;
-	}
-
-	if (readonly)
-		remount_flags |= MS_RDONLY;
-
-	if (strict)
-		remount_flags |= MS_NOEXEC | MS_NOSUID | MS_NODEV;
-
-	if ((strict || readonly) && mount(NULL, new, NULL, remount_flags, NULL)) {
-		ERROR("failed to remount (%s%s%s) %s: %m\n", readonly?"ro":"rw",
-		      (readonly && strict)?", ":"", strict?"strict":"", new);
-		return -1;
-	}
-
-	DEBUG("mount -B %s %s (%s%s%s)\n", path, new,
-	      readonly?"ro":"rw", (readonly && strict)?", ":"", strict?"strict":"");
-
-	return 0;
-}
-
-int mount_bind(const char *root, const char *path, int readonly, int error) {
-	return _mount_bind(root, path, NULL, readonly, 0, error);
-}
-
 static int mount_overlay(char *jail_root, char *overlaydir) {
 	char *upperdir, *workdir, *optsstr, *upperetc, *upperresolvconf;
 	const char mountoptsformat[] = "lowerdir=%s,upperdir=%s,workdir=%s";
@@ -549,17 +470,16 @@ static int build_jail_fs(void)
 		return -1;
 	}
 
+	/* make sure /etc/resolv.conf exists if in new network namespace */
 	if (opts.namespace & CLONE_NEWNET) {
-		char hostdir[PATH_MAX], jailetc[PATH_MAX], jaillink[PATH_MAX];
+		char jailetc[PATH_MAX], jaillink[PATH_MAX];
 
-		snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
-		mkdir_p(hostdir, 0755);
-		_mount_bind(jail_root, hostdir, "/tmp/resolv.conf.d", 1, 1, -1);
 		snprintf(jailetc, PATH_MAX, "%s/etc", jail_root);
 		mkdir_p(jailetc, 0755);
 		snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root);
 		if (overlaydir)
 			unlink(jaillink);
+
 		symlink("../tmp/resolv.conf.d/resolv.conf.auto", jaillink);
 	}
 
@@ -1183,57 +1103,6 @@ out_createruntime:
 	return ret;
 };
 
-enum {
-	OCI_MOUNT_SOURCE,
-	OCI_MOUNT_DESTINATION,
-	OCI_MOUNT_TYPE,
-	OCI_MOUNT_OPTIONS,
-	__OCI_MOUNT_MAX,
-};
-
-static const struct blobmsg_policy oci_mount_policy[] = {
-	[OCI_MOUNT_SOURCE] = { "source", BLOBMSG_TYPE_STRING },
-	[OCI_MOUNT_DESTINATION] = { "destination", BLOBMSG_TYPE_STRING },
-	[OCI_MOUNT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
-	[OCI_MOUNT_OPTIONS] = { "options", BLOBMSG_TYPE_ARRAY },
-};
-
-static int parseOCImount(struct blob_attr *msg)
-{
-	struct blob_attr *tb[__OCI_MOUNT_MAX];
-
-	blobmsg_parse(oci_mount_policy, __OCI_MOUNT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
-
-	if (!tb[OCI_MOUNT_DESTINATION])
-		return EINVAL;
-
-	if (!strcmp("proc", blobmsg_get_string(tb[OCI_MOUNT_TYPE])) &&
-	    !strcmp("/proc", blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]))) {
-		opts.procfs = true;
-		return 0;
-	}
-
-	if (!strcmp("sysfs", blobmsg_get_string(tb[OCI_MOUNT_TYPE])) &&
-	    !strcmp("/sys", blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]))) {
-		opts.sysfs = true;
-		return 0;
-	}
-
-	if (!strcmp("tmpfs", blobmsg_get_string(tb[OCI_MOUNT_TYPE])) &&
-	    !strcmp("/dev", blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]))) {
-		/* we always mount a small tmpfs on /dev */
-		return 0;
-	}
-
-	INFO("ignoring unsupported mount %s %s -t %s -o %s\n",
-		blobmsg_get_string(tb[OCI_MOUNT_SOURCE]),
-		blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]),
-		blobmsg_get_string(tb[OCI_MOUNT_TYPE]),
-		blobmsg_format_json(tb[OCI_MOUNT_OPTIONS], true));
-
-	return 0;
-};
-
 
 enum {
 	OCI_PROCESS_USER_UID,
@@ -1648,7 +1517,7 @@ int main(int argc, char **argv)
 			break;
 		case 'S':
 			opts.seccomp = optarg;
-			add_mount(optarg, 1, -1);
+			add_mount_bind(optarg, 1, -1);
 			break;
 		case 'C':
 			opts.capabilities = optarg;
@@ -1676,11 +1545,11 @@ int main(int argc, char **argv)
 			break;
 		case 'u':
 			opts.namespace |= CLONE_NEWNS;
-			add_mount(ubus, 0, -1);
+			add_mount_bind(ubus, 0, -1);
 			break;
 		case 'l':
 			opts.namespace |= CLONE_NEWNS;
-			add_mount(log, 0, -1);
+			add_mount_bind(log, 0, -1);
 			break;
 		case 'U':
 			opts.user = optarg;
@@ -1786,26 +1655,32 @@ int main(int argc, char **argv)
 
 	if (opts.namespace) {
 		if (opts.namespace & CLONE_NEWNS) {
-			add_mount("/dev/full", 0, -1);
-			add_mount("/dev/null", 0, -1);
-			add_mount("/dev/random", 0, -1);
-			add_mount("/dev/urandom", 0, -1);
-			add_mount("/dev/zero", 0, -1);
-			add_mount("/dev/ptmx", 0, -1);
-			add_mount("/dev/tty", 0, -1);
+			add_mount_bind("/dev/full", 0, -1);
+			add_mount_bind("/dev/null", 0, -1);
+			add_mount_bind("/dev/random", 0, -1);
+			add_mount_bind("/dev/urandom", 0, -1);
+			add_mount_bind("/dev/zero", 0, -1);
+			add_mount_bind("/dev/ptmx", 0, -1);
+			add_mount_bind("/dev/tty", 0, -1);
 
 			if (!opts.extroot && (opts.user || opts.group)) {
-				add_mount("/etc/passwd", 0, -1);
-				add_mount("/etc/group", 0, -1);
+				add_mount_bind("/etc/passwd", 0, -1);
+				add_mount_bind("/etc/group", 0, -1);
 			}
 
 #if defined(__GLIBC__)
 			if (!opts.extroot)
-				add_mount("/etc/nsswitch.conf", 0, -1);
+				add_mount_bind("/etc/nsswitch.conf", 0, -1);
 #endif
 
 			if (!(opts.namespace & CLONE_NEWNET)) {
-				add_mount("/etc/resolv.conf", 0, -1);
+				add_mount_bind("/etc/resolv.conf", 0, -1);
+			} else {
+				char hostdir[PATH_MAX];
+
+				snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
+				mkdir_p(hostdir, 0755);
+				add_mount(hostdir, "/tmp/resolv.conf.d", NULL, MS_BIND | MS_NOEXEC | MS_NOATIME | MS_NOSUID | MS_NODEV | MS_RDONLY, NULL, -1);
 			}
 		}
 
-- 
2.30.2