jail: refactor mount support to cover OCI spec
author Daniel Golle <daniel@makrotopia.org>
Sun, 12 Jul 2020 22:47:52 +0000 (23:47 +0100)
committer Daniel Golle <daniel@makrotopia.org>
Mon, 13 Jul 2020 11:14:49 +0000 (12:14 +0100)
Extend existing support for bind-mounts to allow arbitrary mounts
defined in OCI spec.

Signed-off-by: Daniel Golle <daniel@makrotopia.org>
jail/fs.c
jail/fs.h
jail/jail.c

index 8cc47d32f2b768b7fca9a0ddb511a0a5f718fe3e..3f090dd8e1940baa75f989b43b7528e8227c118d 100644 (file)
--- a/jail/fs.c
+++ b/jail/fs.c
@@ -1,6 +1,7 @@
 /*
  * Copyright (C) 2015 John Crispin <blogic@openwrt.org>
  * Copyright (C) 2015 Etienne Champetier <champetier.etienne@gmail.com>
+ * Copyright (C) 2020 Daniel Golle <daniel@makrotopia.org>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU Lesser General Public License version 2.1
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <unistd.h>
+#include <libgen.h>
 
 #include <libubox/avl.h>
 #include <libubox/avl-cmp.h>
+#include <libubox/blobmsg.h>
+#include <libubox/list.h>
 
 #include "elf.h"
 #include "fs.h"
 #include "log.h"
 
 struct mount {
-        struct avl_node avl;
-        const char *path;
-        int readonly;
-        int error;
+       /* node in the global `mounts` tree; add_mount() uses the target as key */
+       struct avl_node avl;
+       /* what to mount; stays NULL when add_mount() is given no source */
+       const char *source;
+       /* mount point; do_mount() appends it to the jail root */
+       const char *target;
+       /* filesystem type handed to mount(); stays NULL when none is given */
+       const char *filesystemtype;
+       /* MS_* flags handed to mount() */
+       unsigned long mountflags;
+       /* string handed to mount() as its data argument */
+       const char *optstr;
+       /* value do_mount() returns when stat() on a bind-mount source fails */
+       int error;
 };
 
 struct avl_tree mounts;
 
-int add_mount(const char *path, int readonly, int error)
+/*
+ * Create directory `dir` including missing parent directories.
+ *
+ * The string is modified while the function runs: it is cut at its last '/',
+ * the function recurses on the shortened path, and the '/' is put back
+ * before mkdir() is called on the full path. A path without any '/' is
+ * not created at all and simply yields 0.
+ *
+ * Returns 0 on success or when the directory already exists (EEXIST),
+ * -1 when creating a parent failed (note: `dir` is then left truncated),
+ * otherwise the failing mkdir() result after logging the error.
+ */
+int mkdir_p(char *dir, mode_t mask)
 {
-       assert(path != NULL);
+       char *l = strrchr(dir, '/');
+       int ret;
+
+       if (!l)
+               return 0;
+
+       *l = '\0';
+
+       if (mkdir_p(dir, mask))
+               return -1;
+
+       *l = '/';
+
+       ret = mkdir(dir, mask);
+       if (ret && errno == EEXIST)
+               return 0;
+
+       if (ret)
+               ERROR("mkdir(%s, %d) failed: %m\n", dir, mask);
+
+       return ret;
+}
+
+/*
+ * Perform one mount below the jail root.
+ *
+ * root:            jail root directory, prepended to the mount point
+ * source:          what to mount; for bind mounts it must exist on the host
+ * target:          mount point inside the jail (falls back to source if NULL)
+ * filesystemtype:  passed to mount() unchanged
+ * orig_mountflags: MS_* flags; MS_BIND selects the two-step bind + remount
+ * optstr:          passed to mount() as its data argument
+ * error:           returned when stat() on a bind-mount source fails, which
+ *                  lets callers make a missing source non-fatal by passing 0
+ *
+ * Returns 0 on success, -1 when creating the mount point or mounting fails.
+ */
+static int do_mount(const char *root, const char *source, const char *target, const char *filesystemtype,
+                   unsigned long orig_mountflags, const char *optstr, int error)
+{
+       struct stat s;
+       char new[PATH_MAX];
+       int fd;
+       bool is_bind = (orig_mountflags & MS_BIND);
+       unsigned long mountflags = orig_mountflags;
+
+       if (is_bind && stat(source, &s)) {
+               ERROR("stat(%s) failed: %m\n", source);
+               return error;
+       }
+
+       snprintf(new, sizeof(new), "%s%s", root, target?target:source);
+
+       /* `s` is only filled in for bind mounts; any other mount gets a directory */
+       if (!is_bind || S_ISDIR(s.st_mode)) {
+               mkdir_p(new, 0755);
+       } else {
+               /* dirname() may modify its argument, hence the path is rebuilt below */
+               mkdir_p(dirname(new), 0755);
+               snprintf(new, sizeof(new), "%s%s", root, target?target:source);
+               fd = creat(new, 0644);
+               if (fd == -1) {
+                       ERROR("creat(%s) failed: %m\n", new);
+                       return -1;
+               }
+               close(fd);
+       }
 
-       if (avl_find(&mounts, path))
+       if (is_bind) {
+               /*
+                * Establish the bind mount first, the remaining flags are applied
+                * by the remount below. MS_REC has to be given here already or a
+                * recursive bind ("rbind") would silently become a plain one.
+                */
+               if (mount(source, new, filesystemtype, MS_BIND | (mountflags & MS_REC), optstr)) {
+                       ERROR("failed to mount -B %s %s: %m\n", source, new);
+                       /* without the bind there is nothing to remount */
+                       return -1;
+               }
+               mountflags |= MS_REMOUNT;
+       }
+
+       if (mount(source, new, filesystemtype, mountflags, optstr)) {
+               ERROR("failed to mount %s %s: %m\n", source, new);
+               return -1;
+       }
+
+       DEBUG("mount %s%s %s (%s)\n", (mountflags & MS_BIND)?"-B ":"", source, new,
+             (mountflags & MS_RDONLY)?"ro":"rw");
+
+       return 0;
+}
+
+/*
+ * Queue a mount to be carried out later by mount_all().
+ *
+ * All strings are copied, so the caller keeps ownership of its arguments.
+ * source, filesystemtype and optstr may be NULL; target must not be.
+ * `error` is stored and later handed to do_mount().
+ *
+ * Returns 0 when the mount was queued and 1 when a mount for the same
+ * target is already queued (the new request is dropped in that case).
+ */
+int add_mount(const char *source, const char *target, const char *filesystemtype,
+             unsigned long mountflags, const char *optstr, int error)
+{
+       assert(target != NULL);
+
+       if (avl_find(&mounts, target))
                 return 1;
 
        struct mount *m;
        m = calloc(1, sizeof(struct mount));
        assert(m != NULL);
-       m->avl.key = m->path = strdup(path);
-       m->readonly = readonly;
+       m->avl.key = m->target = strdup(target);
+       if (source)
+               m->source = strdup(source);
+       if (filesystemtype)
+               m->filesystemtype = strdup(filesystemtype);
+       /* keep the filesystem-specific options, mount_all() hands them to do_mount() */
+       if (optstr)
+               m->optstr = strdup(optstr);
+       m->mountflags = mountflags;
        m->error = error;
 
        avl_insert(&mounts, &m->avl);
-       DEBUG("adding mount %s ro(%d) err(%d)\n", m->path, m->readonly, m->error != 0);
+       DEBUG("adding mount %s %s bind(%d) ro(%d) err(%d)\n", m->source, m->target,
+               !!(m->mountflags & MS_BIND), !!(m->mountflags & MS_RDONLY), m->error != 0);
+
        return 0;
 }
 
+/*
+ * Queue a bind mount which makes `path` appear at the same location inside
+ * the jail. A non-zero `readonly` adds MS_RDONLY; `error` is passed on to
+ * add_mount() unchanged, and so is add_mount()'s return value.
+ */
+int add_mount_bind(const char *path, int readonly, int error)
+{
+       const unsigned long bindflags = readonly ? (MS_BIND | MS_RDONLY) : MS_BIND;
+
+       return add_mount(path, path, NULL, bindflags, NULL, error);
+}
+
+
+/* indices into oci_mount_policy[] and into the tb[] array filled from it */
+enum {
+       OCI_MOUNT_SOURCE,
+       OCI_MOUNT_DESTINATION,
+       OCI_MOUNT_TYPE,
+       OCI_MOUNT_OPTIONS,
+       __OCI_MOUNT_MAX,
+};
+
+/* attribute name and expected blobmsg type of each field of a mount entry */
+static const struct blobmsg_policy oci_mount_policy[] = {
+       [OCI_MOUNT_SOURCE] = { "source", BLOBMSG_TYPE_STRING },
+       [OCI_MOUNT_DESTINATION] = { "destination", BLOBMSG_TYPE_STRING },
+       [OCI_MOUNT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
+       [OCI_MOUNT_OPTIONS] = { "options", BLOBMSG_TYPE_ARRAY },
+};
+
+/* list entry used by parseOCImountopts() to collect filesystem-specific options */
+struct mount_opt {
+       struct list_head list;
+       /* the pointer returned by blobmsg_get_string(), stored as-is (not copied) */
+       char *optstr;
+};
+
+/*
+ * Translate the "options" array of a mount entry into mount() arguments.
+ *
+ * msg:         blobmsg array of option strings
+ * mount_flags: receives the MS_* flags built from the generic options
+ * mount_data:  receives a newly allocated, comma-separated string of all
+ *              options which are not handled here (filesystem-specific
+ *              ones); left untouched if there are none. The caller frees it.
+ * error:       set to 0 when "nofail" is present, otherwise left untouched
+ *
+ * Returns 0 on success and ENOMEM when an allocation fails.
+ */
+static int parseOCImountopts(struct blob_attr *msg, unsigned long *mount_flags, char **mount_data, int *error)
+{
+       struct blob_attr *cur;
+       int rem;
+       int ret = 0;
+       unsigned long mf = 0;
+       char *tmp;
+       struct list_head fsopts = LIST_HEAD_INIT(fsopts);
+       size_t len = 0;
+       struct mount_opt *opt, *nextopt;
+
+       blobmsg_for_each_attr(cur, msg, rem) {
+               tmp = blobmsg_get_string(cur);
+               if (!strcmp("ro", tmp))
+                       mf |= MS_RDONLY;
+               else if (!strcmp("rw", tmp))
+                       mf &= ~MS_RDONLY;
+               else if (!strcmp("bind", tmp))
+                       mf |= MS_BIND; /* must not discard flags parsed earlier */
+               else if (!strcmp("rbind", tmp))
+                       mf |= MS_BIND | MS_REC;
+               else if (!strcmp("sync", tmp))
+                       mf |= MS_SYNCHRONOUS;
+               else if (!strcmp("async", tmp))
+                       mf &= ~MS_SYNCHRONOUS;
+               else if (!strcmp("atime", tmp))
+                       mf &= ~MS_NOATIME;
+               else if (!strcmp("noatime", tmp))
+                       mf |= MS_NOATIME;
+               else if (!strcmp("defaults", tmp))
+                       mf = 0; /* rw, suid, dev, exec, auto, nouser, and async */
+               else if (!strcmp("dev", tmp))
+                       mf &= ~MS_NODEV;
+               else if (!strcmp("nodev", tmp))
+                       mf |= MS_NODEV;
+               else if (!strcmp("diratime", tmp))
+                       mf &= ~MS_NODIRATIME;
+               else if (!strcmp("nodiratime", tmp))
+                       mf |= MS_NODIRATIME;
+               else if (!strcmp("dirsync", tmp))
+                       mf |= MS_DIRSYNC;
+               else if (!strcmp("exec", tmp))
+                       mf &= ~MS_NOEXEC;
+               else if (!strcmp("noexec", tmp))
+                       mf |= MS_NOEXEC;
+               else if (!strcmp("mand", tmp))
+                       mf |= MS_MANDLOCK;
+               else if (!strcmp("nomand", tmp))
+                       mf &= ~MS_MANDLOCK;
+               else if (!strcmp("relatime", tmp))
+                       mf |= MS_RELATIME;
+               else if (!strcmp("norelatime", tmp))
+                       mf &= ~MS_RELATIME;
+               else if (!strcmp("strictatime", tmp))
+                       mf |= MS_STRICTATIME;
+               else if (!strcmp("nostrictatime", tmp))
+                       mf &= ~MS_STRICTATIME;
+               else if (!strcmp("lazytime", tmp))
+                       mf |= MS_LAZYTIME;
+               else if (!strcmp("nolazytime", tmp))
+                       mf &= ~MS_LAZYTIME;
+               else if (!strcmp("suid", tmp))
+                       mf &= ~MS_NOSUID;
+               else if (!strcmp("nosuid", tmp))
+                       mf |= MS_NOSUID;
+               else if (!strcmp("remount", tmp))
+                       mf |= MS_REMOUNT;
+               else if(!strcmp("nofail", tmp))
+                       *error = 0;
+               else if (!strcmp("auto", tmp) ||
+                        !strcmp("noauto", tmp) ||
+                        !strcmp("user", tmp) ||
+                        !strcmp("group", tmp) ||
+                        !strcmp("_netdev", tmp))
+                       DEBUG("ignoring built-in mount option %s\n", tmp);
+               else if (*tmp) {
+                       /*
+                        * filesystem-specific free-form option; empty strings are
+                        * skipped as they would break the separator accounting below
+                        */
+                       opt = calloc(1, sizeof(*opt));
+                       if (!opt) {
+                               ret = ENOMEM;
+                               goto out;
+                       }
+                       opt->optstr = tmp;
+                       list_add_tail(&opt->list, &fsopts);
+               }
+       }
+
+       *mount_flags = mf;
+
+       /* total length: all options plus one ',' between each two of them */
+       list_for_each_entry(opt, &fsopts, list) {
+               if (len)
+                       ++len;
+
+               len += strlen(opt->optstr);
+       }
+
+       if (!len)
+               goto out;
+
+       *mount_data = calloc(len + 1, sizeof(char));
+       if (!*mount_data) {
+               ret = ENOMEM;
+               goto out;
+       }
+
+       /* from here on `len` merely tells whether a separator is needed */
+       len = 0;
+       list_for_each_entry(opt, &fsopts, list) {
+               if (len)
+                       strcat(*mount_data, ",");
+
+               strcat(*mount_data, opt->optstr);
+               ++len;
+       }
+
+       DEBUG("mount flags(%08lx) fsopts(\"%s\")\n", mf, *mount_data?:"");
+
+out:
+       /* the entries only borrow their strings, so freeing the nodes is enough */
+       list_for_each_entry_safe(opt, nextopt, &fsopts, list) {
+               list_del(&opt->list);
+               free(opt);
+       }
+
+       return ret;
+}
+
+/*
+ * Parse one mount entry (source, destination, type, options) and queue it
+ * using add_mount().
+ *
+ * Returns 0 on success, EINVAL when the entry has no "destination" and the
+ * error reported by parseOCImountopts() when parsing the options fails.
+ * The return value of add_mount() is not evaluated.
+ */
+int parseOCImount(struct blob_attr *msg)
+{
+       struct blob_attr *tb[__OCI_MOUNT_MAX];
+       unsigned long mount_flags = 0;
+       char *mount_data = NULL;
+       int ret, err = -1;
+
+       blobmsg_parse(oci_mount_policy, __OCI_MOUNT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
+
+       if (!tb[OCI_MOUNT_DESTINATION])
+               return EINVAL;
+
+       if (tb[OCI_MOUNT_OPTIONS]) {
+               ret = parseOCImountopts(tb[OCI_MOUNT_OPTIONS], &mount_flags, &mount_data, &err);
+               if (ret) {
+                       free(mount_data);
+                       return ret;
+               }
+       }
+
+       add_mount(tb[OCI_MOUNT_SOURCE] ? blobmsg_get_string(tb[OCI_MOUNT_SOURCE]) : NULL,
+                 blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]),
+                 tb[OCI_MOUNT_TYPE] ? blobmsg_get_string(tb[OCI_MOUNT_TYPE]) : NULL,
+                 mount_flags, mount_data, err);
+
+       /* add_mount() does not hold on to this pointer, so release it here */
+       free(mount_data);
+
+       return 0;
+}
+
+
 int mount_all(const char *jailroot) {
        struct library *l;
        struct mount *m;
 
        avl_for_each_element(&libraries, l, avl)
-               add_mount(l->path, 1, -1);
+               add_mount_bind(l->path, 1, -1);
 
        avl_for_each_element(&mounts, m, avl)
-               if (mount_bind(jailroot, m->path, m->readonly, m->error))
+               if (do_mount(jailroot, m->source, m->target, m->filesystemtype, m->mountflags, m->optstr, m->error))
                        return -1;
 
        return 0;
@@ -119,7 +377,7 @@ int add_path_and_deps(const char *path, int readonly, int error, int lib)
                fd = open(path, O_RDONLY|O_CLOEXEC);
                if (fd == -1)
                        return error;
-               add_mount(path, readonly, error);
+               add_mount_bind(path, readonly, error);
        } else {
                if (avl_find(&libraries, path))
                        return 0;
index 343335fbee8581e0b70c2cd4279e635234abb6f4..e7283a30a4f6a7b1b5bc27d3c6ac749df4f59c3d 100644 (file)
--- a/jail/fs.h
+++ b/jail/fs.h
 #ifndef _JAIL_FS_H_
 #define _JAIL_FS_H_
 
-int add_mount(const char *path, int readonly, int error);
+#include <sys/mount.h>
+#include <libubox/blobmsg.h>
+
+int mkdir_p(char *dir, mode_t mask);
+int add_mount(const char *source, const char *target, const char *filesystemtype,
+             unsigned long mountflags, const char *optstr, int error);
+int add_mount_bind(const char *path, int readonly, int error);
+int parseOCImount(struct blob_attr *msg);
 int add_path_and_deps(const char *path, int readonly, int error, int lib);
 int mount_all(const char *jailroot);
 void mount_list_init(void);
index d63c467a7aaf0185ff966e0336a26889612d2377..e8abd8a8d3bf7002aa038d64b968108182bc005b 100644 (file)
@@ -25,7 +25,6 @@
 #include <string.h>
 #include <sys/stat.h>
 #include <fcntl.h>
-#include <libgen.h>
 #include <sched.h>
 #include <linux/limits.h>
 #include <linux/filter.h>
@@ -119,15 +118,11 @@ static void free_hooklist(struct hook_execvpe *hooklist)
        }
 }
 
-static void free_opts(bool all) {
+static void free_opts(bool child) {
        char **tmp;
 
-       free(opts.hostname);
-       free(opts.cwd);
-       free(opts.extroot);
-       free(opts.uidmap);
-       free(opts.gidmap);
-       if (all) {
+       /* we need to keep argv, envp and seccomp filter in child */
+       if (child) {
                if (opts.ociseccomp) {
                        free(opts.ociseccomp->filter);
                        free(opts.ociseccomp);
@@ -146,6 +141,11 @@ static void free_opts(bool all) {
                free(opts.envp);
        };
 
+       free(opts.hostname);
+       free(opts.cwd);
+       free(opts.extroot);
+       free(opts.uidmap);
+       free(opts.gidmap);
        free_hooklist(opts.hooks.createRuntime);
        free_hooklist(opts.hooks.createContainer);
        free_hooklist(opts.hooks.startContainer);
@@ -163,85 +163,6 @@ static char child_stack[STACK_SIZE];
 
 int console_fd;
 
-static int mkdir_p(char *dir, mode_t mask)
-{
-       char *l = strrchr(dir, '/');
-       int ret;
-
-       if (!l)
-               return 0;
-
-       *l = '\0';
-
-       if (mkdir_p(dir, mask))
-               return -1;
-
-       *l = '/';
-
-       ret = mkdir(dir, mask);
-       if (ret && errno == EEXIST)
-               return 0;
-
-       if (ret)
-               ERROR("mkdir(%s, %d) failed: %m\n", dir, mask);
-
-       return ret;
-}
-
-static int _mount_bind(const char *root, const char *path, const char *target, int readonly, int strict, int error)
-{
-       struct stat s;
-       char new[PATH_MAX];
-       int fd;
-       int remount_flags = MS_BIND | MS_REMOUNT;
-
-       if (stat(path, &s)) {
-               ERROR("stat(%s) failed: %m\n", path);
-               return error;
-       }
-
-       snprintf(new, sizeof(new), "%s%s", root, target?target:path);
-
-       if (S_ISDIR(s.st_mode)) {
-               mkdir_p(new, 0755);
-       } else {
-               mkdir_p(dirname(new), 0755);
-               snprintf(new, sizeof(new), "%s%s", root, target?target:path);
-               fd = creat(new, 0644);
-               if (fd == -1) {
-                       ERROR("creat(%s) failed: %m\n", new);
-                       return -1;
-               }
-               close(fd);
-       }
-
-       if (mount(path, new, NULL, MS_BIND, NULL)) {
-               ERROR("failed to mount -B %s %s: %m\n", path, new);
-               return -1;
-       }
-
-       if (readonly)
-               remount_flags |= MS_RDONLY;
-
-       if (strict)
-               remount_flags |= MS_NOEXEC | MS_NOSUID | MS_NODEV;
-
-       if ((strict || readonly) && mount(NULL, new, NULL, remount_flags, NULL)) {
-               ERROR("failed to remount (%s%s%s) %s: %m\n", readonly?"ro":"rw",
-                     (readonly && strict)?", ":"", strict?"strict":"", new);
-               return -1;
-       }
-
-       DEBUG("mount -B %s %s (%s%s%s)\n", path, new,
-             readonly?"ro":"rw", (readonly && strict)?", ":"", strict?"strict":"");
-
-       return 0;
-}
-
-int mount_bind(const char *root, const char *path, int readonly, int error) {
-       return _mount_bind(root, path, NULL, readonly, 0, error);
-}
-
 static int mount_overlay(char *jail_root, char *overlaydir) {
        char *upperdir, *workdir, *optsstr, *upperetc, *upperresolvconf;
        const char mountoptsformat[] = "lowerdir=%s,upperdir=%s,workdir=%s";
@@ -549,17 +470,16 @@ static int build_jail_fs(void)
                return -1;
        }
 
+       /* make sure /etc/resolv.conf exists if in new network namespace */
        if (opts.namespace & CLONE_NEWNET) {
-               char hostdir[PATH_MAX], jailetc[PATH_MAX], jaillink[PATH_MAX];
+               char jailetc[PATH_MAX], jaillink[PATH_MAX];
 
-               snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
-               mkdir_p(hostdir, 0755);
-               _mount_bind(jail_root, hostdir, "/tmp/resolv.conf.d", 1, 1, -1);
                snprintf(jailetc, PATH_MAX, "%s/etc", jail_root);
                mkdir_p(jailetc, 0755);
                snprintf(jaillink, PATH_MAX, "%s/etc/resolv.conf", jail_root);
                if (overlaydir)
                        unlink(jaillink);
+
                symlink("../tmp/resolv.conf.d/resolv.conf.auto", jaillink);
        }
 
@@ -1183,57 +1103,6 @@ out_createruntime:
        return ret;
 };
 
-enum {
-       OCI_MOUNT_SOURCE,
-       OCI_MOUNT_DESTINATION,
-       OCI_MOUNT_TYPE,
-       OCI_MOUNT_OPTIONS,
-       __OCI_MOUNT_MAX,
-};
-
-static const struct blobmsg_policy oci_mount_policy[] = {
-       [OCI_MOUNT_SOURCE] = { "source", BLOBMSG_TYPE_STRING },
-       [OCI_MOUNT_DESTINATION] = { "destination", BLOBMSG_TYPE_STRING },
-       [OCI_MOUNT_TYPE] = { "type", BLOBMSG_TYPE_STRING },
-       [OCI_MOUNT_OPTIONS] = { "options", BLOBMSG_TYPE_ARRAY },
-};
-
-static int parseOCImount(struct blob_attr *msg)
-{
-       struct blob_attr *tb[__OCI_MOUNT_MAX];
-
-       blobmsg_parse(oci_mount_policy, __OCI_MOUNT_MAX, tb, blobmsg_data(msg), blobmsg_len(msg));
-
-       if (!tb[OCI_MOUNT_DESTINATION])
-               return EINVAL;
-
-       if (!strcmp("proc", blobmsg_get_string(tb[OCI_MOUNT_TYPE])) &&
-           !strcmp("/proc", blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]))) {
-               opts.procfs = true;
-               return 0;
-       }
-
-       if (!strcmp("sysfs", blobmsg_get_string(tb[OCI_MOUNT_TYPE])) &&
-           !strcmp("/sys", blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]))) {
-               opts.sysfs = true;
-               return 0;
-       }
-
-       if (!strcmp("tmpfs", blobmsg_get_string(tb[OCI_MOUNT_TYPE])) &&
-           !strcmp("/dev", blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]))) {
-               /* we always mount a small tmpfs on /dev */
-               return 0;
-       }
-
-       INFO("ignoring unsupported mount %s %s -t %s -o %s\n",
-               blobmsg_get_string(tb[OCI_MOUNT_SOURCE]),
-               blobmsg_get_string(tb[OCI_MOUNT_DESTINATION]),
-               blobmsg_get_string(tb[OCI_MOUNT_TYPE]),
-               blobmsg_format_json(tb[OCI_MOUNT_OPTIONS], true));
-
-       return 0;
-};
-
 
 enum {
        OCI_PROCESS_USER_UID,
@@ -1648,7 +1517,7 @@ int main(int argc, char **argv)
                        break;
                case 'S':
                        opts.seccomp = optarg;
-                       add_mount(optarg, 1, -1);
+                       add_mount_bind(optarg, 1, -1);
                        break;
                case 'C':
                        opts.capabilities = optarg;
@@ -1676,11 +1545,11 @@ int main(int argc, char **argv)
                        break;
                case 'u':
                        opts.namespace |= CLONE_NEWNS;
-                       add_mount(ubus, 0, -1);
+                       add_mount_bind(ubus, 0, -1);
                        break;
                case 'l':
                        opts.namespace |= CLONE_NEWNS;
-                       add_mount(log, 0, -1);
+                       add_mount_bind(log, 0, -1);
                        break;
                case 'U':
                        opts.user = optarg;
@@ -1786,26 +1655,32 @@ int main(int argc, char **argv)
 
        if (opts.namespace) {
                if (opts.namespace & CLONE_NEWNS) {
-                       add_mount("/dev/full", 0, -1);
-                       add_mount("/dev/null", 0, -1);
-                       add_mount("/dev/random", 0, -1);
-                       add_mount("/dev/urandom", 0, -1);
-                       add_mount("/dev/zero", 0, -1);
-                       add_mount("/dev/ptmx", 0, -1);
-                       add_mount("/dev/tty", 0, -1);
+                       add_mount_bind("/dev/full", 0, -1);
+                       add_mount_bind("/dev/null", 0, -1);
+                       add_mount_bind("/dev/random", 0, -1);
+                       add_mount_bind("/dev/urandom", 0, -1);
+                       add_mount_bind("/dev/zero", 0, -1);
+                       add_mount_bind("/dev/ptmx", 0, -1);
+                       add_mount_bind("/dev/tty", 0, -1);
 
                        if (!opts.extroot && (opts.user || opts.group)) {
-                               add_mount("/etc/passwd", 0, -1);
-                               add_mount("/etc/group", 0, -1);
+                               add_mount_bind("/etc/passwd", 0, -1);
+                               add_mount_bind("/etc/group", 0, -1);
                        }
 
 #if defined(__GLIBC__)
                        if (!opts.extroot)
-                               add_mount("/etc/nsswitch.conf", 0, -1);
+                               add_mount_bind("/etc/nsswitch.conf", 0, -1);
 #endif
 
                        if (!(opts.namespace & CLONE_NEWNET)) {
-                               add_mount("/etc/resolv.conf", 0, -1);
+                               add_mount_bind("/etc/resolv.conf", 0, -1);
+                       } else {
+                               char hostdir[PATH_MAX];
+
+                               snprintf(hostdir, PATH_MAX, "/tmp/resolv.conf-%s.d", opts.name);
+                               mkdir_p(hostdir, 0755);
+                               add_mount(hostdir, "/tmp/resolv.conf.d", NULL, MS_BIND | MS_NOEXEC | MS_NOATIME | MS_NOSUID | MS_NODEV | MS_RDONLY, NULL, -1);
                        }
                }