xen: add privcmd driver
authorJeremy Fitzhardinge <jeremy@goop.org>
Mon, 9 Feb 2009 20:05:49 +0000 (12:05 -0800)
committerJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Wed, 20 Oct 2010 23:22:29 +0000 (16:22 -0700)
The privcmd interface in xenfs allows the tool stack in the privileged
domain to get fairly direct access to the hypervisor in order to do
various management things such as domain construction.

[ Impact: new xenfs interface for privileged operations ]

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
drivers/xen/xenfs/Makefile
drivers/xen/xenfs/privcmd.c [new file with mode: 0644]
drivers/xen/xenfs/super.c
drivers/xen/xenfs/xenfs.h
include/xen/Kbuild
include/xen/privcmd.h [new file with mode: 0644]

index 5d45ff13cc01515f6c4362b0ad54c8b48dae3078..4a0be9a82af36d22691f38dc476300d2fe9ad315 100644 (file)
@@ -1,4 +1,4 @@
 obj-$(CONFIG_XENFS) += xenfs.o
 
 xenfs-y                          = super.o xenbus.o
-xenfs-$(CONFIG_XEN_DOM0) += xenstored.o
+xenfs-$(CONFIG_XEN_DOM0) += xenstored.o privcmd.o
diff --git a/drivers/xen/xenfs/privcmd.c b/drivers/xen/xenfs/privcmd.c
new file mode 100644 (file)
index 0000000..c7192f3
--- /dev/null
@@ -0,0 +1,436 @@
+/******************************************************************************
+ * privcmd.c
+ *
+ * Interface to privileged domain-0 commands.
+ *
+ * Copyright (c) 2002-2004, K A Fraser, B Dragovic
+ */
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/uaccess.h>
+#include <linux/swap.h>
+#include <linux/smp_lock.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/seq_file.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/tlb.h>
+#include <asm/xen/hypervisor.h>
+#include <asm/xen/hypercall.h>
+
+#include <xen/xen.h>
+#include <xen/privcmd.h>
+#include <xen/interface/xen.h>
+#include <xen/features.h>
+#include <xen/page.h>
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma);
+#endif
+
+struct remap_data {
+       unsigned long mfn;
+       unsigned domid;
+       pgprot_t prot;
+};
+
+static int remap_area_mfn_pte_fn(pte_t *ptep, pgtable_t token,
+                                unsigned long addr, void *data)
+{
+       struct remap_data *rmd = data;
+       pte_t pte = pte_mkspecial(pfn_pte(rmd->mfn++, rmd->prot));
+
+       xen_set_domain_pte(ptep, pte, rmd->domid);
+
+       return 0;
+}
+
+int remap_domain_mfn_range(struct vm_area_struct *vma, unsigned long addr,
+                          unsigned long mfn, unsigned long size,
+                          pgprot_t prot, unsigned domid)
+{
+       struct remap_data rmd;
+       int err;
+
+       prot = __pgprot(pgprot_val(prot) | _PAGE_IOMAP);
+
+       vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+
+       rmd.mfn = mfn;
+       rmd.prot = prot;
+       rmd.domid = domid;
+
+       err = apply_to_page_range(vma->vm_mm, addr, size,
+                                 remap_area_mfn_pte_fn, &rmd);
+
+       return err;
+}
+
+static long privcmd_ioctl_hypercall(void __user *udata)
+{
+       struct privcmd_hypercall hypercall;
+       long ret;
+
+       if (copy_from_user(&hypercall, udata, sizeof(hypercall)))
+               return -EFAULT;
+
+       ret = privcmd_call(hypercall.op,
+                          hypercall.arg[0], hypercall.arg[1],
+                          hypercall.arg[2], hypercall.arg[3],
+                          hypercall.arg[4]);
+
+       return ret;
+}
+
+static void free_page_list(struct list_head *pages)
+{
+       struct page *p, *n;
+
+       list_for_each_entry_safe(p, n, pages, lru)
+               __free_page(p);
+
+       INIT_LIST_HEAD(pages);
+}
+
+/*
+ * Given an array of items in userspace, return a list of pages
+ * containing the data.  If copying fails, either because of memory
+ * allocation failure or a problem reading user memory, return an
+ * error code; its up to the caller to dispose of any partial list.
+ */
+static int gather_array(struct list_head *pagelist,
+                       unsigned nelem, size_t size,
+                       void __user *data)
+{
+       unsigned pageidx;
+       void *pagedata;
+       int ret;
+
+       if (size > PAGE_SIZE)
+               return 0;
+
+       pageidx = PAGE_SIZE;
+       pagedata = NULL;        /* quiet, gcc */
+       while (nelem--) {
+               if (pageidx > PAGE_SIZE-size) {
+                       struct page *page = alloc_page(GFP_KERNEL);
+
+                       ret = -ENOMEM;
+                       if (page == NULL)
+                               goto fail;
+
+                       pagedata = page_address(page);
+
+                       list_add_tail(&page->lru, pagelist);
+                       pageidx = 0;
+               }
+
+               ret = -EFAULT;
+               if (copy_from_user(pagedata + pageidx, data, size))
+                       goto fail;
+
+               data += size;
+               pageidx += size;
+       }
+
+       ret = 0;
+
+fail:
+       return ret;
+}
+
+/*
+ * Call function "fn" on each element of the array fragmented
+ * over a list of pages.
+ */
+static int traverse_pages(unsigned nelem, size_t size,
+                         struct list_head *pos,
+                         int (*fn)(void *data, void *state),
+                         void *state)
+{
+       void *pagedata;
+       unsigned pageidx;
+       int ret;
+
+       BUG_ON(size > PAGE_SIZE);
+
+       pageidx = PAGE_SIZE;
+       pagedata = NULL;        /* hush, gcc */
+
+       while (nelem--) {
+               if (pageidx > PAGE_SIZE-size) {
+                       struct page *page;
+                       pos = pos->next;
+                       page = list_entry(pos, struct page, lru);
+                       pagedata = page_address(page);
+                       pageidx = 0;
+               }
+
+               ret = (*fn)(pagedata + pageidx, state);
+               if (ret)
+                       break;
+               pageidx += size;
+       }
+
+       return ret;
+}
+
+struct mmap_mfn_state {
+       unsigned long va;
+       struct vm_area_struct *vma;
+       domid_t domain;
+};
+
+static int mmap_mfn_range(void *data, void *state)
+{
+       struct privcmd_mmap_entry *msg = data;
+       struct mmap_mfn_state *st = state;
+       struct vm_area_struct *vma = st->vma;
+       int rc;
+
+       /* Do not allow range to wrap the address space. */
+       if ((msg->npages > (LONG_MAX >> PAGE_SHIFT)) ||
+           ((unsigned long)(msg->npages << PAGE_SHIFT) >= -st->va))
+               return -EINVAL;
+
+       /* Range chunks must be contiguous in va space. */
+       if ((msg->va != st->va) ||
+           ((msg->va+(msg->npages<<PAGE_SHIFT)) > vma->vm_end))
+               return -EINVAL;
+
+       rc = remap_domain_mfn_range(vma,
+                                   msg->va & PAGE_MASK,
+                                   msg->mfn,
+                                   msg->npages << PAGE_SHIFT,
+                                   vma->vm_page_prot,
+                                   st->domain);
+       if (rc < 0)
+               return rc;
+
+       st->va += msg->npages << PAGE_SHIFT;
+
+       return 0;
+}
+
+static long privcmd_ioctl_mmap(void __user *udata)
+{
+       struct privcmd_mmap mmapcmd;
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       int rc;
+       LIST_HEAD(pagelist);
+       struct mmap_mfn_state state;
+
+       if (!xen_initial_domain())
+               return -EPERM;
+
+       if (copy_from_user(&mmapcmd, udata, sizeof(mmapcmd)))
+               return -EFAULT;
+
+       rc = gather_array(&pagelist,
+                         mmapcmd.num, sizeof(struct privcmd_mmap_entry),
+                         mmapcmd.entry);
+
+       if (rc || list_empty(&pagelist))
+               goto out;
+
+       down_write(&mm->mmap_sem);
+
+       {
+               struct page *page = list_first_entry(&pagelist,
+                                                    struct page, lru);
+               struct privcmd_mmap_entry *msg = page_address(page);
+
+               vma = find_vma(mm, msg->va);
+               rc = -EINVAL;
+
+               if (!vma || (msg->va != vma->vm_start) ||
+                   !privcmd_enforce_singleshot_mapping(vma))
+                       goto out_up;
+       }
+
+       state.va = vma->vm_start;
+       state.vma = vma;
+       state.domain = mmapcmd.dom;
+
+       rc = traverse_pages(mmapcmd.num, sizeof(struct privcmd_mmap_entry),
+                           &pagelist,
+                           mmap_mfn_range, &state);
+
+
+out_up:
+       up_write(&mm->mmap_sem);
+
+out:
+       free_page_list(&pagelist);
+
+       return rc;
+}
+
+struct mmap_batch_state {
+       domid_t domain;
+       unsigned long va;
+       struct vm_area_struct *vma;
+       int err;
+
+       xen_pfn_t __user *user;
+};
+
+static int mmap_batch_fn(void *data, void *state)
+{
+       xen_pfn_t *mfnp = data;
+       struct mmap_batch_state *st = state;
+
+       if (remap_domain_mfn_range(st->vma, st->va & PAGE_MASK,
+                                  *mfnp, PAGE_SIZE,
+                                  st->vma->vm_page_prot, st->domain) < 0) {
+               *mfnp |= 0xf0000000U;
+               st->err++;
+       }
+       st->va += PAGE_SIZE;
+
+       return 0;
+}
+
+static int mmap_return_errors(void *data, void *state)
+{
+       xen_pfn_t *mfnp = data;
+       struct mmap_batch_state *st = state;
+
+       put_user(*mfnp, st->user++);
+
+       return 0;
+}
+
+static long privcmd_ioctl_mmap_batch(void __user *udata)
+{
+       int ret;
+       struct privcmd_mmapbatch m;
+       struct mm_struct *mm = current->mm;
+       struct vm_area_struct *vma;
+       unsigned long nr_pages;
+       LIST_HEAD(pagelist);
+       struct mmap_batch_state state;
+
+       if (!xen_initial_domain())
+               return -EPERM;
+
+       if (copy_from_user(&m, udata, sizeof(m)))
+               return -EFAULT;
+
+       nr_pages = m.num;
+       if ((m.num <= 0) || (nr_pages > (LONG_MAX >> PAGE_SHIFT)))
+               return -EINVAL;
+
+       ret = gather_array(&pagelist, m.num, sizeof(xen_pfn_t),
+                          m.arr);
+
+       if (ret || list_empty(&pagelist))
+               goto out;
+
+       down_write(&mm->mmap_sem);
+
+       vma = find_vma(mm, m.addr);
+       ret = -EINVAL;
+       if (!vma ||
+           (m.addr != vma->vm_start) ||
+           ((m.addr + (nr_pages << PAGE_SHIFT)) != vma->vm_end) ||
+           !privcmd_enforce_singleshot_mapping(vma)) {
+               up_write(&mm->mmap_sem);
+               goto out;
+       }
+
+       state.domain = m.dom;
+       state.vma = vma;
+       state.va = m.addr;
+       state.err = 0;
+
+       ret = traverse_pages(m.num, sizeof(xen_pfn_t),
+                            &pagelist, mmap_batch_fn, &state);
+
+       up_write(&mm->mmap_sem);
+
+       if (state.err > 0) {
+               ret = state.err;
+
+               state.user = udata;
+               traverse_pages(m.num, sizeof(xen_pfn_t),
+                              &pagelist,
+                              mmap_return_errors, &state);
+       }
+
+out:
+       free_page_list(&pagelist);
+
+       return ret;
+}
+
+static long privcmd_ioctl(struct file *file,
+                         unsigned int cmd, unsigned long data)
+{
+       int ret = -ENOSYS;
+       void __user *udata = (void __user *) data;
+
+       switch (cmd) {
+       case IOCTL_PRIVCMD_HYPERCALL:
+               ret = privcmd_ioctl_hypercall(udata);
+               break;
+
+       case IOCTL_PRIVCMD_MMAP:
+               ret = privcmd_ioctl_mmap(udata);
+               break;
+
+       case IOCTL_PRIVCMD_MMAPBATCH:
+               ret = privcmd_ioctl_mmap_batch(udata);
+               break;
+
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+#ifndef HAVE_ARCH_PRIVCMD_MMAP
+static int privcmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+       return VM_FAULT_SIGBUS;
+}
+
+static struct vm_operations_struct privcmd_vm_ops = {
+       .fault = privcmd_fault
+};
+
+static int privcmd_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       /* Unsupported for auto-translate guests. */
+       if (xen_feature(XENFEAT_auto_translated_physmap))
+               return -ENOSYS;
+
+       /* DONTCOPY is essential for Xen as copy_page_range is broken. */
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+       vma->vm_ops = &privcmd_vm_ops;
+       vma->vm_private_data = NULL;
+
+       return 0;
+}
+
+static int privcmd_enforce_singleshot_mapping(struct vm_area_struct *vma)
+{
+       return (xchg(&vma->vm_private_data, (void *)1) == NULL);
+}
+#endif
+
+const struct file_operations privcmd_file_ops = {
+       .unlocked_ioctl = privcmd_ioctl,
+       .mmap = privcmd_mmap,
+};
index 3cf7707217f2b0332daae1774c5e7c502010f5bd..8c7462866e90c69ff33f820b8b01b108c3433a15 100644 (file)
@@ -96,6 +96,8 @@ static int xenfs_fill_super(struct super_block *sb, void *data, int silent)
                                  &xsd_kva_file_ops, NULL, S_IRUSR|S_IWUSR);
                xenfs_create_file(sb, sb->s_root, "xsd_port",
                                  &xsd_port_file_ops, NULL, S_IRUSR|S_IWUSR);
+               xenfs_create_file(sb, sb->s_root, "privcmd",
+                                 &privcmd_file_ops, NULL, S_IRUSR|S_IWUSR);
        }
 
        return rc;
index 5056306e7aa8efc547377049b28ba416c0fe8365..b68aa6200003575549e7a5844ae7b43b6bec135f 100644 (file)
@@ -2,6 +2,7 @@
 #define _XENFS_XENBUS_H
 
 extern const struct file_operations xenbus_file_ops;
+extern const struct file_operations privcmd_file_ops;
 extern const struct file_operations xsd_kva_file_ops;
 extern const struct file_operations xsd_port_file_ops;
 
index 4e65c16a445b06f573afc055e92c7d4b9280d907..84ad8f02fee5e5904c31c21915db049de7c3b2b3 100644 (file)
@@ -1 +1,2 @@
 header-y += evtchn.h
+header-y += privcmd.h
diff --git a/include/xen/privcmd.h b/include/xen/privcmd.h
new file mode 100644 (file)
index 0000000..b42cdfd
--- /dev/null
@@ -0,0 +1,80 @@
+/******************************************************************************
+ * privcmd.h
+ *
+ * Interface to /proc/xen/privcmd.
+ *
+ * Copyright (c) 2003-2005, K A Fraser
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef __LINUX_PUBLIC_PRIVCMD_H__
+#define __LINUX_PUBLIC_PRIVCMD_H__
+
+#include <linux/types.h>
+
+typedef unsigned long xen_pfn_t;
+
+#ifndef __user
+#define __user
+#endif
+
+struct privcmd_hypercall {
+       __u64 op;
+       __u64 arg[5];
+};
+
+struct privcmd_mmap_entry {
+       __u64 va;
+       __u64 mfn;
+       __u64 npages;
+};
+
+struct privcmd_mmap {
+       int num;
+       domid_t dom; /* target domain */
+       struct privcmd_mmap_entry __user *entry;
+};
+
+struct privcmd_mmapbatch {
+       int num;     /* number of pages to populate */
+       domid_t dom; /* target domain */
+       __u64 addr;  /* virtual address */
+       xen_pfn_t __user *arr; /* array of mfns - top nibble set on err */
+};
+
+/*
+ * @cmd: IOCTL_PRIVCMD_HYPERCALL
+ * @arg: &privcmd_hypercall_t
+ * Return: Value returned from execution of the specified hypercall.
+ */
+#define IOCTL_PRIVCMD_HYPERCALL                                        \
+       _IOC(_IOC_NONE, 'P', 0, sizeof(struct privcmd_hypercall))
+#define IOCTL_PRIVCMD_MMAP                                     \
+       _IOC(_IOC_NONE, 'P', 2, sizeof(struct privcmd_mmap))
+#define IOCTL_PRIVCMD_MMAPBATCH                                        \
+       _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch))
+
+#endif /* __LINUX_PUBLIC_PRIVCMD_H__ */