On Fri, 2010-12-03 at 15:38 +0000, Daniel De Graaf wrote:
> This allows a userspace application to allocate a shared page for
> implementing inter-domain communication or device drivers. These
> shared pages can be mapped using the gntdev device or by the kernel
> in another domain.
This seems like useful functionality, but is it really necessary for it
to be a separate driver from the existing gntdev driver? The broad,
high-level semantics of ioctl+mmap seem pretty similar. It also has some
similarities with the sort of device we will need in order to properly
allocate memory which is safe to use as an argument to a hypercall.
Do you have an example of a user of the driver?
Thanks,
Ian.
>
> Signed-off-by: Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>
> ---
> drivers/xen/Kconfig | 7 +
> drivers/xen/Makefile | 2 +
> drivers/xen/gntalloc.c | 456 ++++++++++++++++++++++++++++++++++++++++++++++++
> include/xen/gntalloc.h | 68 +++++++
> 4 files changed, 533 insertions(+), 0 deletions(-)
> create mode 100644 drivers/xen/gntalloc.c
> create mode 100644 include/xen/gntalloc.h
>
> diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
> index a9f3a8f..0be0edc 100644
> --- a/drivers/xen/Kconfig
> +++ b/drivers/xen/Kconfig
> @@ -179,6 +179,13 @@ config XEN_GNTDEV
> help
> Allows userspace processes to map grants from other domains.
>
> +config XEN_GRANT_DEV_ALLOC
> + tristate "User-space grant reference allocator driver"
> + depends on XEN
> + help
> + Allows userspace processes to create pages with access granted
> + to other domains.
> +
> config XEN_S3
> def_bool y
> depends on XEN_DOM0 && ACPI
> diff --git a/drivers/xen/Makefile b/drivers/xen/Makefile
> index ef1ea63..9814c1d 100644
> --- a/drivers/xen/Makefile
> +++ b/drivers/xen/Makefile
> @@ -10,6 +10,7 @@ obj-$(CONFIG_XEN_XENCOMM) += xencomm.o
> obj-$(CONFIG_XEN_BALLOON) += balloon.o
> obj-$(CONFIG_XEN_DEV_EVTCHN) += xen-evtchn.o
> obj-$(CONFIG_XEN_GNTDEV) += xen-gntdev.o
> +obj-$(CONFIG_XEN_GRANT_DEV_ALLOC) += xen-gntalloc.o
> obj-$(CONFIG_XEN_PCIDEV_BACKEND) += pciback/
> obj-$(CONFIG_XEN_BLKDEV_BACKEND) += blkback/
> obj-$(CONFIG_XEN_BLKDEV_TAP) += blktap/
> @@ -25,3 +26,4 @@ obj-$(CONFIG_XEN_PLATFORM_PCI) += platform-pci.o
>
> xen-evtchn-y := evtchn.o
> xen-gntdev-y := gntdev.o
> +xen-gntalloc-y := gntalloc.o
> diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c
> new file mode 100644
> index 0000000..f26adfd
> --- /dev/null
> +++ b/drivers/xen/gntalloc.c
> @@ -0,0 +1,456 @@
> +/******************************************************************************
> + * gntalloc.c
> + *
> + * Device for creating grant references (in user-space) that may be shared
> + * with other domains.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
> + */
> +
> +/*
> + * This driver exists to allow userspace programs in Linux to allocate kernel
> + * memory that will later be shared with another domain. Without this device,
> + * Linux userspace programs cannot create grant references.
> + *
> + * How this stuff works:
> + * X -> granting a page to Y
> + * Y -> mapping the grant from X
> + *
> + * 1. X uses the gntalloc device to allocate a page of kernel memory, P.
> + * 2. X creates an entry in the grant table that says domid(Y) can
> + * access P.
> + * 3. X gives the grant reference identifier, GREF, to Y.
> + * 4. A program in Y uses the gntdev device to map the page (owned by X
> + * and identified by GREF) into domain(Y) and then into the address
> + * space of the program. Behind the scenes, this requires a
> + * hypercall in which Xen modifies the host CPU page tables to
> + * perform the sharing -- that's where the actual cross-domain mapping
> + * occurs.
> + * 5. A program in X mmap()s a segment of the gntalloc device that
> + * corresponds to the shared page.
> + * 6. The two userspace programs can now communicate over the shared page.
> + *
> + *
> + * NOTE TO USERSPACE LIBRARIES:
> + * The grant allocation and mmap()ing are, naturally, two separate
> + * operations. You set up the sharing by calling the create ioctl() and
> + * then the mmap(). You must tear down the sharing in the reverse order
> + * (munmap() and then the destroy ioctl()).
> + *
> + * WARNING: Since Xen does not allow a guest to forcibly end the use of a grant
> + * reference, this device can be used to consume kernel memory by leaving grant
> + * references mapped by another domain when an application exits. Therefore,
> + * there is a global limit on the number of pages that can be allocated. When
> + * all references to the page are unmapped, it will be freed during the next
> + * grant operation.
> + */
> +
> +#include <asm/atomic.h>
> +#include <linux/module.h>
> +#include <linux/miscdevice.h>
> +#include <linux/kernel.h>
> +#include <linux/init.h>
> +#include <linux/slab.h>
> +#include <linux/fs.h>
> +#include <linux/device.h>
> +#include <linux/mm.h>
> +#include <asm/uaccess.h>
> +#include <linux/types.h>
> +#include <linux/list.h>
> +
> +#include <xen/xen.h>
> +#include <xen/page.h>
> +#include <xen/grant_table.h>
> +#include <xen/gntalloc.h>
> +
> +static int debug = 0;
> +module_param(debug, int, 0644);
> +
> +static int limit = 1024;
> +module_param(limit, int, 0644);
> +
> +static LIST_HEAD(gref_list);
> +static DEFINE_SPINLOCK(gref_lock);
> +static int gref_size = 0;
> +
> +/* Metadata on a grant reference. */
> +struct gntalloc_gref {
> + struct list_head next_all; /* list entry gref_list */
> + struct list_head next_file; /* list entry file->list, if open */
> + domid_t foreign_domid; /* The ID of the domain to share with. */
> + grant_ref_t gref_id; /* The grant reference number. */
> + unsigned int users; /* Use count - when zero, waiting on Xen */
> + struct page* page; /* The shared page. */
> +};
> +
> +struct gntalloc_file_private_data {
> + struct list_head list;
> +};
> +
> +static void __del_gref(struct gntalloc_gref *gref);
> +
> +static void do_cleanup(void)
> +{
> + struct gntalloc_gref *gref, *n;
> + list_for_each_entry_safe(gref, n, &gref_list, next_all) {
> + if (!gref->users)
> + __del_gref(gref);
> + }
> +}
> +
> +
> +static int add_gref(domid_t foreign_domid, uint32_t readonly,
> + struct gntalloc_file_private_data *priv)
> +{
> + int rc;
> + struct gntalloc_gref *gref;
> +
> + rc = -ENOMEM;
> + spin_lock(&gref_lock);
> + do_cleanup();
> + if (gref_size >= limit) {
> + spin_unlock(&gref_lock);
> + rc = -ENOSPC;
> + goto out;
> + }
> + gref_size++;
> + spin_unlock(&gref_lock);
> +
> + gref = kzalloc(sizeof(*gref), GFP_KERNEL);
> + if (!gref)
> + goto out;
> +
> + gref->foreign_domid = foreign_domid;
> + gref->users = 1;
> +
> + /* Allocate the page to share. */
> + gref->page = alloc_page(GFP_KERNEL|__GFP_ZERO);
> + if (!gref->page)
> + goto out_nopage;
> +
> + /* Grant foreign access to the page. */
> + gref->gref_id = gnttab_grant_foreign_access(foreign_domid,
> + pfn_to_mfn(page_to_pfn(gref->page)), readonly);
> + if (gref->gref_id < 0) {
> + printk(KERN_ERR "%s: failed to grant foreign access for mfn "
> + "%lu to domain %u\n", __func__,
> + pfn_to_mfn(page_to_pfn(gref->page)), foreign_domid);
> + rc = -EFAULT;
> + goto out_no_foreign_gref;
> + }
> +
> + /* Add to gref lists. */
> + spin_lock(&gref_lock);
> + list_add_tail(&gref->next_all, &gref_list);
> + list_add_tail(&gref->next_file, &priv->list);
> + spin_unlock(&gref_lock);
> +
> + return gref->gref_id;
> +
> +out_no_foreign_gref:
> + __free_page(gref->page);
> +out_nopage:
> + kfree(gref);
> +out:
> + return rc;
> +}
> +
> +static void __del_gref(struct gntalloc_gref *gref)
> +{
> + if (gnttab_query_foreign_access(gref->gref_id))
> + return;
> +
> + if (!gnttab_end_foreign_access_ref(gref->gref_id, 0))
> + return;
> +
> + gref_size--;
> + list_del(&gref->next_all);
> +
> + __free_page(gref->page);
> + kfree(gref);
> +}
> +
> +static struct gntalloc_gref* find_gref(struct gntalloc_file_private_data *priv,
> + grant_ref_t gref_id)
> +{
> + struct gntalloc_gref *gref;
> + list_for_each_entry(gref, &priv->list, next_file) {
> + if (gref->gref_id == gref_id)
> + return gref;
> + }
> + return NULL;
> +}
> +
> +/*
> + * -------------------------------------
> + * File operations.
> + * -------------------------------------
> + */
> +static int gntalloc_open(struct inode *inode, struct file *filp)
> +{
> + struct gntalloc_file_private_data *priv;
> +
> + try_module_get(THIS_MODULE);
> +
> + priv = kzalloc(sizeof(*priv), GFP_KERNEL);
> + if (!priv)
> + goto out_nomem;
> + INIT_LIST_HEAD(&priv->list);
> +
> + filp->private_data = priv;
> +
> + if (debug)
> + printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> + return 0;
> +
> +out_nomem:
> + return -ENOMEM;
> +}
> +
> +static int gntalloc_release(struct inode *inode, struct file *filp)
> +{
> + struct gntalloc_file_private_data *priv = filp->private_data;
> + struct gntalloc_gref *gref;
> +
> + if (debug)
> + printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> + spin_lock(&gref_lock);
> + while (!list_empty(&priv->list)) {
> + gref = list_entry(priv->list.next,
> + struct gntalloc_gref, next_file);
> + list_del(&gref->next_file);
> + gref->users--;
> + if (gref->users == 0)
> + __del_gref(gref);
> + }
> + kfree(priv);
> + spin_unlock(&gref_lock);
> +
> + module_put(THIS_MODULE);
> +
> + return 0;
> +}
> +
> +static long gntalloc_ioctl_alloc(struct gntalloc_file_private_data *priv,
> + void __user *arg)
> +{
> + int rc = 0;
> + struct ioctl_gntalloc_alloc_gref op;
> +
> + if (debug)
> + printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> + if (copy_from_user(&op, arg, sizeof(op))) {
> + rc = -EFAULT;
> + goto alloc_grant_out;
> + }
> + rc = add_gref(op.foreign_domid, op.readonly, priv);
> + if (rc < 0)
> + goto alloc_grant_out;
> +
> + op.gref_id = rc;
> + op.page_idx = rc;
> +
> + rc = 0;
> +
> + if (copy_to_user((void __user *)arg, &op, sizeof(op))) {
> + rc = -EFAULT;
> + goto alloc_grant_out;
> + }
> +
> +alloc_grant_out:
> + return rc;
> +}
> +
> +static long gntalloc_ioctl_dealloc(struct gntalloc_file_private_data *priv,
> + void __user *arg)
> +{
> + int rc = 0;
> + struct ioctl_gntalloc_dealloc_gref op;
> + struct gntalloc_gref *gref;
> +
> + if (debug)
> + printk("%s: priv %p\n", __FUNCTION__, priv);
> +
> + if (copy_from_user(&op, arg, sizeof(op))) {
> + rc = -EFAULT;
> + goto dealloc_grant_out;
> + }
> +
> + spin_lock(&gref_lock);
> + gref = find_gref(priv, op.gref_id);
> + if (gref) {
> + list_del(&gref->next_file);
> + gref->users--;
> + rc = 0;
> + } else {
> + rc = -EINVAL;
> + }
> +
> + do_cleanup();
> + spin_unlock(&gref_lock);
> +dealloc_grant_out:
> + return rc;
> +}
> +
> +static long gntalloc_ioctl(struct file *filp, unsigned int cmd,
> + unsigned long arg)
> +{
> + struct gntalloc_file_private_data *priv = filp->private_data;
> +
> + switch (cmd) {
> + case IOCTL_GNTALLOC_ALLOC_GREF:
> + return gntalloc_ioctl_alloc(priv, (void __user*)arg);
> +
> + case IOCTL_GNTALLOC_DEALLOC_GREF:
> + return gntalloc_ioctl_dealloc(priv, (void __user*)arg);
> +
> + default:
> + return -ENOIOCTLCMD;
> + }
> +
> + return 0;
> +}
> +
> +static int gntalloc_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +{
> + struct gntalloc_gref *gref = vma->vm_private_data;
> + if (!gref)
> + return VM_FAULT_SIGBUS;
> +
> + vmf->page = gref->page;
> + get_page(vmf->page);
> +
> + return 0;
> +};
> +
> +static void gntalloc_vma_close(struct vm_area_struct *vma)
> +{
> + struct gntalloc_gref *gref = vma->vm_private_data;
> + if (!gref)
> + return;
> +
> + spin_lock(&gref_lock);
> + gref->users--;
> + if (gref->users == 0)
> + __del_gref(gref);
> + spin_unlock(&gref_lock);
> +}
> +
> +static struct vm_operations_struct gntalloc_vmops = {
> + .fault = gntalloc_vma_fault,
> + .close = gntalloc_vma_close,
> +};
> +
> +static int gntalloc_mmap(struct file *filp, struct vm_area_struct *vma)
> +{
> + struct gntalloc_file_private_data *priv = filp->private_data;
> + struct gntalloc_gref *gref;
> +
> + if (debug)
> + printk("%s: priv %p, page %lu\n", __func__,
> + priv, vma->vm_pgoff);
> +
> + /*
> + * There is a 1-to-1 correspondence of grant references to shared
> + * pages, so it only makes sense to map exactly one page per
> + * call to mmap().
> + */
> + if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1) {
> + printk(KERN_ERR "%s: Only one page can be memory-mapped "
> + "per grant reference.\n", __func__);
> + return -EINVAL;
> + }
> +
> + if (!(vma->vm_flags & VM_SHARED)) {
> + printk(KERN_ERR "%s: Mapping must be shared.\n",
> + __func__);
> + return -EINVAL;
> + }
> +
> + spin_lock(&gref_lock);
> + gref = find_gref(priv, vma->vm_pgoff);
> + if (gref == NULL) {
> + spin_unlock(&gref_lock);
> + printk(KERN_ERR "%s: Could not find a grant reference with "
> + "page index %lu.\n", __func__, vma->vm_pgoff);
> + return -ENOENT;
> + }
> + gref->users++;
> + spin_unlock(&gref_lock);
> +
> + vma->vm_private_data = gref;
> +
> + /* This flag prevents Bad PTE errors when the memory is unmapped. */
> + vma->vm_flags |= VM_RESERVED;
> + vma->vm_flags |= VM_DONTCOPY;
> + vma->vm_flags |= VM_IO;
> +
> + vma->vm_ops = &gntalloc_vmops;
> +
> + return 0;
> +}
> +
> +static const struct file_operations gntalloc_fops = {
> + .owner = THIS_MODULE,
> + .open = gntalloc_open,
> + .release = gntalloc_release,
> + .unlocked_ioctl = gntalloc_ioctl,
> + .mmap = gntalloc_mmap
> +};
> +
> +/*
> + * -------------------------------------
> + * Module creation/destruction.
> + * -------------------------------------
> + */
> +static struct miscdevice gntalloc_miscdev = {
> + .minor = MISC_DYNAMIC_MINOR,
> + .name = "xen/gntalloc",
> + .fops = &gntalloc_fops,
> +};
> +
> +static int __init gntalloc_init(void)
> +{
> + int err;
> +
> + if (!xen_domain()) {
> + if (debug)
> + printk(KERN_ERR "gntalloc: You must be running Xen\n");
> + return -ENODEV;
> + }
> +
> + err = misc_register(&gntalloc_miscdev);
> + if (err != 0) {
> + printk(KERN_ERR "Could not register misc gntalloc device\n");
> + return err;
> + }
> +
> + if (debug)
> + printk(KERN_INFO "Created grant allocation device at %d,%d\n",
> + MISC_MAJOR, gntalloc_miscdev.minor);
> +
> + return 0;
> +}
> +
> +static void __exit gntalloc_exit(void)
> +{
> + misc_deregister(&gntalloc_miscdev);
> +}
> +
> +module_init(gntalloc_init);
> +module_exit(gntalloc_exit);
> +
> +MODULE_LICENSE("GPL");
> +MODULE_AUTHOR("Carter Weatherly <carter.weatherly@xxxxxxxxxx>, "
> + "Daniel De Graaf <dgdegra@xxxxxxxxxxxxx>");
> +MODULE_DESCRIPTION("User-space grant reference allocator driver");
> diff --git a/include/xen/gntalloc.h b/include/xen/gntalloc.h
> new file mode 100644
> index 0000000..76b70d7
> --- /dev/null
> +++ b/include/xen/gntalloc.h
> @@ -0,0 +1,68 @@
> +/******************************************************************************
> + * gntalloc.h
> + *
> + * Interface to /dev/xen/gntalloc.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version 2
> + * as published by the Free Software Foundation; or, when distributed
> + * separately from the Linux kernel or incorporated into other
> + * software packages, subject to the following license:
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a copy
> + * of this source file (the "Software"), to deal in the Software without
> + * restriction, including without limitation the rights to use, copy, modify,
> + * merge, publish, distribute, sublicense, and/or sell copies of the Software,
> + * and to permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#ifndef __LINUX_PUBLIC_GNTALLOC_H__
> +#define __LINUX_PUBLIC_GNTALLOC_H__
> +
> +/*
> + * Allocates a new page and creates a new grant reference.
> + *
> + * N.B. The page_idx is really the address >> PAGE_SHIFT, meaning it's the
> + * page number and not an actual address. It must be shifted again prior
> + * to feeding it to mmap() (i.e. page_idx << PAGE_SHIFT).
> + */
> +#define IOCTL_GNTALLOC_ALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 1, sizeof(struct ioctl_gntalloc_alloc_gref))
> +struct ioctl_gntalloc_alloc_gref {
> + /* IN parameters */
> + /* The ID of the domain creating the grant reference. */
> + domid_t owner_domid;
> + /* The ID of the domain to be given access to the grant. */
> + domid_t foreign_domid;
> + /* The type of access given to domid. */
> + uint32_t readonly;
> + /* OUT parameters */
> + /* The grant reference of the newly created grant. */
> + grant_ref_t gref_id;
> + /* The page index (page number, NOT address) for grant mmap(). */
> + uint32_t page_idx;
> +};
> +
> +/*
> + * Deallocates the grant reference, freeing the associated page.
> + */
> +#define IOCTL_GNTALLOC_DEALLOC_GREF \
> +_IOC(_IOC_NONE, 'G', 2, sizeof(struct ioctl_gntalloc_dealloc_gref))
> +struct ioctl_gntalloc_dealloc_gref {
> + /* IN parameter */
> + /* The grant reference to deallocate. */
> + grant_ref_t gref_id;
> +};
> +#endif /* __LINUX_PUBLIC_GNTALLOC_H__ */
> --
> 1.7.2.3
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
|