
[Xen-devel] [PATCH 2/9] Linux kernel infrastructure for Xen Share access



On the Linux kernel side, we provide some wrappers for accessing
shared pages.  They are currently reference-counted, because a future
patch allows userspace to access shared pages, and the Xen interface
will refuse the second request for access by the same domain.

The entire hypercall interface is arch-wrapped, which is probably
overkill, but I wasn't entirely sure of the needs of non-x86
architectures.  Some of this should almost certainly be in common code.
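For reference, here is a rough sketch of how kernel code might use these
wrappers to attach to a share and receive events.  It is illustrative only:
the share_ref value, the single-page size and all the example_* names are
made up, and error handling is minimal.

#include <linux/err.h>
#include <asm/share.h>

static struct xen_share *example_share;

/* Called from the share's event-channel irq for every notification. */
static void example_event(struct xen_share_handler *h)
{
	/* A real user would look at the shared pages here. */
}

static struct xen_share_handler example_handler = {
	.handler = example_event,
};

static int example_attach(share_ref_t ref)
{
	/* A second xen_share_get() of the same ref just bumps the refcount,
	 * so only one XEN_SHARE_get request ever reaches the hypervisor. */
	example_share = xen_share_get(ref, 1);
	if (IS_ERR(example_share))
		return PTR_ERR(example_share);

	xen_share_add_handler(example_share, &example_handler);
	return 0;
}

static void example_detach(void)
{
	xen_share_remove_handler(example_share, &example_handler);
	xen_share_put(example_share);
}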

diff -r 6d476981e3a5 -r 07a00d96357d linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/share.h
--- /dev/null   Sun May 28 14:49:17 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/share.h        Wed May 31 05:33:38 2006
@@ -0,0 +1,62 @@
+#ifndef __ASM_XEN_I386_SHARE_H
+#define __ASM_XEN_I386_SHARE_H
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <xen/interface/share.h>
+
+struct xen_share
+{
+       struct list_head list;
+       atomic_t use;
+       share_ref_t share_ref;
+       unsigned num_pages;
+       void *addr;
+       int event_channel;
+       int peerid;
+       int irq;
+       struct list_head handlers;
+};
+
+struct xen_share_handler
+{
+       struct list_head list;
+       void (*handler)(struct xen_share_handler *h);
+};
+
+/* Map a shared area.  Returns PTR_ERR(errno) on fail. */
+struct xen_share *xen_share_get(share_ref_t share_ref, unsigned pages);
+
+/* Set up handler for events. */
+void xen_share_add_handler(struct xen_share *s, struct xen_share_handler *h);
+
+/* Remove handler. */
+void xen_share_remove_handler(struct xen_share *s,
+                             struct xen_share_handler *h);
+
+/* Unmap a shared area (irq unbound if not done already). */
+void xen_share_put(struct xen_share *share);
+
+/* Register this sg list (physical kernel addresses).  Returns 0 on success. */
+int xen_sg_register(struct xen_share *share, int dirmask, u32 queue, u32 *lenp,
+                   unsigned int num_sgs, const struct xen_sg sg[]);
+
+/* Unregister this sg list: give first phys address of sg. */
+void xen_sg_unregister(struct xen_share *share, unsigned long sgaddr);
+
+/* Transfer this sg list (physical kernel addresses).  Returns len xferred. */
+int xen_sg_xfer(struct xen_share *share, u32 queue, int dir,
+               unsigned int num_sgs, const struct xen_sg sg[]);
+
+/* Place watch on this trigger.  Returns 0 on success. */
+int xen_share_watch(struct xen_share *share, int triggernum, u32 *resultp);
+
+/* Remove watch on this trigger. */
+void xen_share_unwatch(struct xen_share *share, int triggernum);
+
+/* Trigger a watch.  Returns num watching on success. */
+int xen_share_trigger(struct xen_share *share, int triggernum);
+
+/* Map a share into a vma (for userspace mmap). */
+int xen_share_map(struct xen_share *share, struct vm_area_struct *vma);
+#endif /* __ASM_XEN_I386_SHARE_H */
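
(Not part of the patch: a rough sketch of driver-side use of the sg and watch
primitives declared above.  The queue and trigger numbers and the buffer are
placeholders, and the direction value is passed straight through, since this
mail doesn't show the interface's direction constants.)

#include <asm/io.h>		/* virt_to_phys() */
#include <asm/share.h>

/* Hand one buffer to the hypervisor on queue 0; returns the length
 * transferred.  "dir" is one of the share interface's direction values. */
static int example_xfer(struct xen_share *share, int dir, void *buf, u32 len)
{
	struct xen_sg sg = { .addr = virt_to_phys(buf), .len = len };

	return xen_sg_xfer(share, 0, dir, 1, &sg);
}

static u32 example_result;

/* Watch trigger 0; example_result is written when the peer triggers it. */
static int example_watch(struct xen_share *share)
{
	return xen_share_watch(share, 0, &example_result);
}

/* Kick whoever is watching trigger 0; returns the number of watchers. */
static int example_kick(struct xen_share *share)
{
	return xen_share_trigger(share, 0);
}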
diff -r 6d476981e3a5 -r 07a00d96357d linux-2.6-xen-sparse/arch/i386/kernel/share-xen.c
--- /dev/null   Sun May 28 14:49:17 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/share-xen.c Wed May 31 05:33:38 2006
@@ -0,0 +1,280 @@
+/* x86 layer for share hypercalls.
+ * Copyright 2006 Rusty Russell <rusty@xxxxxxxxxxxxxxx> IBM Corporation
+ * 
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+#include <linux/sched.h>
+#include <linux/page-flags.h>
+#include <linux/vmalloc.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/spinlock.h>
+#include <asm/semaphore.h>
+#include <asm/share.h>
+#include <asm/io.h>
+#include <xen/evtchn.h>
+#include <asm/hypervisor.h>
+
+/* We only request each area from the hypervisor once, so track them. */
+static DECLARE_MUTEX(share_lock);
+static spinlock_t handler_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(shares);
+
+static int get_evtchn_port(void)
+{
+       int err;
+       struct evtchn_alloc_unbound evtchn = { .dom = DOMID_SELF,
+                                              .remote_dom = DOMID_SELF };
+
+       err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &evtchn);
+       if (err)
+               return err;
+
+       return evtchn.port;
+}
+
+static void close_evtchn_port(int port)
+{
+       struct evtchn_close evtchn;
+       evtchn.port = port;
+       BUG_ON(HYPERVISOR_event_channel_op(EVTCHNOP_close, &evtchn) != 0);
+}      
+
+static struct xen_share *get_share(share_ref_t share_ref)
+{
+       struct xen_share *i;
+
+       list_for_each_entry(i, &shares, list) {
+               if (i->share_ref == share_ref) {
+                       atomic_inc(&i->use);
+                       return i;
+               }
+       }
+       return NULL;
+}
+
+static irqreturn_t share_irq(int irq, void *share_, struct pt_regs *regs)
+{
+       struct xen_share *share = share_;
+       struct xen_share_handler *h;
+
+       list_for_each_entry(h, &share->handlers, list)
+               h->handler(h);
+       return IRQ_HANDLED;
+}
+
+struct xen_share *create_share(share_ref_t share_ref, unsigned pages)
+{
+       pgprot_t prot;
+       int err;
+       struct vm_struct *vma;
+       struct xen_share *share;
+
+       share = kmalloc(sizeof(struct xen_share), GFP_KERNEL);
+       if (!share) {
+               err = -ENOMEM;
+               goto fail;
+       }
+
+       share->share_ref = share_ref;
+       share->num_pages = pages;
+       atomic_set(&share->use, 1);
+       INIT_LIST_HEAD(&share->handlers);
+       vma = get_vm_area(pages * PAGE_SIZE, VM_IOREMAP);
+       if (!vma) {
+               err = -ENOMEM;
+               goto free_share;
+       }
+
+       share->event_channel = get_evtchn_port();
+       if (share->event_channel < 0) {
+               err = share->event_channel;
+               goto free_vma;
+       }
+
+       err = bind_evtchn_to_irqhandler(share->event_channel, share_irq,
+                                       SA_SHIRQ, "xenshare", share);
+       if (err < 0)
+               goto close_evtchn;
+       share->irq = err;
+
+       share->peerid = HYPERVISOR_share(XEN_SHARE_get, share_ref,
+                                        share->event_channel, 0, 0);
+       if (share->peerid < 0) {
+               err = share->peerid;
+               goto unbind_evtchn;
+       }
+
+       prot = __pgprot(_PAGE_PRESENT|_PAGE_RW|_PAGE_DIRTY|_PAGE_ACCESSED);
+       err = direct_kernel_remap_pfn_range((unsigned long)vma->addr,
+                                           share_ref, pages * PAGE_SIZE,
+                                           prot, DOMID_SELF);
+       if (err)
+               goto put_share;
+       share->addr = vma->addr;
+       list_add(&share->list, &shares);
+
+       return share;
+
+put_share:
+       BUG_ON(HYPERVISOR_share(XEN_SHARE_drop,share->share_ref,0,0,0) != 0);
+unbind_evtchn:
+       unbind_from_irqhandler(share->irq, share);
+       goto free_vma;
+close_evtchn:
+       close_evtchn_port(share->event_channel);
+free_vma:
+       kfree(vma);
+free_share:
+       kfree(share);
+fail:
+       return ERR_PTR(err);
+}
+
+/* Map a shared area.  Returns PTR_ERR(errno) on fail. */
+struct xen_share *xen_share_get(share_ref_t share_ref, unsigned pages)
+{
+       struct xen_share *share;
+
+       down(&share_lock);
+       share = get_share(share_ref);
+       if (share)
+               BUG_ON(share->num_pages != pages);
+       else
+               share = create_share(share_ref, pages);
+       up(&share_lock);
+
+       return share;
+}
+
+void xen_share_add_handler(struct xen_share *s, struct xen_share_handler *h)
+{
+       spin_lock_irq(&handler_lock);
+       list_add(&h->list, &s->handlers);
+       spin_unlock_irq(&handler_lock);
+}
+
+/* Remove irq handler. */
+void xen_share_remove_handler(struct xen_share *s, struct xen_share_handler *h)
+{
+       BUG_ON(list_empty(&s->handlers));
+       spin_lock_irq(&handler_lock);
+       list_del(&h->list);
+       spin_unlock_irq(&handler_lock);
+}
+
+/* Unmap a shared area. */
+void xen_share_put(struct xen_share *share)
+{
+       down(&share_lock);
+       if (atomic_dec_and_test(&share->use)) {
+               BUG_ON(!list_empty(&share->handlers));
+               unbind_from_irqhandler(share->irq, share);
+
+               /* This also kfrees vma. */
+               vunmap(share->addr);
+               BUG_ON(HYPERVISOR_share(XEN_SHARE_drop, share->share_ref, 0,
+                                       0, 0) != 0);
+               list_del(&share->list);
+               kfree(share);
+       }
+       up(&share_lock);
+}
+
+/* Register this sg list (physical kernel addresses).  Returns 0 on success. */
+int xen_sg_register(struct xen_share *s, int dirmask, u32 queue, u32 *lenp,
+                   unsigned int num_sgs, const struct xen_sg sg[])
+{
+       struct xen_sg new_sg[XEN_SG_MAX];
+       unsigned int i;
+
+       /* We feed machine addresses to the hypervisor. */
+       for (i = 0; i < num_sgs; i++) {
+               new_sg[i].addr = phys_to_machine(sg[i].addr);
+               new_sg[i].len = sg[i].len;
+       }
+
+       return HYPERVISOR_share(XEN_SHARE_sg_register, s->share_ref,
+                               xen_share_sg_arg(queue, num_sgs, dirmask),
+                               (long)new_sg,
+                               virt_to_machine(lenp));
+}
+
+/* Unregister this sg list. */
+void xen_sg_unregister(struct xen_share *s, unsigned long addr)
+{
+       BUG_ON(HYPERVISOR_share(XEN_SHARE_sg_unregister, s->share_ref,
+                               phys_to_machine(addr), 0, 0) != 0);
+}
+
+/* Transfer this sg list (physical kernel addresses).  Returns len xferred. */
+int xen_sg_xfer(struct xen_share *s, u32 queue, int dir,
+               unsigned int num_sgs, const struct xen_sg sg[])
+{
+       struct xen_sg new_sg[XEN_SG_MAX];
+       unsigned int i;
+
+       /* Hypervisor wants virtual addresses here. */
+       for (i = 0; i < num_sgs; i++) {
+               new_sg[i].addr = (long)phys_to_virt(sg[i].addr);
+               new_sg[i].len = sg[i].len;
+       }
+
+       return HYPERVISOR_share(XEN_SHARE_sg_xfer, s->share_ref,
+                               xen_share_sg_arg(queue, num_sgs, dir),
+                               (long)new_sg, 0);
+}
+
+/* Place watch on this trigger.  Returns 0 on success. */
+int xen_share_watch(struct xen_share *s, int triggernum, u32 *resultp)
+{
+       return HYPERVISOR_share(XEN_SHARE_watch, s->share_ref, triggernum,
+                               virt_to_machine(resultp), 0);
+}
+
+/* Remove watch on this trigger. */
+void xen_share_unwatch(struct xen_share *s, int triggernum)
+{
+       BUG_ON(HYPERVISOR_share(XEN_SHARE_unwatch, s->share_ref, triggernum,
+                               0, 0) != 0);
+}
+
+/* Trigger a watch.  Returns num watching on success. */
+int xen_share_trigger(struct xen_share *s, int trigger)
+{
+       return HYPERVISOR_share(XEN_SHARE_trigger, s->share_ref, trigger,0,0);
+}
+
+int xen_share_map(struct xen_share *s, struct vm_area_struct *vma)
+{
+       vma->vm_flags |= VM_RESERVED | VM_IO | VM_DONTCOPY;
+       return direct_remap_pfn_range(vma, vma->vm_start,
+                                     s->share_ref,
+                                     s->num_pages * PAGE_SIZE,
+                                     vma->vm_page_prot, DOMID_SELF);
+}
+
+EXPORT_SYMBOL_GPL(xen_share_get);
+EXPORT_SYMBOL_GPL(xen_share_put);
+EXPORT_SYMBOL_GPL(xen_share_map);
+EXPORT_SYMBOL_GPL(xen_share_trigger);
+EXPORT_SYMBOL_GPL(xen_share_watch);
+EXPORT_SYMBOL_GPL(xen_share_unwatch);
+EXPORT_SYMBOL_GPL(xen_sg_xfer);
+EXPORT_SYMBOL_GPL(xen_sg_register);
+EXPORT_SYMBOL_GPL(xen_sg_unregister);
+EXPORT_SYMBOL_GPL(xen_share_add_handler);
+EXPORT_SYMBOL_GPL(xen_share_remove_handler);
diff -r 6d476981e3a5 -r 07a00d96357d linux-2.6-xen-sparse/arch/i386/kernel/Makefile
--- a/linux-2.6-xen-sparse/arch/i386/kernel/Makefile    Sun May 28 14:49:17 2006
+++ b/linux-2.6-xen-sparse/arch/i386/kernel/Makefile    Wed May 31 05:33:38 2006
@@ -88,6 +88,7 @@
 include $(srctree)/scripts/Makefile.xen
 
 obj-y += fixup.o
+obj-y += share-xen.o
 microcode-$(subst m,y,$(CONFIG_MICROCODE)) := microcode-xen.o
 n-obj-xen := i8259.o timers/ reboot.o smpboot.o trampoline.o
 
diff -r 6d476981e3a5 -r 07a00d96357d linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c
--- a/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Sun May 28 14:49:17 2006
+++ b/linux-2.6-xen-sparse/arch/i386/mm/ioremap-xen.c   Wed May 31 05:33:38 2006
@@ -123,8 +123,11 @@
        /* Same as remap_pfn_range(). */
        vma->vm_flags |= VM_IO | VM_RESERVED;
 
+       /* FIXME: xenshare needs to pass DOMID_SELF. Check it's safe to remove
+        * the check.
        if (domid == DOMID_SELF)
                return -EINVAL;
+       */
 
        return __direct_remap_pfn_range(
                vma->vm_mm, address, mfn, size, prot, domid);
diff -r 6d476981e3a5 -r 07a00d96357d linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h    Sun May 28 14:49:17 2006
+++ b/linux-2.6-xen-sparse/include/asm-i386/mach-xen/asm/hypercall.h    Wed May 31 05:33:38 2006
@@ -359,5 +359,11 @@
        return _hypercall2(int, xenoprof_op, op, arg);
 }
 
+static inline long
+HYPERVISOR_share(
+       int op, long arg1, long arg2, long arg3, long arg4)
+{
+       return _hypercall5(long, share_op, op, arg1, arg2, arg3, arg4);
+}
 
 #endif /* __HYPERCALL_H__ */
diff -r 6d476981e3a5 -r 07a00d96357d patches/linux-2.6.12/get_vm_area.patch
--- /dev/null   Sun May 28 14:49:17 2006
+++ b/patches/linux-2.6.12/get_vm_area.patch    Wed May 31 05:33:38 2006
@@ -0,0 +1,9 @@
+diff -Naur linux-2.6.12/mm/vmalloc.c linux-2.6.12.post/mm/vmalloc.c
+--- linux-2.6.12/mm/vmalloc.c    2005-06-18 05:48:29.000000000 +1000
++++ linux-2.6.12.post/mm/vmalloc.c        2006-01-10 16:56:36.000000000 +1100
+@@ -247,6 +247,7 @@
+ {
+        return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END);
+ }
++EXPORT_SYMBOL(get_vm_area);
+

-- 
 ccontrol: http://ccontrol.ozlabs.org

