[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/2] x86: add a shared page indicating the need for an EOI notification



To simplify the interface for the guest, when a guest uses this new
(sub-)hypercall, PHYSDEVOP_eoi behavior changes to unmask the
corresponding event channel immediately, avoiding the possible need for
a second hypercall from the guest.

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxxxx>

Index: 2008-11-20/xen/arch/x86/domain.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/domain.c       2008-11-20 08:56:58.000000000 
+0100
+++ 2008-11-20/xen/arch/x86/domain.c    2008-11-25 10:41:55.000000000 +0100
@@ -1823,6 +1823,14 @@ int domain_relinquish_resources(struct d
             unmap_vcpu_info(v);
         }
 
+        if ( d->arch.pirq_eoi_map )
+        {
+            unsigned long mfn = unmap_domain_page_global(d->arch.pirq_eoi_map);
+
+            d->arch.pirq_eoi_map = NULL;
+            put_page_and_type(mfn_to_page(mfn));
+        }
+
         d->arch.relmem = RELMEM_xen;
         /* fallthrough */
 
Index: 2008-11-20/xen/arch/x86/irq.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/irq.c  2008-11-20 10:13:47.000000000 +0100
+++ 2008-11-20/xen/arch/x86/irq.c       2008-11-26 15:57:06.000000000 +0100
@@ -18,6 +18,7 @@
 #include <xen/iommu.h>
 #include <asm/msi.h>
 #include <asm/current.h>
+#include <asm/flushtlb.h>
 #include <public/physdev.h>
 
 /* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
@@ -206,16 +207,42 @@ struct pending_eoi {
 static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]);
 #define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector)
 
+static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
+{
+    if ( d->arch.pirq_eoi_map )
+        set_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
+{
+    if ( d->arch.pirq_eoi_map )
+        clear_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static void _irq_guest_eoi(irq_desc_t *desc)
+{
+    irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+    unsigned int i, vector = desc - irq_desc;
+
+    if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
+        return;
+
+    for ( i = 0; i < action->nr_guests; ++i )
+        clear_pirq_eoi(action->guest[i],
+                       domain_vector_to_irq(action->guest[i], vector));
+
+    desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
+    desc->handler->enable(vector);
+}
+
 static struct timer irq_guest_eoi_timer[NR_VECTORS];
 static void irq_guest_eoi_timer_fn(void *data)
 {
     irq_desc_t *desc = data;
-    unsigned vector = desc - irq_desc;
     unsigned long flags;
 
     spin_lock_irqsave(&desc->lock, flags);
-    desc->status &= ~IRQ_INPROGRESS;
-    desc->handler->enable(vector);
+    _irq_guest_eoi(desc);
     spin_unlock_irqrestore(&desc->lock, flags);
 }
 
@@ -272,8 +299,22 @@ static void __do_IRQ_guest(int vector)
 
     if ( already_pending == action->nr_guests )
     {
-        desc->handler->disable(vector);
         stop_timer(&irq_guest_eoi_timer[vector]);
+        desc->handler->disable(vector);
+        desc->status |= IRQ_GUEST_EOI_PENDING;
+        for ( i = 0; i < already_pending; ++i )
+        {
+            d = action->guest[i];
+            set_pirq_eoi(d, domain_vector_to_irq(d, vector));
+            /*
+             * Could check here whether the guest unmasked the event by now
+             * (or perhaps just re-issue the send_guest_pirq()), and if it
+             * can now accept the event,
+             * - clear all the pirq_eoi bits we already set,
+             * - re-enable the vector, and
+             * - skip the timer setup below.
+             */
+        }
         init_timer(&irq_guest_eoi_timer[vector],
                    irq_guest_eoi_timer_fn, desc, smp_processor_id());
         set_timer(&irq_guest_eoi_timer[vector], NOW() + MILLISECS(1));
@@ -382,8 +423,12 @@ static void __pirq_guest_eoi(struct doma
     action = (irq_guest_action_t *)desc->action;
     vector = desc - irq_desc;
 
-    ASSERT(!test_bit(irq, d->pirq_mask) ||
-           (action->ack_type != ACKTYPE_NONE));
+    if ( action->ack_type == ACKTYPE_NONE )
+    {
+        ASSERT(!test_bit(irq, d->pirq_mask));
+        stop_timer(&irq_guest_eoi_timer[vector]);
+        _irq_guest_eoi(desc);
+    }
 
     if ( unlikely(!test_and_clear_bit(irq, d->pirq_mask)) ||
          unlikely(--action->in_flight != 0) )
@@ -607,6 +652,11 @@ int pirq_guest_bind(struct vcpu *v, int 
 
     action->guest[action->nr_guests++] = v->domain;
 
+    if ( action->ack_type != ACKTYPE_NONE )
+        set_pirq_eoi(v->domain, irq);
+    else
+        clear_pirq_eoi(v->domain, irq);
+
  unlock_out:
     spin_unlock_irq(&desc->lock);
  out:
Index: 2008-11-20/xen/arch/x86/physdev.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/physdev.c      2008-10-13 13:36:27.000000000 
+0200
+++ 2008-11-20/xen/arch/x86/physdev.c   2008-11-25 10:42:56.000000000 +0100
@@ -191,10 +191,41 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H
         ret = -EFAULT;
         if ( copy_from_guest(&eoi, arg, 1) != 0 )
             break;
+        ret = -EINVAL;
+        if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
+            break;
+        if ( v->domain->arch.pirq_eoi_map )
+            evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
         ret = pirq_guest_eoi(v->domain, eoi.irq);
         break;
     }
 
+    case PHYSDEVOP_pirq_eoi_mfn: {
+        struct physdev_pirq_eoi_mfn info;
+
+        BUILD_BUG_ON(NR_IRQS > PAGE_SIZE * 8);
+        ret = -EFAULT;
+        if ( copy_from_guest(&info, arg, 1) != 0 )
+            break;
+        ret = -EBUSY;
+        if ( v->domain->arch.pirq_eoi_map )
+            break;
+        ret = -EINVAL;
+        if ( !mfn_valid(info.mfn) ||
+             !get_page_and_type(mfn_to_page(info.mfn), v->domain,
+                                PGT_writable_page) )
+            break;
+        v->domain->arch.pirq_eoi_map = map_domain_page_global(info.mfn);
+        if ( v->domain->arch.pirq_eoi_map )
+            ret = 0;
+        else
+        {
+            put_page_and_type(mfn_to_page(info.mfn));
+            ret = -ENOSPC;
+        }
+        break;
+    }
+
     /* Legacy since 0x00030202. */
     case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
         ret = pirq_guest_unmask(v->domain);
Index: 2008-11-20/xen/arch/x86/x86_32/domain_page.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/x86_32/domain_page.c   2008-11-05 
16:54:22.000000000 +0100
+++ 2008-11-20/xen/arch/x86/x86_32/domain_page.c        2008-11-25 
10:48:06.000000000 +0100
@@ -241,9 +241,9 @@ void *map_domain_page_global(unsigned lo
     return (void *)va;
 }
 
-void unmap_domain_page_global(const void *va)
+unsigned long unmap_domain_page_global(const void *va)
 {
-    unsigned long __va = (unsigned long)va;
+    unsigned long __va = (unsigned long)va, mfn;
     l2_pgentry_t *pl2e;
     l1_pgentry_t *pl1e;
     unsigned int idx;
@@ -253,9 +253,12 @@ void unmap_domain_page_global(const void
     /* /First/, we zap the PTE. */
     pl2e = virt_to_xen_l2e(__va);
     pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(__va);
+    mfn = l1e_get_pfn(*pl1e);
     l1e_write(pl1e, l1e_empty());
 
     /* /Second/, we add to the garbage map. */
     idx = (__va - IOREMAP_VIRT_START) >> PAGE_SHIFT;
     set_bit(idx, garbage);
+
+    return mfn;
 }
Index: 2008-11-20/xen/arch/x86/x86_64/physdev.c
===================================================================
--- 2008-11-20.orig/xen/arch/x86/x86_64/physdev.c       2008-07-28 
10:57:32.000000000 +0200
+++ 2008-11-20/xen/arch/x86/x86_64/physdev.c    2008-11-26 14:32:39.000000000 
+0100
@@ -18,6 +18,9 @@
 #define physdev_eoi                compat_physdev_eoi
 #define physdev_eoi_t              physdev_eoi_compat_t
 
+#define physdev_pirq_eoi_mfn       compat_physdev_pirq_eoi_mfn
+#define physdev_pirq_eoi_mfn_t     physdev_pirq_eoi_mfn_compat_t
+
 #define physdev_set_iobitmap       compat_physdev_set_iobitmap
 #define physdev_set_iobitmap_t     physdev_set_iobitmap_compat_t
 
Index: 2008-11-20/xen/common/event_channel.c
===================================================================
--- 2008-11-20.orig/xen/common/event_channel.c  2008-10-24 11:21:38.000000000 
+0200
+++ 2008-11-20/xen/common/event_channel.c       2008-11-24 17:14:44.000000000 
+0100
@@ -762,10 +762,9 @@ long evtchn_bind_vcpu(unsigned int port,
 }
 
 
-static long evtchn_unmask(evtchn_unmask_t *unmask)
+int evtchn_unmask(unsigned int port)
 {
     struct domain *d = current->domain;
-    int            port = unmask->port;
     struct vcpu   *v;
 
     spin_lock(&d->event_lock);
@@ -916,7 +915,7 @@ long do_event_channel_op(int cmd, XEN_GU
         struct evtchn_unmask unmask;
         if ( copy_from_guest(&unmask, arg, 1) != 0 )
             return -EFAULT;
-        rc = evtchn_unmask(&unmask);
+        rc = evtchn_unmask(unmask.port);
         break;
     }
 
Index: 2008-11-20/xen/include/asm-x86/domain.h
===================================================================
--- 2008-11-20.orig/xen/include/asm-x86/domain.h        2008-11-20 
08:48:26.000000000 +0100
+++ 2008-11-20/xen/include/asm-x86/domain.h     2008-11-25 10:28:35.000000000 
+0100
@@ -238,6 +238,8 @@ struct arch_domain
     int vector_pirq[NR_VECTORS];
     s16 pirq_vector[NR_IRQS];
 
+    unsigned long *pirq_eoi_map;
+
     /* Pseudophysical e820 map (XENMEM_memory_map).  */
     struct e820entry e820[3];
     unsigned int nr_e820;
Index: 2008-11-20/xen/include/public/physdev.h
===================================================================
--- 2008-11-20.orig/xen/include/public/physdev.h        2008-08-15 
16:18:55.000000000 +0200
+++ 2008-11-20/xen/include/public/physdev.h     2008-11-24 15:16:10.000000000 
+0100
@@ -41,6 +41,21 @@ typedef struct physdev_eoi physdev_eoi_t
 DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
 
 /*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest has used this function, in that the associated event
+ * channel will automatically get unmasked. The page registered is used as
+ * a bit array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_mfn          17
+struct physdev_pirq_eoi_mfn {
+    /* IN */
+    xen_pfn_t mfn;
+};
+typedef struct physdev_pirq_eoi_mfn physdev_pirq_eoi_mfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_mfn_t);
+
+/*
  * Query the status of an IRQ line.
  * @arg == pointer to physdev_irq_status_query structure.
  */
Index: 2008-11-20/xen/include/xen/domain_page.h
===================================================================
--- 2008-11-20.orig/xen/include/xen/domain_page.h       2008-11-05 
16:54:22.000000000 +0100
+++ 2008-11-20/xen/include/xen/domain_page.h    2008-11-26 15:45:25.000000000 
+0100
@@ -32,7 +32,7 @@ void unmap_domain_page(const void *va);
  * mappings can also be unmapped from any context.
  */
 void *map_domain_page_global(unsigned long mfn);
-void unmap_domain_page_global(const void *va);
+unsigned long unmap_domain_page_global(const void *va);
 
 #define DMCACHE_ENTRY_VALID 1U
 #define DMCACHE_ENTRY_HELD  2U
@@ -99,8 +99,15 @@ domain_mmap_cache_destroy(struct domain_
 #define map_domain_page(mfn)                mfn_to_virt(mfn)
 #define unmap_domain_page(va)               ((void)(va))
 
-#define map_domain_page_global(mfn)         mfn_to_virt(mfn)
-#define unmap_domain_page_global(va)        ((void)(va))
+static inline void *map_domain_page_global(unsigned long mfn)
+{
+    return mfn_to_virt(mfn);
+}
+
+static inline unsigned long unmap_domain_page_global(void *va)
+{
+    return virt_to_mfn(va);
+}
 
 struct domain_mmap_cache { 
 };
Index: 2008-11-20/xen/include/xen/event.h
===================================================================
--- 2008-11-20.orig/xen/include/xen/event.h     2008-09-19 14:28:28.000000000 
+0200
+++ 2008-11-20/xen/include/xen/event.h  2008-11-24 17:12:01.000000000 +0100
@@ -44,6 +44,9 @@ int evtchn_send(struct domain *d, unsign
 /* Bind a local event-channel port to the specified VCPU. */
 long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
 
+/* Unmask the current domain's event-channel port. */
+int evtchn_unmask(unsigned int port);
+
 /* Allocate/free a Xen-attached event channel port. */
 int alloc_unbound_xen_event_channel(
     struct vcpu *local_vcpu, domid_t remote_domid);
Index: 2008-11-20/xen/include/xen/irq.h
===================================================================
--- 2008-11-20.orig/xen/include/xen/irq.h       2008-11-20 08:47:05.000000000 
+0100
+++ 2008-11-20/xen/include/xen/irq.h    2008-11-26 09:19:15.000000000 +0100
@@ -22,6 +22,7 @@ struct irqaction
 #define IRQ_PENDING    4       /* IRQ pending - replay on enable */
 #define IRQ_REPLAY     8       /* IRQ has been replayed but not acked yet */
 #define IRQ_GUEST       16      /* IRQ is handled by guest OS(es) */
+#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
 #define IRQ_PER_CPU     256     /* IRQ is per CPU */
 
 /*



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.