
[Xen-devel] [PATCH RFC] xen: arm: context switch vtimer PPI state.



... instead of artificially masking the timer interrupt in the timer
state and relying on the guest to unmask it (which it isn't required
to do per the h/w spec, although Linux does).

To do this, introduce the concept of routing a PPI to the currently
running VCPU. For such interrupts irq_get_domain returns NULL.

Then we make use of the GICD I[SC]ACTIVER registers to save and
restore the active state of the interrupt, which prevents the nested
invocations that the current masking works around.

For edge triggered interrupts we also need to context switch the
pending bit via I[SC]PENDR. Note that for level triggered interrupts
ISPENDR sets a latch which is only cleared by ICPENDR (and not by h/w
state changes); therefore we do not want to context switch the pending
state for level PPIs -- instead we rely on the context switch of the
peripheral to restore the correct level.
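
To make the register usage concrete, here is a minimal sketch (not part
of the patch) of how a PPI's state bit is located, reusing Xen's
existing readl_gicd() helper and GICD_ offsets; because the GIC provides
set/clear register pairs, saving and restoring never needs a
read-modify-write:

    /* Sketch only: each GICD_ISACTIVER register covers 32 interrupts;
     * writing 1s to ISACTIVER sets active bits, writing 1s to the
     * matching ICACTIVER clears them. */
    static bool ppi_is_active(unsigned int virq)
    {
        const unsigned int offs = virq / 32;          /* register index */
        const unsigned int mask = 1u << (virq % 32);  /* bit within it */

        return readl_gicd(GICD_ISACTIVER + offs * 4) & mask;
    }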

RFC Only:
 - Not implemented for GIC v3 yet.
 - Lightly tested with hackbench on systems with level and edge
   triggered vtimer PPI.
 - Is irq_get_domain == NULL to indicate route-to-current-vcpu the
   best idea? Any alternatives?

Signed-off-by: Ian Campbell <ian.campbell@xxxxxxxxxx>
---
 xen/arch/arm/gic-v2.c        |   84 ++++++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/gic.c           |   32 +++++++++++++---
 xen/arch/arm/irq.c           |   48 ++++++++++++++++++++++--
 xen/arch/arm/time.c          |   26 +------------
 xen/arch/arm/vtimer.c        |   24 ++++++++++--
 xen/include/asm-arm/domain.h |   11 ++++++
 xen/include/asm-arm/gic.h    |   14 +++++++
 xen/include/asm-arm/irq.h    |    1 +
 8 files changed, 204 insertions(+), 36 deletions(-)
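
As an overview (sketch only, not part of the patch), the time.c and
vtimer.c hunks below use the new interfaces roughly as follows:

    /* Boot (time.c): route the vtimer PPI to whichever VCPU is running
     * when it fires, instead of request_irq() with a Xen-side handler. */
    route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer");

    /* Context switch out (virt_timer_save): capture and mask the PPI's
     * enabled/pending/active state into the per-VCPU hwppi_state. */
    gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq,
                            &v->arch.virt_timer.ppi_state);

    /* Context switch in (virt_timer_restore): replay the saved state on
     * the new physical CPU for the incoming VCPU. */
    gic_restore_hwppi(v, v->arch.virt_timer.irq,
                      &v->arch.virt_timer.ppi_state);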

diff --git a/xen/arch/arm/gic-v2.c b/xen/arch/arm/gic-v2.c
index 31fb81a..9074aca 100644
--- a/xen/arch/arm/gic-v2.c
+++ b/xen/arch/arm/gic-v2.c
@@ -156,6 +156,45 @@ static void gicv2_save_state(struct vcpu *v)
     writel_gich(0, GICH_HCR);
 }
 
+static void gicv2_save_and_mask_hwppi(struct vcpu *v, const unsigned virq,
+                                      struct hwppi_state *s)
+{
+    struct pending_irq *p = irq_to_pending(v, virq);
+    const unsigned int offs = virq / 32;
+    const unsigned int mask = (1u << (virq % 32));
+    const unsigned int pendingr = readl_gicd(GICD_ISPENDR + offs*4);
+    const unsigned int activer = readl_gicd(GICD_ISACTIVER + offs*4);
+    const unsigned int enabler = readl_gicd(GICD_ISENABLER + offs*4);
+    const bool is_edge = !!(p->desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
+
+    ASSERT(!is_idle_vcpu(v));
+
+    s->active = !!(activer & mask);
+    s->enabled = !!(enabler & mask);
+    s->pending = !!(pendingr & mask);
+    s->inprogress = test_and_clear_bit(_IRQ_INPROGRESS, &p->desc->status);
+
+    /* Write a 1 to IC...R to clear the corresponding bit of state */
+    if ( s->active )
+        writel_gicd(mask, GICD_ICACTIVER + offs*4);
+    /*
+     * For an edge interrupt clear the pending state. For a level interrupt this
+     * would only clear the ISPENDR latch and there is no need: saving the
+     * peripheral state (and/or restoring the next VCPU) will cause the correct
+     * action.
+     */
+    if ( is_edge && s->pending )
+        writel_gicd(mask, GICD_ICPENDR + offs*4);
+
+    if ( s->enabled )
+    {
+        writel_gicd(mask, GICD_ICENABLER + offs*4);
+        set_bit(_IRQ_DISABLED, &p->desc->status);
+    }
+
+    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
+    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
+}
+
 static void gicv2_restore_state(const struct vcpu *v)
 {
     int i;
@@ -168,6 +207,49 @@ static void gicv2_restore_state(const struct vcpu *v)
     writel_gich(GICH_HCR_EN, GICH_HCR);
 }
 
+static void gicv2_restore_hwppi(struct vcpu *v, const unsigned virq,
+                                const struct hwppi_state *s)
+{
+    struct pending_irq *p = irq_to_pending(v, virq);
+    const unsigned int offs = virq / 32;
+    const unsigned int mask = (1u << (virq % 32));
+    struct irq_desc *desc = irq_to_desc(virq);
+    const bool is_edge = !!(desc->arch.type & DT_IRQ_TYPE_EDGE_BOTH);
+
+    /* Migrate to the new physical processor. */
+    p->desc = desc;
+
+    /*
+     * The IRQ must always have been set inactive and masked etc by
+     * the saving of the previous state via save_and_mask_hwppi.
+     */
+    ASSERT(!(readl_gicd(GICD_ISACTIVER + offs*4) & mask));
+    ASSERT(!(readl_gicd(GICD_ISENABLER + offs*4) & mask));
+
+    if ( s->active )
+        writel_gicd(mask, GICD_ISACTIVER + offs*4);
+
+    /*
+     * Restore pending state for edge triggered interrupts only. For
+     * level triggered interrupts the level will be restored as
+     * necessary by restoring the state of the relevant peripheral.
+     *
+     * For a level triggered interrupt ISPENDR acts as a *latch* which
+     * is only cleared by ICPENDR (i.e. the input level is no longer
+     * relevant). We certainly do not want that here.
+     */
+    if ( is_edge && s->pending )
+        writel_gicd(mask, GICD_ISPENDR + offs*4);
+    if ( s->enabled )
+    {
+        clear_bit(_IRQ_DISABLED, &p->desc->status);
+        dsb(sy);
+        writel_gicd(mask, GICD_ISENABLER + offs*4);
+    }
+    if ( s->inprogress )
+        set_bit(_IRQ_INPROGRESS, &p->desc->status);
+}
+
 static void gicv2_dump_state(const struct vcpu *v)
 {
     int i;
@@ -660,7 +742,9 @@ const static struct gic_hw_operations gicv2_ops = {
     .info                = &gicv2_info,
     .secondary_init      = gicv2_secondary_cpu_init,
     .save_state          = gicv2_save_state,
+    .save_and_mask_hwppi = gicv2_save_and_mask_hwppi,
     .restore_state       = gicv2_restore_state,
+    .restore_hwppi       = gicv2_restore_hwppi,
     .dump_state          = gicv2_dump_state,
     .gicv_setup          = gicv2v_setup,
     .gic_host_irq_type   = &gicv2_host_irq_type,
diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 7d4ee19..7ea980d 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -81,6 +81,12 @@ void gic_save_state(struct vcpu *v)
     isb();
 }
 
+void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
+                             struct hwppi_state *s)
+{
+    gic_hw_ops->save_and_mask_hwppi(v, virq, s);
+}
+
 void gic_restore_state(struct vcpu *v)
 {
     ASSERT(!local_irq_is_enabled());
@@ -94,6 +100,12 @@ void gic_restore_state(struct vcpu *v)
     gic_restore_pending_irqs(v);
 }
 
+void gic_restore_hwppi(struct vcpu *v, const unsigned virq,
+                       const struct hwppi_state *s)
+{
+    gic_hw_ops->restore_hwppi(v, virq, s);
+}
+
 /*
  * needs to be called with a valid cpu_mask, ie each cpu in the mask has
  * already called gic_cpu_init
@@ -125,11 +137,15 @@ void gic_route_irq_to_xen(struct irq_desc *desc, const cpumask_t *cpu_mask,
 
 /* Program the GIC to route an interrupt to a guest
  *   - desc.lock must be held
+ *   - d may be NULL, in which case the interrupt *must* be a PPI and is
+ *     routed to the vcpu which is running when that PPI fires. In this case
+ *     the code responsible for the relevant hardware must save and restore
+ *     the PPI state with gic_save_and_mask_hwppi/gic_restore_hwppi.
+ *   - if d is non-NULL then the interrupt *must* be an SPI.
  */
 void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
                             const cpumask_t *cpu_mask, unsigned int priority)
 {
-    struct pending_irq *p;
     ASSERT(spin_is_locked(&desc->lock));
 
     desc->handler = gic_hw_ops->gic_guest_irq_type;
@@ -137,10 +153,16 @@ void gic_route_irq_to_guest(struct domain *d, struct irq_desc *desc,
 
     gic_set_irq_properties(desc, cpumask_of(smp_processor_id()), GIC_PRI_IRQ);
 
-    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
-     * route SPIs to guests, it doesn't make any difference. */
-    p = irq_to_pending(d->vcpu[0], desc->irq);
-    p->desc = desc;
+    if ( d )
+    {
+        struct pending_irq *p;
+
+        /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
+         * route SPIs to guests, it doesn't make any difference. */
+        p = irq_to_pending(d->vcpu[0], desc->irq);
+        p->desc = desc;
+    }
+    /* else p->desc init'd on ctxt restore in gic_restore_hwppi */
 }
 
 int gic_irq_xlate(const u32 *intspec, unsigned int intsize,
diff --git a/xen/arch/arm/irq.c b/xen/arch/arm/irq.c
index ebdf19a..93c38ff 100644
--- a/xen/arch/arm/irq.c
+++ b/xen/arch/arm/irq.c
@@ -202,6 +202,19 @@ void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq)
         goto out;
     }
 
+    if ( irq == current->arch.virt_timer.irq )
+    {
+        ASSERT(!irq_get_domain(desc));
+
+        desc->handler->end(desc);
+
+        set_bit(_IRQ_INPROGRESS, &desc->status);
+        desc->arch.eoi_cpu = smp_processor_id();
+
+        vgic_vcpu_inject_irq(current, irq);
+        goto out_no_end;
+    }
+
     if ( test_bit(_IRQ_GUEST, &desc->status) )
     {
         struct domain *d = irq_get_domain(desc);
@@ -346,8 +359,12 @@ int setup_irq(unsigned int irq, unsigned int irqflags, struct irqaction *new)
         struct domain *d = irq_get_domain(desc);
 
         spin_unlock_irqrestore(&desc->lock, flags);
-        printk(XENLOG_ERR "ERROR: IRQ %u is already in use by the domain %u\n",
-               irq, d->domain_id);
+        if ( d )
+            printk(XENLOG_ERR "ERROR: IRQ %u is already in use by domain %u\n",
+                   irq, d->domain_id);
+        else
+            printk(XENLOG_ERR
+                   "ERROR: IRQ %u is already in use by <current-vcpu>\n", irq);
         return -EBUSY;
     }
 
@@ -378,6 +395,10 @@ err:
     return rc;
 }
 
+/*
+ * If d == NULL then the IRQ is routed to whichever vcpu is current when it
+ * is received, and it must be a PPI.
+ */
 int route_irq_to_guest(struct domain *d, unsigned int irq,
                        const char * devname)
 {
@@ -386,6 +407,8 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
     unsigned long flags;
     int retval = 0;
 
+    ASSERT(d || (irq >= 16 && irq < 32));
+
     action = xmalloc(struct irqaction);
     if (!action)
         return -ENOMEM;
@@ -406,11 +429,23 @@ int route_irq_to_guest(struct domain *d, unsigned int irq,
         struct domain *ad = irq_get_domain(desc);
 
         if ( test_bit(_IRQ_GUEST, &desc->status) && d == ad )
+        {
+            printk(XENLOG_ERR
+                   "ERROR: IRQ %u is already routed to domain %p\n",
+                   irq, ad);
             goto out;
+        }
 
         if ( test_bit(_IRQ_GUEST, &desc->status) )
-            printk(XENLOG_ERR "ERROR: IRQ %u is already used by domain %u\n",
-                   irq, ad->domain_id);
+        {
+            if ( ad )
+                printk(XENLOG_ERR
+                       "ERROR: IRQ %u is already used by domain %u\n",
+                       irq, ad->domain_id);
+            else
+                printk(XENLOG_ERR
+                       "ERROR: IRQ %u is already used by <current-vcpu>\n", irq);
+        }
         else
             printk(XENLOG_ERR "ERROR: IRQ %u is already used by Xen\n", irq);
         retval = -EBUSY;
@@ -433,6 +468,11 @@ out:
     return retval;
 }
 
+int route_irq_to_current_vcpu(unsigned int irq, const char *devname)
+{
+    return route_irq_to_guest(NULL, irq, devname);
+}
+
 /*
  * pirq event channels. We don't use these on ARM, instead we use the
  * features of the GIC to inject virtualised normal interrupts.
diff --git a/xen/arch/arm/time.c b/xen/arch/arm/time.c
index 455f217..ffa5eef 100644
--- a/xen/arch/arm/time.c
+++ b/xen/arch/arm/time.c
@@ -169,28 +169,6 @@ static void timer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
     }
 }
 
-static void vtimer_interrupt(int irq, void *dev_id, struct cpu_user_regs *regs)
-{
-    /*
-     * Edge-triggered interrupts can be used for the virtual timer. Even
-     * if the timer output signal is masked in the context switch, the
-     * GIC will keep track that of any interrupts raised while IRQS are
-     * disabled. As soon as IRQs are re-enabled, the virtual interrupt
-     * will be injected to Xen.
-     *
-     * If an IDLE vCPU was scheduled next then we should ignore the
-     * interrupt.
-     */
-    if ( unlikely(is_idle_vcpu(current)) )
-        return;
-
-    perfc_incr(virt_timer_irqs);
-
-    current->arch.virt_timer.ctl = READ_SYSREG32(CNTV_CTL_EL0);
-    WRITE_SYSREG32(current->arch.virt_timer.ctl | CNTx_CTL_MASK, CNTV_CTL_EL0);
-    vgic_vcpu_inject_irq(current, current->arch.virt_timer.irq);
-}
-
 /* Set up the timer interrupt on this CPU */
 void __cpuinit init_timer_interrupt(void)
 {
@@ -204,8 +182,8 @@ void __cpuinit init_timer_interrupt(void)
 
     request_irq(timer_irq[TIMER_HYP_PPI], 0, timer_interrupt,
                 "hyptimer", NULL);
-    request_irq(timer_irq[TIMER_VIRT_PPI], 0, vtimer_interrupt,
-                   "virtimer", NULL);
+    route_irq_to_current_vcpu(timer_irq[TIMER_VIRT_PPI], "virtimer");
+
     request_irq(timer_irq[TIMER_PHYS_NONSECURE_PPI], 0, timer_interrupt,
                 "phytimer", NULL);
 }
diff --git a/xen/arch/arm/vtimer.c b/xen/arch/arm/vtimer.c
index 848e2c6..d1f21a1 100644
--- a/xen/arch/arm/vtimer.c
+++ b/xen/arch/arm/vtimer.c
@@ -47,9 +47,14 @@ static void phys_timer_expired(void *data)
 static void virt_timer_expired(void *data)
 {
     struct vtimer *t = data;
-    t->ctl |= CNTx_CTL_MASK;
-    vgic_vcpu_inject_irq(t->v, t->irq);
-    perfc_incr(vtimer_virt_inject);
+    t->ctl |= CNTx_CTL_PENDING;
+    if ( !(t->ctl & CNTx_CTL_MASK) )
+    {
+        /* An edge triggered interrupt should now be pending. */
+        t->ppi_state.pending = true;
+        vcpu_unblock(t->v);
+        perfc_incr(vtimer_virt_inject);
+    }
 }
 
 int domain_vtimer_init(struct domain *d)
@@ -112,6 +117,15 @@ int virt_timer_save(struct vcpu *v)
         set_timer(&v->arch.virt_timer.timer, ticks_to_ns(v->arch.virt_timer.cval +
                   v->domain->arch.virt_timer_base.offset - boot_count));
     }
+
+    /*
+     * Since the vtimer irq is a PPI we don't need to worry about
+     * racing against it becoming active while we are saving the
+     * state, since that requires the guest to be reading the IAR.
+     */
+    gic_save_and_mask_hwppi(v, v->arch.virt_timer.irq,
+                            &v->arch.virt_timer.ppi_state);
+
     return 0;
 }
 
@@ -126,6 +140,10 @@ int virt_timer_restore(struct vcpu *v)
     WRITE_SYSREG64(v->domain->arch.virt_timer_base.offset, CNTVOFF_EL2);
     WRITE_SYSREG64(v->arch.virt_timer.cval, CNTV_CVAL_EL0);
     WRITE_SYSREG32(v->arch.virt_timer.ctl, CNTV_CTL_EL0);
+
+    gic_restore_hwppi(v, v->arch.virt_timer.irq,
+                      &v->arch.virt_timer.ppi_state);
+
     return 0;
 }
 
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 90ab9c3..dd50e2c 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -34,12 +34,23 @@ enum domain_type {
 extern int dom0_11_mapping;
 #define is_domain_direct_mapped(d) ((d) == hardware_domain && dom0_11_mapping)
 
+struct hwppi_state {
+    /* h/w state */
+    unsigned long enabled:1;
+    unsigned long pending:1;
+    unsigned long active:1;
+
+    /* Xen s/w state */
+    unsigned long inprogress:1;
+};
+
 struct vtimer {
     struct vcpu *v;
     int irq;
     struct timer timer;
     uint32_t ctl;
     uint64_t cval;
+    struct hwppi_state ppi_state;
 };
 
 struct arch_domain
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index 187dc46..aa8cbac 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -247,6 +247,16 @@ extern int gicv_setup(struct domain *d);
 extern void gic_save_state(struct vcpu *v);
 extern void gic_restore_state(struct vcpu *v);
 
+/*
+ * Save/restore the state of a single PPI which must be routed to
+ * <current-vcpu> (that is, defined to be injected to the current vcpu).
+ */
+struct hwppi_state;
+extern void gic_save_and_mask_hwppi(struct vcpu *v, unsigned virq,
+                                    struct hwppi_state *s);
+extern void gic_restore_hwppi(struct vcpu *v, unsigned virq,
+                              const struct hwppi_state *s);
+
 /* SGI (AKA IPIs) */
 enum gic_sgi {
     GIC_SGI_EVENT_CHECK = 0,
@@ -293,8 +303,12 @@ struct gic_hw_operations {
     const struct gic_info *info;
     /* Save GIC registers */
     void (*save_state)(struct vcpu *);
+    void (*save_and_mask_hwppi)(struct vcpu *v, const unsigned virq,
+                                struct hwppi_state *s);
     /* Restore GIC registers */
     void (*restore_state)(const struct vcpu *);
+    void (*restore_hwppi)(struct vcpu *v, const unsigned virq,
+                          const struct hwppi_state *s);
     /* Dump GIC LR register information */
     void (*dump_state)(const struct vcpu *);
     /* Map MMIO region of GIC */
diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
index 435dfcd..a08438e 100644
--- a/xen/include/asm-arm/irq.h
+++ b/xen/include/asm-arm/irq.h
@@ -42,6 +42,7 @@ void init_secondary_IRQ(void);
 
 int route_irq_to_guest(struct domain *d, unsigned int irq,
                        const char *devname);
+int route_irq_to_current_vcpu(unsigned int irq, const char *devname);
 void arch_move_irqs(struct vcpu *v);
 
 /* Set IRQ type for an SPI */
-- 
1.7.10.4

