Re: [Xen-devel] [PATCH v3a 14/39] ARM: new VGIC: Add GICv2 world switch backend
 
 
Hi,
 
 Sorry for the formatting. On Thu, 22 Mar 2018, Andre Przywara wrote: 
> Processing maintenance interrupts and accessing the list registers 
> are dependent on the host's GIC version. 
> Introduce vgic-v2.c to contain GICv2 specific functions. 
> Implement the GICv2 specific code for syncing the emulation state 
> into the VGIC registers. 
> This also adds the hook to let Xen setup the host GIC addresses. 
> 
> This is based on Linux commit 140b086dd197, written by Marc Zyngier. 
> 
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxxxxx> 
> --- 
> Changelog v3 ... v3a: 
> - take hardware IRQ lock in vgic_v2_fold_lr_state() 
> - fix last remaining u32 usage 
> - print message when using new VGIC 
> - add TODO about racy _IRQ_INPROGRESS setting 
> 
> Changelog v2 ... v3: 
> - remove no longer needed asm/io.h header 
> - replace 0/1 with false/true for bool's 
> - clear _IRQ_INPROGRESS bit when retiring hardware mapped IRQ 
> - fix indentation and w/s issues 
> 
> Changelog v1 ... v2: 
> - remove v2 specific underflow function (now generic) 
> - re-add Linux code to properly handle acked level IRQs 
> 
>  xen/arch/arm/vgic/vgic-v2.c | 259 ++++++++++++++++++++++++++++++++++++++++++++ 
>  xen/arch/arm/vgic/vgic.c    |   6 + 
>  xen/arch/arm/vgic/vgic.h    |   9 ++ 
>  3 files changed, 274 insertions(+) 
>  create mode 100644 xen/arch/arm/vgic/vgic-v2.c 
> 
> diff --git a/xen/arch/arm/vgic/vgic-v2.c b/xen/arch/arm/vgic/vgic-v2.c 
> new file mode 100644 
> index 0000000000..1773503cfb 
> --- /dev/null 
> +++ b/xen/arch/arm/vgic/vgic-v2.c 
> @@ -0,0 +1,259 @@ 
> +/* 
> + * Copyright (C) 2015, 2016 ARM Ltd. 
> + * Imported from Linux ("new" KVM VGIC) and heavily adapted to Xen. 
> + * 
> + * This program is free software; you can redistribute it and/or modify 
> + * it under the terms of the GNU General Public License version 2 as 
> + * published by the Free Software Foundation. 
> + * 
> + * This program is distributed in the hope that it will be useful, 
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of 
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
> + * GNU General Public License for more details. 
> + * 
> + * You should have received a copy of the GNU General Public License 
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
> + */ 
> + 
> +#include <asm/new_vgic.h> 
> +#include <asm/bug.h> 
> +#include <asm/gic.h> 
> +#include <xen/sched.h> 
> +#include <xen/sizes.h> 
> + 
> +#include "vgic.h" 
> + 
> +static struct { 
> +    bool enabled; 
> +    paddr_t dbase;          /* Distributor interface address */ 
> +    paddr_t cbase;          /* CPU interface address & size */ 
> +    paddr_t csize; 
> +    paddr_t vbase;          /* Virtual CPU interface address */ 
> + 
> +    /* Offset to add to get an 8kB contiguous region if GIC is aliased */ 
> +    uint32_t aliased_offset; 
> +} gic_v2_hw_data; 
> + 
> +void vgic_v2_setup_hw(paddr_t dbase, paddr_t cbase, paddr_t csize, 
> +                      paddr_t vbase, uint32_t aliased_offset) 
> +{ 
> +    gic_v2_hw_data.enabled = true; 
> +    gic_v2_hw_data.dbase = dbase; 
> +    gic_v2_hw_data.cbase = cbase; 
> +    gic_v2_hw_data.csize = csize; 
> +    gic_v2_hw_data.vbase = vbase; 
> +    gic_v2_hw_data.aliased_offset = aliased_offset; 
> + 
> +    printk("Using the new VGIC implementation.\n"); 
> +} 
> + 
> +/* 
> + * transfer the content of the LRs back into the corresponding ap_list: 
> + * - active bit is transferred as is 
> + * - pending bit is 
> + *   - transferred as is in case of edge sensitive IRQs 
> + *   - set to the line-level (resample time) for level sensitive IRQs 
> + */ 
> +void vgic_v2_fold_lr_state(struct vcpu *vcpu) 
> +{ 
> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic; 
> +    unsigned int used_lrs = vcpu->arch.vgic.used_lrs; 
> +    unsigned long flags; 
> +    unsigned int lr; 
> + 
> +    if ( !used_lrs )    /* No LRs used, so nothing to sync back here. */ 
> +        return; 
> + 
> +    gic_hw_ops->update_hcr_status(GICH_HCR_UIE, false); 
> + 
> +    for ( lr = 0; lr < used_lrs; lr++ ) 
> +    { 
> +        struct gic_lr lr_val; 
> +        uint32_t intid; 
> +        struct vgic_irq *irq; 
> +        struct irq_desc *desc = NULL; 
> +        bool have_desc_lock = false; 
> + 
> +        gic_hw_ops->read_lr(lr, &lr_val); 
> + 
> +        /* 
> +         * TODO: Possible optimization to avoid reading LRs: 
> +         * Read the ELRSR to find out which of our LRs have been cleared 
> +         * by the guest. We just need to know the IRQ number for those, which 
> +         * we could save in an array when populating the LRs. 
> +         * This trades one MMIO access (ELRSR) for possibly more than one (LRs), 
> +         * but requires some more code to save the IRQ number and to handle 
> +         * those finished IRQs according to the algorithm below. 
> +         * We need some numbers to justify this: chances are that we don't 
> +         * have many LRs in use most of the time, so we might not save much. 
> +         */ 
> +        gic_hw_ops->clear_lr(lr); 
> + 
> +        intid = lr_val.virq; 
> +        irq = vgic_get_irq(vcpu->domain, vcpu, intid); 
> + 
> +        local_irq_save(flags); 
 
Shouldn't we disable interrupts earlier, maybe at the beginning of the 
function? Isn't it a problem if we take an interrupt a couple of lines 
above, around the read_lr and clear_lr calls that we do? 
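
To illustrate, this is roughly what I have in mind (an untested sketch, 
with everything but the relevant lines elided): 

    local_irq_save(flags);

    for ( lr = 0; lr < used_lrs; lr++ )
    {
        struct gic_lr lr_val;

        /*
         * With interrupts masked here, read_lr/clear_lr can no longer
         * race with an interrupt taken on this pCPU.
         */
        gic_hw_ops->read_lr(lr, &lr_val);
        gic_hw_ops->clear_lr(lr);

        /* ... rest of the existing per-LR logic ... */
    }

    local_irq_restore(flags);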
 
 
> +        spin_lock(&irq->irq_lock); 
> + 
> +        /* The locking order forces us to drop and re-take the locks here. */ 
> +        if ( irq->hw ) 
> +        { 
> +            spin_unlock(&irq->irq_lock); 
> + 
> +            desc = irq_to_desc(irq->hwintid); 
> +            spin_lock(&desc->lock); 
> +            spin_lock(&irq->irq_lock); 
> + 
> +            /* This h/w IRQ should still be assigned to the virtual IRQ. */ 
> +            ASSERT(irq->hw && desc->irq == irq->hwintid); 
> + 
> +            have_desc_lock = true; 
> +        } 
 
I agree with Julien that this looks very fragile. Instead, I think it 
would be best to always take the desc lock (if irq->hw) before the 
irq_lock earlier in this function. That way, we don't have to deal with 
this business of unlocking and relocking. Do you see any problems with 
it? We don't change irq->hw at run time, so it looks OK to me. 
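
Something along these lines, for instance (untested sketch; it relies 
on irq->hw being stable, as noted above): 

    struct irq_desc *desc = NULL;

    if ( irq->hw )
    {
        /* Take the desc lock first, before the irq_lock. */
        desc = irq_to_desc(irq->hwintid);
        spin_lock(&desc->lock);
    }
    spin_lock(&irq->irq_lock);

    /* ... handle the LR state as before ... */

    spin_unlock(&irq->irq_lock);
    if ( desc )
        spin_unlock(&desc->lock);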
 
 
> +        /* 
> +         * If a hardware mapped IRQ has been handled for good, we need to 
> +         * clear the _IRQ_INPROGRESS bit to allow handling of new IRQs. 
> +         * 
> +         * TODO: This is probably racy, but is so already in the existing 
> +         * VGIC. A fix does not seem to be trivial. 
> +         */ 
> +        if ( irq->hw && !lr_val.active && !lr_val.pending ) 
> +            clear_bit(_IRQ_INPROGRESS, &desc->status); 
 
I'll reply here to Julien's comment: 
 
> I realize the current vGIC is doing exactly the same thing. But this is racy. 
> 
> Imagine the interrupt is firing on another pCPU (I wasn't able to rule out this even when the interrupt is following the vCPU), that pCPU may set _IRQ_INPROGRESS before this 
> is cleared here. 
 
The assumption in the old vgic was that this scenario was not possible. 
vgic_migrate_irq would avoid changing physical interrupt affinity if a 
virtual interrupt was currently in an LR (see xen/arch/arm/vgic.c:L298). 
Instead, it would set the irq as GIC_IRQ_GUEST_MIGRATING, then at the 
time of clearing the LR we would change the physical irq affinity (see 
xen/arch/arm/gic-vgic.c:L240). 
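
Roughly, the old vGIC's deferral works like this (a simplified sketch 
from memory, with approximate variable names, not the exact code): 

    /* On an ITARGETSR/IROUTER write, in vgic_migrate_irq(): */
    if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
        /* The vIRQ is in an LR: defer the physical affinity change. */
        set_bit(GIC_IRQ_GUEST_MIGRATING, &p->status);
    else
        irq_set_affinity(p->desc, cpumask_of(new_vcpu->processor));

    /* Later, when clearing the LR: */
    if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
        irq_set_affinity(p->desc, cpumask_of(v_target->processor));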
 
I think we would need a similar mechanism here to protect ourselves from 
races. Is there something equivalent in the new vgic?
  
 
A mechanism that we know is already very racy on the old vGIC. You also have to take into account that a write to ITARGETSR/IROUTER will take effect in finite time. An interrupt may still become pending on the old pCPU in the meantime. 
 
To be honest, we should migrate the interrupt on the guest MMIO write and find a way to handle desc->state correctly. 
 
One solution is to avoid relying on desc->state when releasing the IRQ. Then we would not need to care about a potential race. 
 
 Cheers, 
 
> +        /* Always preserve the active bit */ 
> +        irq->active = lr_val.active; 
> + 
> +        /* Edge is the only case where we preserve the pending bit */ 
> +        if ( irq->config == VGIC_CONFIG_EDGE && lr_val.pending ) 
> +        { 
> +            irq->pending_latch = true; 
> + 
> +            if ( vgic_irq_is_sgi(intid) ) 
> +                irq->source |= (1U << lr_val.virt.source); 
> +        } 
> + 
> +        /* Clear soft pending state when level irqs have been acked. */ 
> +        if ( irq->config == VGIC_CONFIG_LEVEL && !lr_val.pending ) 
> +            irq->pending_latch = false; 
> + 
> +        /* 
> +         * Level-triggered mapped IRQs are special because we only 
> +         * observe rising edges as input to the VGIC. 
> +         * 
> +         * If the guest never acked the interrupt we have to sample 
> +         * the physical line and set the line level, because the 
> +         * device state could have changed or we simply need to 
> +         * process the still pending interrupt later. 
> +         * 
> +         * If this causes us to lower the level, we have to also clear 
> +         * the physical active state, since we will otherwise never be 
> +         * told when the interrupt becomes asserted again. 
> +         */ 
> +        if ( vgic_irq_is_mapped_level(irq) && lr_val.pending ) 
> +        { 
> +            ASSERT(irq->hwintid >= VGIC_NR_PRIVATE_IRQS); 
> + 
> +            irq->line_level = gic_read_pending_state(desc); 
> + 
> +            if ( !irq->line_level ) 
> +                gic_set_active_state(desc, false); 
> +        } 
> + 
> +        spin_unlock(&irq->irq_lock); 
> +        if ( have_desc_lock ) 
> +            spin_unlock(&desc->lock); 
> +        local_irq_restore(flags); 
> + 
> +        vgic_put_irq(vcpu->domain, irq); 
> +    } 
> + 
> +    gic_hw_ops->update_hcr_status(GICH_HCR_EN, false); 
> +    vgic_cpu->used_lrs = 0; 
> +} 
> + 
> +/** 
> + * vgic_v2_populate_lr() - Populates an LR with the state of a given IRQ. 
> + * @vcpu: The VCPU which the given @irq belongs to. 
> + * @irq:  The IRQ to convert into an LR. The irq_lock must be held already. 
> + * @lr:   The LR number to transfer the state into. 
> + * 
> + * This moves a virtual IRQ, represented by its vgic_irq, into a list register. 
> + * Apart from translating the logical state into the LR bitfields, it also 
> + * changes some state in the vgic_irq. 
> + * For an edge sensitive IRQ the pending state is cleared in struct vgic_irq, 
> + * for a level sensitive IRQ the pending state value is unchanged, as it is 
> + * dictated directly by the input line level. 
> + * 
> + * If @irq describes an SGI with multiple sources, we choose the 
> + * lowest-numbered source VCPU and clear that bit in the source bitmap. 
> + * 
> + * The irq_lock must be held by the caller. 
> + */ 
> +void vgic_v2_populate_lr(struct vcpu *vcpu, struct vgic_irq *irq, int lr) 
> +{ 
> +    struct gic_lr lr_val = {0}; 
> + 
> +    lr_val.virq = irq->intid; 
> + 
> +    if ( irq_is_pending(irq) ) 
> +    { 
> +        lr_val.pending = true; 
> + 
> +        if ( irq->config == VGIC_CONFIG_EDGE ) 
> +            irq->pending_latch = false; 
> + 
> +        if ( vgic_irq_is_sgi(irq->intid) ) 
> +        { 
> +            uint32_t src = ffs(irq->source); 
> + 
> +            BUG_ON(!src); 
> +            lr_val.virt.source = (src - 1); 
> +            irq->source &= ~(1 << (src - 1)); 
> +            if ( irq->source ) 
> +                irq->pending_latch = true; 
> +        } 
> +    } 
> + 
> +    lr_val.active = irq->active; 
> + 
> +    if ( irq->hw ) 
> +    { 
> +        lr_val.hw_status = true; 
> +        lr_val.hw.pirq = irq->hwintid; 
> +        /* 
> +         * Never set pending+active on a HW interrupt, as the 
> +         * pending state is kept at the physical distributor 
> +         * level. 
> +         */ 
> +        if ( irq->active && irq_is_pending(irq) ) 
> +            lr_val.pending = false; 
> +    } 
> +    else 
> +    { 
> +        if ( irq->config == VGIC_CONFIG_LEVEL ) 
> +            lr_val.virt.eoi = true; 
> +    } 
> + 
> +    /* 
> +     * Level-triggered mapped IRQs are special because we only observe 
> +     * rising edges as input to the VGIC.  We therefore lower the line 
> +     * level here, so that we can take new virtual IRQs.  See 
> +     * vgic_v2_fold_lr_state for more info. 
> +     */ 
> +    if ( vgic_irq_is_mapped_level(irq) && lr_val.pending ) 
> +        irq->line_level = false; 
> + 
> +    /* The GICv2 LR only holds five bits of priority. */ 
> +    lr_val.priority = irq->priority >> 3; 
> + 
> +    gic_hw_ops->write_lr(lr, &lr_val); 
> +} 
> + 
> +/* 
> + * Local variables: 
> + * mode: C 
> + * c-file-style: "BSD" 
> + * c-basic-offset: 4 
> + * indent-tabs-mode: nil 
> + * End: 
> + */ 
> diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c 
> index d91ed29d96..214176c14e 100644 
> --- a/xen/arch/arm/vgic/vgic.c 
> +++ b/xen/arch/arm/vgic/vgic.c 
> @@ -520,6 +520,7 @@ retry: 
> 
>  static void vgic_fold_lr_state(struct vcpu *vcpu) 
>  { 
> +    vgic_v2_fold_lr_state(vcpu); 
>  } 
> 
>  /* Requires the irq_lock to be held. */ 
> @@ -527,6 +528,8 @@ static void vgic_populate_lr(struct vcpu *vcpu, 
>                               struct vgic_irq *irq, int lr) 
>  { 
>      ASSERT(spin_is_locked(&irq->irq_lock)); 
> + 
> +    vgic_v2_populate_lr(vcpu, irq, lr); 
>  } 
> 
>  static void vgic_set_underflow(struct vcpu *vcpu) 
> @@ -640,7 +643,10 @@ void vgic_sync_to_lrs(void) 
>      spin_lock(&current->arch.vgic.ap_list_lock); 
>      vgic_flush_lr_state(current); 
>      spin_unlock(&current->arch.vgic.ap_list_lock); 
> + 
> +    gic_hw_ops->update_hcr_status(GICH_HCR_EN, 1); 
>  } 
> + 
>  /* 
>   * Local variables: 
>   * mode: C 
> diff --git a/xen/arch/arm/vgic/vgic.h b/xen/arch/arm/vgic/vgic.h 
> index 1547478518..e2b6d51e47 100644 
> --- a/xen/arch/arm/vgic/vgic.h 
> +++ b/xen/arch/arm/vgic/vgic.h 
> @@ -27,6 +27,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq) 
>          return irq->pending_latch || irq->line_level; 
>  } 
> 
> +static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) 
> +{ 
> +    return irq->config == VGIC_CONFIG_LEVEL && irq->hw; 
> +} 
> + 
>  struct vgic_irq *vgic_get_irq(struct domain *d, struct vcpu *vcpu, 
>                                uint32_t intid); 
>  void vgic_put_irq(struct domain *d, struct vgic_irq *irq); 
> @@ -41,6 +46,10 @@ static inline void vgic_get_irq_kref(struct vgic_irq *irq) 
>      atomic_inc(&irq->refcount); 
>  } 
> 
> +void vgic_v2_fold_lr_state(struct vcpu *vcpu); 
> +void vgic_v2_populate_lr(struct vcpu *vcpu, struct vgic_irq *irq, int lr); 
> +void vgic_v2_set_underflow(struct vcpu *vcpu); 
> + 
>  #endif 
> 
>  /* 
> -- 
> 2.14.1 
> 
 
_______________________________________________ 
Xen-devel mailing list 
Xen-devel@xxxxxxxxxxxxxxxxxxxx 
https://lists.xenproject.org/mailman/listinfo/xen-devel  