Re: [Xen-devel] [PATCH v3a 14/39] ARM: new VGIC: Add GICv2 world switch backend
 
 
Hi,
 
 Sorry for the formatting. On Thu, 22 Mar 2018, Andre Przywara wrote: 
> Processing maintenance interrupts and accessing the list registers 
> are dependent on the host's GIC version. 
> Introduce vgic-v2.c to contain GICv2 specific functions. 
> Implement the GICv2 specific code for syncing the emulation state 
> into the VGIC registers. 
> This also adds the hook to let Xen setup the host GIC addresses. 
> 
> This is based on Linux commit 140b086dd197, written by Marc Zyngier. 
> 
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxxxxx> 
> --- 
> Changelog v3 ... v3a: 
> - take hardware IRQ lock in vgic_v2_fold_lr_state() 
> - fix last remaining u32 usage 
> - print message when using new VGIC 
> - add TODO about racy _IRQ_INPROGRESS setting 
> 
> Changelog v2 ... v3: 
> - remove no longer needed asm/io.h header 
> - replace 0/1 with false/true for bool's 
> - clear _IRQ_INPROGRESS bit when retiring hardware mapped IRQ 
> - fix indentation and w/s issues 
> 
> Changelog v1 ... v2: 
> - remove v2 specific underflow function (now generic) 
> - re-add Linux code to properly handle acked level IRQs 
> 
>  xen/arch/arm/vgic/vgic-v2.c | 259 ++++++++++++++++++++++++++++++++++++++++++++ 
>  xen/arch/arm/vgic/vgic.c    |   6 + 
>  xen/arch/arm/vgic/vgic.h    |   9 ++ 
>  3 files changed, 274 insertions(+) 
>  create mode 100644 xen/arch/arm/vgic/vgic-v2.c 
> 
> diff --git a/xen/arch/arm/vgic/vgic-v2.c b/xen/arch/arm/vgic/vgic-v2.c 
> new file mode 100644 
> index 0000000000..1773503cfb 
> --- /dev/null 
> +++ b/xen/arch/arm/vgic/vgic-v2.c 
> @@ -0,0 +1,259 @@ 
> +/* 
> + * Copyright (C) 2015, 2016 ARM Ltd. 
> + * Imported from Linux ("new" KVM VGIC) and heavily adapted to Xen. 
> + * 
> + * This program is free software; you can redistribute it and/or modify 
> + * it under the terms of the GNU General Public License version 2 as 
> + * published by the Free Software Foundation. 
> + * 
> + * This program is distributed in the hope that it will be useful, 
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of 
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
> + * GNU General Public License for more details. 
> + * 
> + * You should have received a copy of the GNU General Public License 
> + * along with this program.  If not, see <http://www.gnu.org/licenses/>. 
> + */ 
> + 
> +#include <asm/new_vgic.h> 
> +#include <asm/bug.h> 
> +#include <asm/gic.h> 
> +#include <xen/sched.h> 
> +#include <xen/sizes.h> 
> + 
> +#include "vgic.h" 
> + 
> +static struct { 
> +    bool enabled; 
> +    paddr_t dbase;          /* Distributor interface address */ 
> +    paddr_t cbase;          /* CPU interface address & size */ 
> +    paddr_t csize; 
> +    paddr_t vbase;          /* Virtual CPU interface address */ 
> + 
> +    /* Offset to add to get an 8kB contiguous region if GIC is aliased */ 
> +    uint32_t aliased_offset; 
> +} gic_v2_hw_data; 
> + 
> +void vgic_v2_setup_hw(paddr_t dbase, paddr_t cbase, paddr_t csize, 
> +                      paddr_t vbase, uint32_t aliased_offset) 
> +{ 
> +    gic_v2_hw_data.enabled = true; 
> +    gic_v2_hw_data.dbase = dbase; 
> +    gic_v2_hw_data.cbase = cbase; 
> +    gic_v2_hw_data.csize = csize; 
> +    gic_v2_hw_data.vbase = vbase; 
> +    gic_v2_hw_data.aliased_offset = aliased_offset; 
> + 
> +    printk("Using the new VGIC implementation.\n"); 
> +} 
> + 
> +/* 
> + * transfer the content of the LRs back into the corresponding ap_list: 
> + * - active bit is transferred as is 
> + * - pending bit is 
> + *   - transferred as is in case of edge sensitive IRQs 
> + *   - set to the line-level (resample time) for level sensitive IRQs 
> + */ 
> +void vgic_v2_fold_lr_state(struct vcpu *vcpu) 
> +{ 
> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic; 
> +    unsigned int used_lrs = vcpu->arch.vgic.used_lrs; 
> +    unsigned long flags; 
> +    unsigned int lr; 
> + 
> +    if ( !used_lrs )    /* No LRs used, so nothing to sync back here. */ 
> +        return; 
> + 
> +    gic_hw_ops->update_hcr_status(GICH_HCR_UIE, false); 
> + 
> +    for ( lr = 0; lr < used_lrs; lr++ ) 
> +    { 
> +        struct gic_lr lr_val; 
> +        uint32_t intid; 
> +        struct vgic_irq *irq; 
> +        struct irq_desc *desc = NULL; 
> +        bool have_desc_lock = false; 
> + 
> +        gic_hw_ops->read_lr(lr, &lr_val); 
> + 
> +        /* 
> +         * TODO: Possible optimization to avoid reading LRs: 
> +         * Read the ELRSR to find out which of our LRs have been cleared 
> +         * by the guest. We just need to know the IRQ number for those, which 
> +         * we could save in an array when populating the LRs. 
> +         * This trades one MMIO access (ELRSR) for possibly more than one (LRs), 
> +         * but requires some more code to save the IRQ number and to handle 
> +         * those finished IRQs according to the algorithm below. 
> +         * We need some numbers to justify this: chances are that we don't 
> +         * have many LRs in use most of the time, so we might not save much. 
> +         */ 
> +        gic_hw_ops->clear_lr(lr); 
> + 
> +        intid = lr_val.virq; 
> +        irq = vgic_get_irq(vcpu->domain, vcpu, intid); 
> + 
> +        local_irq_save(flags); 
 
Shouldn't we disable interrupts earlier, maybe at the beginning of the 
function? Isn't it a problem if we take an interrupt a couple of lines 
above, around the read_lr and clear_lr calls that we do? 
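
To illustrate, this is roughly what I have in mind (an untested sketch, 
with everything but the relevant lines elided): 

    local_irq_save(flags);

    for ( lr = 0; lr < used_lrs; lr++ )
    {
        struct gic_lr lr_val;

        /*
         * With interrupts masked here, read_lr/clear_lr can no longer
         * race with an interrupt taken on this pCPU.
         */
        gic_hw_ops->read_lr(lr, &lr_val);
        gic_hw_ops->clear_lr(lr);

        /* ... rest of the existing per-LR logic ... */
    }

    local_irq_restore(flags);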
 
 
> +        spin_lock(&irq->irq_lock); 
> + 
> +        /* The locking order forces us to drop and re-take the locks here. */ 
> +        if ( irq->hw ) 
> +        { 
> +            spin_unlock(&irq->irq_lock); 
> + 
> +            desc = irq_to_desc(irq->hwintid); 
> +            spin_lock(&desc->lock); 
> +            spin_lock(&irq->irq_lock); 
> + 
> +            /* This h/w IRQ should still be assigned to the virtual IRQ. */ 
> +            ASSERT(irq->hw && desc->irq == irq->hwintid); 
> + 
> +            have_desc_lock = true; 
> +        } 
 
I agree with Julien that this looks very fragile. Instead, I think it 
would be best to always take the desc lock (if irq->hw) before the 
irq_lock earlier in this function. That way, we don't have to deal with 
this business of unlocking and relocking. Do you see any problems with 
it? We don't change irq->hw at run time, so it looks OK to me. 
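
Something along these lines, for instance (untested sketch; it relies 
on irq->hw being stable, as noted above): 

    struct irq_desc *desc = NULL;

    if ( irq->hw )
    {
        /* Take the desc lock first, before the irq_lock. */
        desc = irq_to_desc(irq->hwintid);
        spin_lock(&desc->lock);
    }
    spin_lock(&irq->irq_lock);

    /* ... handle the LR state as before ... */

    spin_unlock(&irq->irq_lock);
    if ( desc )
        spin_unlock(&desc->lock);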
 
 
> +        /* 
> +         * If a hardware mapped IRQ has been handled for good, we need to 
> +         * clear the _IRQ_INPROGRESS bit to allow handling of new IRQs. 
> +         * 
> +         * TODO: This is probably racy, but is so already in the existing 
> +         * VGIC. A fix does not seem to be trivial. 
> +         */ 
> +        if ( irq->hw && !lr_val.active && !lr_val.pending ) 
> +            clear_bit(_IRQ_INPROGRESS, &desc->status); 
 
I'll reply here to Julien's comment: 
 
> I realize the current vGIC is doing exactly the same thing. But this is racy. 
> 
> Imagine the interrupt is firing on another pCPU (I wasn't able to rule out this even when the interrupt is following the vCPU), that pCPU may set _IRQ_INPROGRESS before this 
> is cleared here. 
 
The assumption in the old vgic was that this scenario was not possible. 
vgic_migrate_irq would avoid changing physical interrupt affinity if a 
virtual interrupt was currently in an LR (see xen/arch/arm/vgic.c:L298). 
Instead, it would set the irq as GIC_IRQ_GUEST_MIGRATING, then at the 
time of clearing the LR we would change the physical irq affinity (see 
xen/arch/arm/gic-vgic.c:L240). 
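
Roughly, the old vGIC's deferral works like this (a simplified sketch 
from memory, with approximate variable names, not the exact code): 

    /* On an ITARGETSR/IROUTER write, in vgic_migrate_irq(): */
    if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p->status) )
        /* The vIRQ is in an LR: defer the physical affinity change. */
        set_bit(GIC_IRQ_GUEST_MIGRATING, &p->status);
    else
        irq_set_affinity(p->desc, cpumask_of(new_vcpu->processor));

    /* Later, when clearing the LR: */
    if ( test_and_clear_bit(GIC_IRQ_GUEST_MIGRATING, &p->status) )
        irq_set_affinity(p->desc, cpumask_of(v_target->processor));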
 
I think we would need a similar mechanism here to protect ourselves from 
races. Is there something equivalent in the new vgic?
  
 
A mechanism that we know is already very racy on the old vGIC. You also have to take into account that a write to ITARGETSR/IROUTER will take effect in finite time. An interrupt may still become pending on the old pCPU in the meantime. 
 
To be honest, we should migrate the interrupt on the guest MMIO write and find a way to handle desc->state correctly. 
 
One solution is to avoid relying on desc->state when releasing the IRQ. Then we would not need to care about a potential race. 
 
 Cheers, 
 
> +        /* Always preserve the active bit */ 
> +        irq->active = lr_val.active; 
> + 
> +        /* Edge is the only case where we preserve the pending bit */ 
> +        if ( irq->config == VGIC_CONFIG_EDGE && lr_val.pending ) 
> +        { 
> +            irq->pending_latch = true; 
> + 
> +            if ( vgic_irq_is_sgi(intid) ) 
> +                irq->source |= (1U << lr_val.virt.source); 
> +        } 
> + 
> +        /* Clear soft pending state when level irqs have been acked. */ 
> +        if ( irq->config == VGIC_CONFIG_LEVEL && !lr_val.pending ) 
> +            irq->pending_latch = false; 
> + 
> +        /* 
> +         * Level-triggered mapped IRQs are special because we only 
> +         * observe rising edges as input to the VGIC. 
> +         * 
> +         * If the guest never acked the interrupt we have to sample 
> +         * the physical line and set the line level, because the 
> +         * device state could have changed or we simply need to 
> +         * process the still pending interrupt later. 
> +         * 
> +         * If this causes us to lower the level, we have to also clear 
> +         * the physical active state, since we will otherwise never be 
> +         * told when the interrupt becomes asserted again. 
> +         */ 
> +        if ( vgic_irq_is_mapped_level(irq) && lr_val.pending ) 
> +        { 
> +            ASSERT(irq->hwintid >= VGIC_NR_PRIVATE_IRQS); 
> + 
> +            irq->line_level = gic_read_pending_state(desc); 
> + 
> +            if ( !irq->line_level ) 
> +                gic_set_active_state(desc, false); 
> +        } 
> + 
> +        spin_unlock(&irq->irq_lock); 
> +        if ( have_desc_lock ) 
> +            spin_unlock(&desc->lock); 
> +        local_irq_restore(flags); 
> + 
> +        vgic_put_irq(vcpu->domain, irq); 
> +    } 
> + 
> +    gic_hw_ops->update_hcr_status(GICH_HCR_EN, false); 
> +    vgic_cpu->used_lrs = 0; 
> +} 
> + 
> +/** 
> + * vgic_v2_populate_lr() - Populates an LR with the state of a given IRQ. 
> + * @vcpu: The VCPU which the given @irq belongs to. 
> + * @irq:  The IRQ to convert into an LR. The irq_lock must be held already. 
> + * @lr:   The LR number to transfer the state into. 
> + * 
> + * This moves a virtual IRQ, represented by its vgic_irq, into a list register. 
> + * Apart from translating the logical state into the LR bitfields, it also 
> + * changes some state in the vgic_irq. 
> + * For an edge sensitive IRQ the pending state is cleared in struct vgic_irq, 
> + * for a level sensitive IRQ the pending state value is unchanged, as it is 
> + * dictated directly by the input line level. 
> + * 
> + * If @irq describes an SGI with multiple sources, we choose the 
> + * lowest-numbered source VCPU and clear that bit in the source bitmap. 
> + * 
> + * The irq_lock must be held by the caller. 
> + */ 
> +void vgic_v2_populate_lr(struct vcpu *vcpu, struct vgic_irq *irq, int lr) 
> +{ 
> +    struct gic_lr lr_val = {0}; 
> + 
> +    lr_val.virq = irq->intid; 
> + 
> +    if ( irq_is_pending(irq) ) 
> +    { 
> +        lr_val.pending = true; 
> + 
> +        if ( irq->config == VGIC_CONFIG_EDGE ) 
> +            irq->pending_latch = false; 
> + 
> +        if ( vgic_irq_is_sgi(irq->intid) ) 
> +        { 
> +            uint32_t src = ffs(irq->source); 
> + 
> +            BUG_ON(!src); 
> +            lr_val.virt.source = (src - 1); 
> +            irq->source &= ~(1 << (src - 1)); 
> +            if ( irq->source ) 
> +                irq->pending_latch = true; 
> +        } 
> +    } 
> + 
> +    lr_val.active = irq->active; 
> + 
> +    if ( irq->hw ) 
> +    { 
> +        lr_val.hw_status = true; 
> +        lr_val.hw.pirq = irq->hwintid; 
> +        /* 
> +         * Never set pending+active on a HW interrupt, as the 
> +         * pending state is kept at the physical distributor 
> +         * level. 
> +         */ 
> +        if ( irq->active && irq_is_pending(irq) ) 
> +            lr_val.pending = false; 
> +    } 
> +    else 
> +    { 
> +        if ( irq->config == VGIC_CONFIG_LEVEL ) 
> +            lr_val.virt.eoi = true; 
> +    } 
> + 
> +    /* 
> +     * Level-triggered mapped IRQs are special because we only observe 
> +     * rising edges as input to the VGIC.  We therefore lower the line 
> +     * level here, so that we can take new virtual IRQs.  See 
> +     * vgic_v2_fold_lr_state for more info. 
> +     */ 
> +    if ( vgic_irq_is_mapped_level(irq) && lr_val.pending ) 
> +        irq->line_level = false; 
> + 
> +    /* The GICv2 LR only holds five bits of priority. */ 
> +    lr_val.priority = irq->priority >> 3; 
> + 
> +    gic_hw_ops->write_lr(lr, &lr_val); 
> +} 
> + 
> +/* 
> + * Local variables: 
> + * mode: C 
> + * c-file-style: "BSD" 
> + * c-basic-offset: 4 
> + * indent-tabs-mode: nil 
> + * End: 
> + */ 
> diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c 
> index d91ed29d96..214176c14e 100644 
> --- a/xen/arch/arm/vgic/vgic.c 
> +++ b/xen/arch/arm/vgic/vgic.c 
> @@ -520,6 +520,7 @@ retry: 
> 
>  static void vgic_fold_lr_state(struct vcpu *vcpu) 
>  { 
> +    vgic_v2_fold_lr_state(vcpu); 
>  } 
> 
>  /* Requires the irq_lock to be held. */ 
> @@ -527,6 +528,8 @@ static void vgic_populate_lr(struct vcpu *vcpu, 
>                               struct vgic_irq *irq, int lr) 
>  { 
>      ASSERT(spin_is_locked(&irq->irq_lock)); 
> + 
> +    vgic_v2_populate_lr(vcpu, irq, lr); 
>  } 
> 
>  static void vgic_set_underflow(struct vcpu *vcpu) 
> @@ -640,7 +643,10 @@ void vgic_sync_to_lrs(void) 
>      spin_lock(&current->arch.vgic.ap_list_lock); 
>      vgic_flush_lr_state(current); 
>      spin_unlock(&current->arch.vgic.ap_list_lock); 
> + 
> +    gic_hw_ops->update_hcr_status(GICH_HCR_EN, 1); 
>  } 
> + 
>  /* 
>   * Local variables: 
>   * mode: C 
> diff --git a/xen/arch/arm/vgic/vgic.h b/xen/arch/arm/vgic/vgic.h 
> index 1547478518..e2b6d51e47 100644 
> --- a/xen/arch/arm/vgic/vgic.h 
> +++ b/xen/arch/arm/vgic/vgic.h 
> @@ -27,6 +27,11 @@ static inline bool irq_is_pending(struct vgic_irq *irq) 
>          return irq->pending_latch || irq->line_level; 
>  } 
> 
> +static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) 
> +{ 
> +    return irq->config == VGIC_CONFIG_LEVEL && irq->hw; 
> +} 
> + 
>  struct vgic_irq *vgic_get_irq(struct domain *d, struct vcpu *vcpu, 
>                                uint32_t intid); 
>  void vgic_put_irq(struct domain *d, struct vgic_irq *irq); 
> @@ -41,6 +46,10 @@ static inline void vgic_get_irq_kref(struct vgic_irq *irq) 
>      atomic_inc(&irq->refcount); 
>  } 
> 
> +void vgic_v2_fold_lr_state(struct vcpu *vcpu); 
> +void vgic_v2_populate_lr(struct vcpu *vcpu, struct vgic_irq *irq, int lr); 
> +void vgic_v2_set_underflow(struct vcpu *vcpu); 
> + 
>  #endif 
> 
>  /* 
> -- 
> 2.14.1 
> 
 
_______________________________________________ 
Xen-devel mailing list 
Xen-devel@xxxxxxxxxxxxxxxxxxxx 
https://lists.xenproject.org/mailman/listinfo/xen-devel  