Re: [Xen-devel] [PATCH v3 07/26] ARM: GICv3 ITS: introduce host LPI array



On Fri, 31 Mar 2017, Andre Przywara wrote:
> The number of LPIs on a host can potentially be huge (millions),
> although in practice it will mostly be a reasonable number. So
> prematurely allocating an array of struct irq_desc's for each LPI is
> not an option.
> However, Xen itself does not care about LPIs, as every LPI will be
> injected into a guest (Dom0 for now).
> Create a dense data structure (8 bytes) for each LPI, which holds just
> enough information to determine the virtual IRQ number and the VCPU
> into which the LPI needs to be injected.
> Also, to avoid artificially limiting the number of LPIs, we create a
> 2-level table for holding those structures.
> This patch introduces functions to initialize these tables and to
> create, look up and destroy entries for a given LPI.
> By using the naturally atomic access guarantee that the native
> uint64_t data type gives us, we allocate and access LPI information
> in a way that does not require a lock.
> 
> Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>

See alpine.DEB.2.10.1703221552490.8001@sstabellini-ThinkPad-X260.

I'll stop here for now; I think there are enough comments already for
another version.
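
For anyone following the design, the lockless scheme the commit message
describes boils down to snapshotting the 8-byte union in one atomic load
and then working on the local copy. A minimal sketch (not part of the
patch; the helper name is made up, and it assumes the read_u64_atomic
counterpart to the write_u64_atomic used below):

    #include <xen/sched.h>      /* DOMID_INVALID */
    #include <asm/atomic.h>
    #include <asm/gic.h>        /* INVALID_LPI */

    union host_lpi {
        uint64_t data;
        struct {
            uint32_t virt_lpi;
            uint16_t dom_id;
            uint16_t vcpu_id;
        };
    };

    static uint32_t example_get_virt_lpi(union host_lpi *entry)
    {
        union host_lpi hlpi;

        /* One atomic 64-bit load captures a consistent view of all fields. */
        hlpi.data = read_u64_atomic(&entry->data);

        /* Unallocated entries carry DOMID_INVALID; nothing to inject. */
        if ( hlpi.dom_id == DOMID_INVALID )
            return INVALID_LPI;

        return hlpi.virt_lpi;
    }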

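The two-level table indexing is likewise easy to check with concrete
numbers. A sketch assuming 4K pages, where each second-level page holds
PAGE_SIZE / sizeof(union host_lpi) = 512 entries, and using host LPI
9426 as a made-up example:

    /* plpi has already had LPI_OFFSET (8192) subtracted: 9426 -> 1234. */
    unsigned int chunk = plpi / HOST_LPIS_PER_PAGE;   /* 1234 / 512 = 2   */
    unsigned int index = plpi % HOST_LPIS_PER_PAGE;   /* 1234 % 512 = 210 */
    union host_lpi *hlpi = &lpi_data.host_lpis[chunk][index];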

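Finally, the ITT sizing in gicv3_its_map_guest_device() below may be
easier to follow with a worked example (the 40-event device is
hypothetical):

    unsigned int nr_events = 40;    /* hypothetical device */

    /* Host LPIs are handed out in LPI_BLOCK (32) chunks: 2 blocks here. */
    unsigned int nr_blocks = DIV_ROUND_UP(nr_events, LPI_BLOCK);

    /*
     * MAPD's size field encodes the number of EventID bits minus one:
     * ROUNDUP(40, 32) = 64, fls(64 - 1) = 6, so 6 - 1 = 5 is sent,
     * giving a 64-entry ITT.
     */
    unsigned int size = fls(ROUNDUP(nr_events, LPI_BLOCK) - 1) - 1;
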
> ---
>  xen/arch/arm/gic-v3-its.c        |  89 +++++++++++++++++-
>  xen/arch/arm/gic-v3-lpi.c        | 196 +++++++++++++++++++++++++++++++++++++++
>  xen/include/asm-arm/gic.h        |   2 +
>  xen/include/asm-arm/gic_v3_its.h |   5 +
>  xen/include/asm-arm/irq.h        |   5 +
>  5 files changed, 295 insertions(+), 2 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
> index 295f7dc..fa284e7 100644
> --- a/xen/arch/arm/gic-v3-its.c
> +++ b/xen/arch/arm/gic-v3-its.c
> @@ -151,6 +151,20 @@ static int its_send_cmd_sync(struct host_its *its, unsigned int cpu)
>      return its_send_command(its, cmd);
>  }
>  
> +static int its_send_cmd_mapti(struct host_its *its,
> +                              uint32_t deviceid, uint32_t eventid,
> +                              uint32_t pintid, uint16_t icid)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_MAPTI | ((uint64_t)deviceid << 32);
> +    cmd[1] = eventid | ((uint64_t)pintid << 32);
> +    cmd[2] = icid;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
>  static int its_send_cmd_mapc(struct host_its *its, uint32_t collection_id,
>                               unsigned int cpu)
>  {
> @@ -185,6 +199,19 @@ static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
>      return its_send_command(its, cmd);
>  }
>  
> +static int its_send_cmd_inv(struct host_its *its,
> +                            uint32_t deviceid, uint32_t eventid)
> +{
> +    uint64_t cmd[4];
> +
> +    cmd[0] = GITS_CMD_INV | ((uint64_t)deviceid << 32);
> +    cmd[1] = eventid;
> +    cmd[2] = 0x00;
> +    cmd[3] = 0x00;
> +
> +    return its_send_command(its, cmd);
> +}
> +
>  /* Set up the (1:1) collection mapping for the given host CPU. */
>  int gicv3_its_setup_collection(unsigned int cpu)
>  {
> @@ -469,7 +496,7 @@ int gicv3_its_init(void)
>  
>  static int remove_mapped_guest_device(struct its_devices *dev)
>  {
> -    int ret;
> +    int ret, i;
>  
>      if ( dev->hw_its )
>      {
> @@ -479,12 +506,16 @@ static int remove_mapped_guest_device(struct its_devices *dev)
>              return ret;
>      }
>  
> +    for ( i = 0; i < DIV_ROUND_UP(dev->eventids, LPI_BLOCK); i++ )
> +        gicv3_free_host_lpi_block(dev->host_lpi_blocks[i]);
> +
>      ret = gicv3_its_wait_commands(dev->hw_its);
>      if ( ret )
>          return ret;
>  
>      xfree(dev->itt_addr);
>      xfree(dev->pend_irqs);
> +    xfree(dev->host_lpi_blocks);
>      xfree(dev);
>  
>      return 0;
> @@ -522,6 +553,37 @@ static int compare_its_guest_devices(struct its_devices *dev,
>  }
>  
>  /*
> + * On the host ITS @its, map @nr_events consecutive LPIs.
> + * The mapping connects a device @devid and event @eventid pair to LPI @lpi,
> + * increasing both @eventid and @lpi to cover the number of requested LPIs.
> + */
> +static int gicv3_its_map_host_events(struct host_its *its,
> +                                     uint32_t devid, uint32_t eventid,
> +                                     uint32_t lpi, uint32_t nr_events)
> +{
> +    uint32_t i;
> +    int ret;
> +
> +    for ( i = 0; i < nr_events; i++ )
> +    {
> +        /* For now we map every host LPI to host CPU 0 */
> +        ret = its_send_cmd_mapti(its, devid, eventid + i, lpi + i, 0);
> +        if ( ret )
> +            return ret;
> +
> +        ret = its_send_cmd_inv(its, devid, eventid + i);
> +        if ( ret )
> +            return ret;
> +    }
> +
> +    ret = its_send_cmd_sync(its, 0);
> +    if ( ret )
> +        return ret;
> +
> +    return gicv3_its_wait_commands(its);
> +}
> +
> +/*
>   * Map a hardware device, identified by a certain host ITS and its device ID
>   * to domain d, a guest ITS (identified by its doorbell address) and device ID.
>   * Also provide the number of events (MSIs) needed for that device.
> @@ -537,7 +599,7 @@ int gicv3_its_map_guest_device(struct domain *d,
>      struct host_its *hw_its;
>      struct its_devices *dev = NULL;
>      struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
> -    int ret = -ENOENT;
> +    int ret = -ENOENT, i;
>  
>      hw_its = gicv3_its_find_by_doorbell(host_doorbell);
>      if ( !hw_its )
> @@ -595,6 +657,11 @@ int gicv3_its_map_guest_device(struct domain *d,
>      if ( !dev->pend_irqs )
>          goto out_unlock;
>  
> +    dev->host_lpi_blocks = xzalloc_array(uint32_t,
> +                                         DIV_ROUND_UP(nr_events, LPI_BLOCK));
> +    if ( !dev->host_lpi_blocks )
> +        goto out_unlock;
> +
>      ret = its_send_cmd_mapd(hw_its, host_devid,
>                              fls(ROUNDUP(nr_events, LPI_BLOCK) - 1) - 1,
>                              virt_to_maddr(itt_addr), true);
> @@ -613,10 +680,28 @@ int gicv3_its_map_guest_device(struct domain *d,
>  
>      spin_unlock(&d->arch.vgic.its_devices_lock);
>  
> +    /*
> +     * Map all host LPIs for this device up front. We can't afford to queue
> +     * any host ITS commands later on during the guest's runtime.
> +     */
> +    for ( i = 0; i < DIV_ROUND_UP(nr_events, LPI_BLOCK); i++ )
> +    {
> +        ret = gicv3_allocate_host_lpi_block(d, &dev->host_lpi_blocks[i]);
> +        if ( ret < 0 )
> +            goto out;
> +
> +        ret = gicv3_its_map_host_events(hw_its, host_devid, i * LPI_BLOCK,
> +                                        dev->host_lpi_blocks[i], LPI_BLOCK);
> +        if ( ret < 0 )
> +            goto out;
> +    }
> +
>      return 0;
>  
>  out_unlock:
>      spin_unlock(&d->arch.vgic.its_devices_lock);
> +
> +out:
>      if ( dev )
>      {
>          xfree(dev->pend_irqs);
> diff --git a/xen/arch/arm/gic-v3-lpi.c b/xen/arch/arm/gic-v3-lpi.c
> index d85d63d..d642cc5 100644
> --- a/xen/arch/arm/gic-v3-lpi.c
> +++ b/xen/arch/arm/gic-v3-lpi.c
> @@ -20,25 +20,55 @@
>  
>  #include <xen/lib.h>
>  #include <xen/mm.h>
> +#include <xen/sched.h>
>  #include <xen/sizes.h>
> +#include <asm/atomic.h>
> +#include <asm/domain.h>
>  #include <asm/gic.h>
>  #include <asm/gic_v3_defs.h>
>  #include <asm/gic_v3_its.h>
>  #include <asm/io.h>
>  #include <asm/page.h>
>  
> +/*
> + * There could be a lot of LPIs on the host side, and they always go to
> + * a guest. So having a struct irq_desc for each of them would be wasteful
> + * and useless.
> + * Instead just store enough information to find the right VCPU to inject
> + * those LPIs into, which just requires the virtual LPI number.
> + * To avoid a global lock on this data structure, this is using a lockless
> + * approach relying on the architectural atomicity of native data types:
> + * We read or write the "data" view of this union atomically, then can
> + * access the broken-down fields in our local copy.
> + */
> +union host_lpi {
> +    uint64_t data;
> +    struct {
> +        uint32_t virt_lpi;
> +        uint16_t dom_id;
> +        uint16_t vcpu_id;
> +    };
> +};
> +
>  #define LPI_PROPTABLE_NEEDS_FLUSHING    (1U << 0)
>  /* Global state */
>  static struct {
>      /* The global LPI property table, shared by all redistributors. */
>      uint8_t *lpi_property;
>      /*
> +     * A two-level table to look up, for each LPI firing on the host,
> +     * the VCPU and virtual LPI number to inject.
> +     */
> +    union host_lpi **host_lpis;
> +    /*
>       * Number of physical LPIs the host supports. This is a property of
>       * the GIC hardware. We depart from the habit of naming these things
>       * "physical" in Xen, as the GICv3/4 spec uses the term "physical LPI"
>       * in a different context to differentiate them from "virtual LPIs".
>       */
>      unsigned long int nr_host_lpis;
> +    /* Protects allocation and deallocation of host LPIs, but not the access */
> +    spinlock_t host_lpis_lock;
>      unsigned int flags;
>  } lpi_data;
>  
> @@ -51,6 +81,19 @@ struct lpi_redist_data {
>  static DEFINE_PER_CPU(struct lpi_redist_data, lpi_redist);
>  
>  #define MAX_PHYS_LPIS   (lpi_data.nr_host_lpis - LPI_OFFSET)
> +#define HOST_LPIS_PER_PAGE      (PAGE_SIZE / sizeof(union host_lpi))
> +
> +static union host_lpi *gic_get_host_lpi(uint32_t plpi)
> +{
> +    if ( !is_lpi(plpi) || plpi >= MAX_PHYS_LPIS + LPI_OFFSET )
> +        return NULL;
> +
> +    plpi -= LPI_OFFSET;
> +    if ( !lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE] )
> +        return NULL;
> +
> +    return &lpi_data.host_lpis[plpi / HOST_LPIS_PER_PAGE][plpi % HOST_LPIS_PER_PAGE];
> +}
>  
>  /* Stores this redistributor's physical address and ID in a per-CPU variable */
>  void gicv3_set_redist_address(paddr_t address, unsigned int redist_id)
> @@ -212,15 +255,168 @@ int gicv3_lpi_init_rdist(void __iomem * rdist_base)
>  static unsigned int max_lpi_bits = 20;
>  integer_param("max_lpi_bits", max_lpi_bits);
>  
> +/*
> + * Allocate the 2nd level array for host LPIs. This one holds pointers
> + * to the page with the actual "union host_lpi" entries. Our LPI limit
> + * avoids excessive memory usage.
> + */
>  int gicv3_lpi_init_host_lpis(unsigned int hw_lpi_bits)
>  {
> +    int nr_lpi_ptrs;
> +
> +    /* We rely on the data structure being atomically accessible. */
> +    BUILD_BUG_ON(sizeof(union host_lpi) > sizeof(unsigned long));
> +
>      lpi_data.nr_host_lpis = BIT_ULL(min(hw_lpi_bits, max_lpi_bits));
>  
> +    spin_lock_init(&lpi_data.host_lpis_lock);
> +
> +    nr_lpi_ptrs = MAX_PHYS_LPIS / (PAGE_SIZE / sizeof(union host_lpi));
> +    lpi_data.host_lpis = xzalloc_array(union host_lpi *, nr_lpi_ptrs);
> +    if ( !lpi_data.host_lpis )
> +        return -ENOMEM;
> +
>      printk("GICv3: using at most %lu LPIs on the host.\n", MAX_PHYS_LPIS);
>  
>      return 0;
>  }
>  
> +static int find_unused_host_lpi(uint32_t start, uint32_t *index)
> +{
> +    unsigned int chunk;
> +    uint32_t i = *index;
> +
> +    ASSERT(spin_is_locked(&lpi_data.host_lpis_lock));
> +
> +    for ( chunk = start; chunk < MAX_PHYS_LPIS / HOST_LPIS_PER_PAGE; chunk++ )
> +    {
> +        /* If we hit an unallocated chunk, use entry 0 in that one. */
> +        if ( !lpi_data.host_lpis[chunk] )
> +        {
> +            *index = 0;
> +            return chunk;
> +        }
> +
> +        /* Find an unallocated entry in this chunk. */
> +        for ( ; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
> +        {
> +            if ( lpi_data.host_lpis[chunk][i].dom_id == DOMID_INVALID )
> +            {
> +                *index = i;
> +                return chunk;
> +            }
> +        }
> +        i = 0;
> +    }
> +
> +    return -1;
> +}
> +
> +/*
> + * Allocate a block of 32 LPIs on the given host ITS for device "devid",
> + * starting with "eventid". Put them into the respective ITT by issuing a
> + * MAPTI command for each of them.
> + */
> +int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi)
> +{
> +    static uint32_t next_lpi = 0;
> +    uint32_t lpi, lpi_idx = next_lpi % HOST_LPIS_PER_PAGE;
> +    int chunk;
> +    int i;
> +
> +    spin_lock(&lpi_data.host_lpis_lock);
> +    chunk = find_unused_host_lpi(next_lpi / HOST_LPIS_PER_PAGE, &lpi_idx);
> +
> +    if ( chunk == -1 )           /* rescan for a hole from the beginning */
> +    {
> +        lpi_idx = 0;
> +        chunk = find_unused_host_lpi(0, &lpi_idx);
> +        if ( chunk == -1 )
> +        {
> +            spin_unlock(&lpi_data.host_lpis_lock);
> +            return -ENOSPC;
> +        }
> +    }
> +
> +    /* If we hit an unallocated chunk, we initialize it and use entry 0. */
> +    if ( !lpi_data.host_lpis[chunk] )
> +    {
> +        union host_lpi *new_chunk;
> +
> +        /* TODO: NUMA locality for quicker IRQ path? */
> +        new_chunk = xmalloc_bytes(PAGE_SIZE);
> +        if ( !new_chunk )
> +        {
> +            spin_unlock(&lpi_data.host_lpis_lock);
> +            return -ENOMEM;
> +        }
> +
> +        for ( i = 0; i < HOST_LPIS_PER_PAGE; i += LPI_BLOCK )
> +            new_chunk[i].dom_id = DOMID_INVALID;
> +
> +        lpi_data.host_lpis[chunk] = new_chunk;
> +        lpi_idx = 0;
> +    }
> +
> +    lpi = chunk * HOST_LPIS_PER_PAGE + lpi_idx;
> +
> +    for ( i = 0; i < LPI_BLOCK; i++ )
> +    {
> +        union host_lpi hlpi;
> +
> +        /*
> +         * Mark this host LPI as belonging to the domain, but don't assign
> +         * any virtual LPI or a VCPU yet.
> +         */
> +        hlpi.virt_lpi = INVALID_LPI;
> +        hlpi.dom_id = d->domain_id;
> +        hlpi.vcpu_id = ~0;
> +        write_u64_atomic(&lpi_data.host_lpis[chunk][lpi_idx + i].data,
> +                         hlpi.data);
> +
> +        /*
> +         * Enable this host LPI, so we don't have to do this during the
> +         * guest's runtime.
> +         */
> +        lpi_data.lpi_property[lpi + i] |= LPI_PROP_ENABLED;
> +    }
> +
> +    /*
> +     * We have allocated and initialized the host LPI entries, so it's safe
> +     * to drop the lock now. Access to the structures can be done concurrently
> +     * as it involves only an atomic uint64_t access.
> +     */
> +    spin_unlock(&lpi_data.host_lpis_lock);
> +
> +    if ( lpi_data.flags & LPI_PROPTABLE_NEEDS_FLUSHING )
> +        clean_and_invalidate_dcache_va_range(&lpi_data.lpi_property[lpi],
> +                                             LPI_BLOCK);
> +
> +    next_lpi = lpi + LPI_BLOCK;
> +    *first_lpi = lpi + LPI_OFFSET;
> +
> +    return 0;
> +}
> +
> +void gicv3_free_host_lpi_block(uint32_t first_lpi)
> +{
> +    union host_lpi *hlpi, empty_lpi = { .dom_id = DOMID_INVALID };
> +    int i;
> +
> +    hlpi = gic_get_host_lpi(first_lpi);
> +    if ( !hlpi )
> +        return;         /* Nothing to free here. */
> +
> +    spin_lock(&lpi_data.host_lpis_lock);
> +
> +    for ( i = 0; i < LPI_BLOCK; i++ )
> +        write_u64_atomic(&hlpi[i].data, empty_lpi.data);
> +
> +    spin_unlock(&lpi_data.host_lpis_lock);
> +
> +    return;
> +}
> +
>  /*
>   * Local variables:
>   * mode: C
> diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
> index 836a103..d04bd04 100644
> --- a/xen/include/asm-arm/gic.h
> +++ b/xen/include/asm-arm/gic.h
> @@ -220,6 +220,8 @@ enum gic_version {
>      GIC_V3,
>  };
>  
> +#define INVALID_LPI     0
> +
>  extern enum gic_version gic_hw_version(void);
>  
>  /* Program the IRQ type into the GIC */
> diff --git a/xen/include/asm-arm/gic_v3_its.h b/xen/include/asm-arm/gic_v3_its.h
> index 4ade5f6..7b47596 100644
> --- a/xen/include/asm-arm/gic_v3_its.h
> +++ b/xen/include/asm-arm/gic_v3_its.h
> @@ -106,6 +106,9 @@
>  #define HOST_ITS_FLUSH_CMD_QUEUE        (1U << 0)
>  #define HOST_ITS_USES_PTA               (1U << 1)
>  
> +/* We allocate LPIs on the hosts in chunks of 32 to reduce handling overhead. */
> +#define LPI_BLOCK                       32
> +
>  /* data structure for each hardware ITS */
>  struct host_its {
>      struct list_head entry;
> @@ -153,6 +156,8 @@ int gicv3_its_map_guest_device(struct domain *d,
>                                 paddr_t guest_doorbell, uint32_t guest_devid,
>                                 uint32_t nr_events, bool valid);
>  void gicv3_its_unmap_all_devices(struct domain *d);
> +int gicv3_allocate_host_lpi_block(struct domain *d, uint32_t *first_lpi);
> +void gicv3_free_host_lpi_block(uint32_t first_lpi);
>  
>  #else
>  
> diff --git a/xen/include/asm-arm/irq.h b/xen/include/asm-arm/irq.h
> index 13528c0..d16affc 100644
> --- a/xen/include/asm-arm/irq.h
> +++ b/xen/include/asm-arm/irq.h
> @@ -42,6 +42,11 @@ struct irq_desc *__irq_to_desc(int irq);
>  
>  void do_IRQ(struct cpu_user_regs *regs, unsigned int irq, int is_fiq);
>  
> +static inline bool is_lpi(unsigned int irq)
> +{
> +    return irq >= LPI_OFFSET;
> +}
> +
>  #define domain_pirq_to_irq(d, pirq) (pirq)
>  
>  bool_t is_assignable_irq(unsigned int irq);
> -- 
> 2.9.0
> 
