[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] [PATCH v4 08/27] ARM: GICv3 ITS: introduce device mapping



Hi Andre,

On 04/03/2017 09:28 PM, Andre Przywara wrote:
The ITS uses device IDs to map LPIs to a device. Dom0 will later use
those IDs, which we directly pass on to the host.
For this we have to map each device that Dom0 may request to a host
ITS device with the same identifier.
Allocate the respective memory and enter each device into an rbtree to
later be able to iterate over it or to easily teardown guests.
Because device IDs are per ITS, we need to identify a virtual ITS. We
use the doorbell address for that purpose, as it is a nice architectural
MSI property and spares us handling with opaque pointer or break
the VGIC abstraction.

Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
---
 xen/arch/arm/gic-v3-its.c        | 266 +++++++++++++++++++++++++++++++++++++++
 xen/arch/arm/vgic-v3.c           |   4 +
 xen/include/asm-arm/domain.h     |   3 +
 xen/include/asm-arm/gic_v3_its.h |  17 +++
 4 files changed, 290 insertions(+)

diff --git a/xen/arch/arm/gic-v3-its.c b/xen/arch/arm/gic-v3-its.c
index bd189e8..c7c32b9 100644
--- a/xen/arch/arm/gic-v3-its.c
+++ b/xen/arch/arm/gic-v3-its.c
@@ -21,6 +21,8 @@
 #include <xen/lib.h>
 #include <xen/delay.h>
 #include <xen/mm.h>
+#include <xen/rbtree.h>
+#include <xen/sched.h>
 #include <xen/sizes.h>
 #include <asm/gic.h>
 #include <asm/gic_v3_defs.h>
@@ -36,6 +38,26 @@
  */
 LIST_HEAD(host_its_list);

+/*
+ * Describes a device which is using the ITS and is used by a guest.
+ * Since device IDs are per ITS (in contrast to vLPIs, which are per
+ * guest), we have to differentiate between different virtual ITSes.
+ * We use the doorbell address here, since this is a nice architectural
+ * property of MSIs in general and we can easily get to the base address
+ * of the ITS and look that up.
+ */
+struct its_devices {
+    struct rb_node rbnode;
+    struct host_its *hw_its;
+    void *itt_addr;
+    paddr_t guest_doorbell;             /* Identifies the virtual ITS */
+    uint32_t host_devid;
+    uint32_t guest_devid;
+    uint32_t eventids;                  /* Number of event IDs (MSIs) */
+    uint32_t *host_lpi_blocks;          /* Which LPIs are used on the host */
+    struct pending_irq *pend_irqs;      /* One struct per event */
+};
+
 bool gicv3_its_host_has_its(void)
 {
     return !list_empty(&host_its_list);
@@ -170,6 +192,26 @@ static int its_send_cmd_mapc(struct host_its *its, 
uint32_t collection_id,
     return its_send_command(its, cmd);
 }

+static int its_send_cmd_mapd(struct host_its *its, uint32_t deviceid,
+                             uint8_t size_bits, paddr_t itt_addr, bool valid)
+{
+    uint64_t cmd[4];
+
+    if ( valid )
+    {
+        ASSERT(size_bits <= its->evid_bits);

Because of the -1 below, you expect size_bits to always be > 0. So I would add another check here.

+        ASSERT(!(itt_addr & ~GENMASK_ULL(51, 8)));
+    }
+    cmd[0] = GITS_CMD_MAPD | ((uint64_t)deviceid << 32);
+    cmd[1] = size_bits - 1;

I am sorry, I know I suggested this. How about moving size_bits - 1 under the if ( valid ) above?

This would avoid the 1 in remove_mapped_guest_device.

+    cmd[2] = itt_addr;
+    if ( valid )
+        cmd[2] |= GITS_VALID_BIT;
+    cmd[3] = 0x00;
+
+    return its_send_command(its, cmd);
+}
+
 static int its_send_cmd_inv(struct host_its *its,
                             uint32_t deviceid, uint32_t eventid)
 {
@@ -464,6 +506,64 @@ int gicv3_its_init(void)
     return 0;
 }

+static int remove_mapped_guest_device(struct its_devices *dev)
+{
+    int ret = 0;
+    unsigned int i;
+
+    if ( dev->hw_its )
+        /* MAPD also discards all events with this device ID. */
+        ret = its_send_cmd_mapd(dev->hw_its, dev->host_devid, 1, 0, false);
+
+    for ( i = 0; i < DIV_ROUND_UP(dev->eventids, LPI_BLOCK); i++ )
+        gicv3_free_host_lpi_block(dev->host_lpi_blocks[i]);

+    /* Make sure the MAPD command above is really executed. */
+    if ( !ret )
+        ret = gicv3_its_wait_commands(dev->hw_its);
+
+    /* We can't free the ITT memory if the MAPD(V=0) failed for any reason. */
+    if ( !ret )
+        xfree(dev->itt_addr);

That's a leak. How likely this can happen? How do you plan to address this?

+
+    xfree(dev->pend_irqs);
+    xfree(dev->host_lpi_blocks);
+    xfree(dev);
+
+    return 0;
+}
+
+static struct host_its *gicv3_its_find_by_doorbell(paddr_t doorbell_address)
+{
+    struct host_its *hw_its;
+
+    list_for_each_entry(hw_its, &host_its_list, entry)
+    {
+        if ( hw_its->addr + ITS_DOORBELL_OFFSET == doorbell_address )
+            return hw_its;
+    }
+
+    return NULL;
+}
+
+static int compare_its_guest_devices(struct its_devices *dev,
+                                     paddr_t doorbell, uint32_t devid)
+{
+    if ( dev->guest_doorbell < doorbell )
+        return -1;
+
+    if ( dev->guest_doorbell > doorbell )
+        return 1;
+
+    if ( dev->guest_devid < devid )
+        return -1;
+
+    if ( dev->guest_devid > devid )
+        return 1;
+
+    return 0;
+}
+
 /*
  * On the host ITS @its, map @nr_events consecutive LPIs.
  * The mapping connects a device @devid and event @eventid pair to LPI @lpi,
@@ -495,6 +595,172 @@ static int gicv3_its_map_host_events(struct host_its *its,
     return gicv3_its_wait_commands(its);
 }

+/*
+ * Map a hardware device, identified by a certain host ITS and its device ID
+ * to domain d, a guest ITS (identified by its doorbell address) and device ID.
+ * Also provide the number of events (MSIs) needed for that device.
+ * This does not check if this particular hardware device is already mapped
+ * at another domain, it is expected that this would be done by the caller.
+ */
+int gicv3_its_map_guest_device(struct domain *d,
+                               paddr_t host_doorbell, uint32_t host_devid,
+                               paddr_t guest_doorbell, uint32_t guest_devid,
+                               uint32_t nr_events, bool valid)
+{
+    void *itt_addr = NULL;
+    struct host_its *hw_its;
+    struct its_devices *dev = NULL;
+    struct rb_node **new = &d->arch.vgic.its_devices.rb_node, *parent = NULL;
+    unsigned int i;
+    int ret = -ENOENT;

You likely need to sanitize host_devid, nr_events against the hardware values.

+
+    hw_its = gicv3_its_find_by_doorbell(host_doorbell);
+    if ( !hw_its )
+        return ret;
+
+    /* check for already existing mappings */
+    spin_lock(&d->arch.vgic.its_devices_lock);
+    while ( *new )
+    {
+        struct its_devices *temp;
+        int cmp;
+
+        temp = rb_entry(*new, struct its_devices, rbnode);
+
+        parent = *new;
+        cmp = compare_its_guest_devices(temp, guest_doorbell, guest_devid);
+        if ( !cmp )
+        {
+            if ( !valid )
+                rb_erase(&temp->rbnode, &d->arch.vgic.its_devices);
+
+            spin_unlock(&d->arch.vgic.its_devices_lock);
+
+            if ( valid )
+            {
+                gprintk(XENLOG_DEBUG, "d%d: tried to remap guest ITS device 0x%x to 
host device 0x% x\n",
+                        d->domain_id, guest_devid, host_devid);

Please use printk(XENLOG_GUEST XENLOG_DEBUG ...) as you don't care about the domain that calls the function.

+                return -EBUSY;
+            }
+
+            return remove_mapped_guest_device(temp);
+        }
+
+        if ( cmp > 0 )
+            new = &((*new)->rb_left);
+        else
+            new = &((*new)->rb_right);
+    }
+
+    if ( !valid )
+        goto out_unlock;
+
+    ret = -ENOMEM;
+
+    /* An Interrupt Translation Table needs to be 256-byte aligned. */
+    itt_addr = _xzalloc(nr_events * hw_its->itte_size, 256);

The MAPD command will use the number of event bits, so you need to allocate (1 << N) * hw_its->itte_size

When you do the computation, the number of events has to be aligned to 32 to prevent any memory corruption.

This is because a guest MAPD could contain Size = 1 (i.e 2 events) but you will further down align to a minimum of 32.

Note this is the bug reported by Vijay.

+    if ( !itt_addr )
+        goto out_unlock;
+
+    dev = xzalloc(struct its_devices);
+    if ( !dev )
+        goto out_unlock;
+
+    /*
+     * Allocate the pending_irqs for each virtual LPI. They will be put
+     * into the domain's radix tree upon the guest's MAPTI command.
+     */
+    dev->pend_irqs = xzalloc_array(struct pending_irq, nr_events);
+    if ( !dev->pend_irqs )
+        goto out_unlock;
+
+    dev->host_lpi_blocks = xzalloc_array(uint32_t,
+                                         DIV_ROUND_UP(nr_events, LPI_BLOCK));

I still think it would make easier to round_up the number of events at the top of the function.

+    if ( !dev->host_lpi_blocks )
+        goto out_unlock;
+
+    ret = its_send_cmd_mapd(hw_its, host_devid,
+                            fls(ROUNDUP(nr_events, LPI_BLOCK) - 1),

See my comment above about the number of event bits.

+                            virt_to_maddr(itt_addr), true);
+    if ( ret )
+        goto out_unlock;
+
+    dev->itt_addr = itt_addr;
+    dev->hw_its = hw_its;
+    dev->guest_doorbell = guest_doorbell;
+    dev->guest_devid = guest_devid;
+    dev->host_devid = host_devid;
+    dev->eventids = nr_events;
+
+    rb_link_node(&dev->rbnode, parent, new);
+    rb_insert_color(&dev->rbnode, &d->arch.vgic.its_devices);
+
+    spin_unlock(&d->arch.vgic.its_devices_lock);
+
+    /*
+     * Map all host LPIs within this device already. We can't afford to queue
+     * any host ITS commands later on during the guest's runtime.
+     */
+    for ( i = 0; i < DIV_ROUND_UP(nr_events, LPI_BLOCK); i++ )

It is the second time in this function you do DIV_ROUND_UP. Please introduce a variable to make easier to read and avoid potential mistake.

+    {
+        ret = gicv3_allocate_host_lpi_block(d, &dev->host_lpi_blocks[i]);
+        if ( ret < 0 )
+            goto out;
+
+        ret = gicv3_its_map_host_events(hw_its, host_devid, i * LPI_BLOCK,
+                                        dev->host_lpi_blocks[i], LPI_BLOCK);
+        if ( ret < 0 )
+            goto out;
+    }
+
+    return 0;
+
+out_unlock:
+    spin_unlock(&d->arch.vgic.its_devices_lock);
+
+out:

There are a lot of things missing in the error path:
        - Free LPI block allocated
        - Send MAPD (V=0) command.

+    if ( dev )
+    {
+        xfree(dev->pend_irqs);
+        xfree(dev->host_lpi_blocks);
+    }
+    xfree(itt_addr);
+    xfree(dev);

NIT: newline here.

+    return ret;
+}
+
+/* Removing any connections a domain had to any ITS in the system. */
+void gicv3_its_unmap_all_devices(struct domain *d)
+{
+    struct rb_node *victim;
+    struct its_devices *dev;
+
+    /*
+     * This is an easily readable, but sub-optimal implementation.
+     * It uses the provided iteration wrapper and erases each node, which
+     * possibly triggers rebalancing.
+     * This seems overkill since we are going to abolish the whole tree, but
+     * avoids an open-coded re-implementation of the traversal functions with
+     * some recursive function calls.
+     */

I still maintain my point on this function. I would expect an upper layer to remove the devices on by one during the relinquish period (see domain_relinquish_resources). So we can report error back and allow preemption.

+    do {
+        spin_lock(&d->arch.vgic.its_devices_lock);
+
+        if ( (victim = rb_first(&d->arch.vgic.its_devices)) )
+        {
+
+            dev = rb_entry(victim, struct its_devices, rbnode);
+            rb_erase(victim, &d->arch.vgic.its_devices);
+
+            spin_unlock(&d->arch.vgic.its_devices_lock);
+
+            remove_mapped_guest_device(dev);

You likely need to propagate the return here.

+        }
+    } while ( victim );
+
+    spin_unlock(&d->arch.vgic.its_devices_lock);
+}
+
 /* Scan the DT for any ITS nodes and create a list of host ITSes out of it. */
 void gicv3_its_dt_init(const struct dt_device_node *node)
 {
diff --git a/xen/arch/arm/vgic-v3.c b/xen/arch/arm/vgic-v3.c
index 0679e76..3c7161b 100644
--- a/xen/arch/arm/vgic-v3.c
+++ b/xen/arch/arm/vgic-v3.c
@@ -1449,6 +1449,9 @@ static int vgic_v3_domain_init(struct domain *d)
     d->arch.vgic.nr_regions = rdist_count;
     d->arch.vgic.rdist_regions = rdist_regions;

+    spin_lock_init(&d->arch.vgic.its_devices_lock);
+    d->arch.vgic.its_devices = RB_ROOT;

I continue to maintain this is the wrong place for those 2 variables. ITS should be separated from the rest of the GICv3.

+
     /*
      * Domain 0 gets the hardware address.
      * Guests get the virtual platform layout.
@@ -1521,6 +1524,7 @@ static int vgic_v3_domain_init(struct domain *d)

 static void vgic_v3_domain_free(struct domain *d)
 {
+    gicv3_its_unmap_all_devices(d);

This is likely the wrong place to call as you would need to report back error or potential preemption. This would have to be called in domain_relinquish_resources.

This is also a call for another gic_ops callback.

Cheers,

--
Julien Grall

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
https://lists.xen.org/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.