Re: [Xen-devel] [PATCH v4 11/28] x86/vvtd: Process interrupt remapping request
On Fri, Nov 17, 2017 at 02:22:18PM +0800, Chao Gao wrote:
> When a remapping interrupt request arrives, remapping hardware computes the
> interrupt_index per the algorithm described in the VT-d spec section
> "Interrupt Remapping Table", interprets the IRTE and generates a remapped
> interrupt request.
>
> This patch introduces viommu_handle_irq_request() to emulate the process by
> which remapping hardware handles a remapping interrupt request. This patch
> also introduces a counter, inflight_intr, which is used to count the number
> of interrupts being handled. The reason we need this counter is that VT-d
> hardware should drain in-flight interrupts before setting flags to show
> that some operations are completed. These operations include enabling
> interrupt remapping and performing certain kinds of invalidation requests.
> In vvtd, we also try to drain in-flight interrupts by waiting until
> inflight_intr decreases to 0.
>
> Signed-off-by: Chao Gao <chao.gao@xxxxxxxxx>
> Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
>
> ---
> v4:
> - use "#define" to define interrupt remapping transition faults
> rather than using an enum
> - use switch-case rather than if-else in irq_remapping_request_index()
> and vvtd_irq_request_sanity_check()
> - introduce a counter inflight_intr
>
> v3:
> - Encode map_guest_page()'s error into void* to avoid using another parameter
> ---
>  xen/drivers/passthrough/vtd/iommu.h |  15 +++
>  xen/drivers/passthrough/vtd/vvtd.c  | 219 ++++++++++++++++++++++++++++++++++++
>  2 files changed, 234 insertions(+)
>
> diff --git a/xen/drivers/passthrough/vtd/iommu.h b/xen/drivers/passthrough/vtd/iommu.h
> index 9c59aeb..82edd2a 100644
> --- a/xen/drivers/passthrough/vtd/iommu.h
> +++ b/xen/drivers/passthrough/vtd/iommu.h
> @@ -216,6 +216,15 @@
> #define dma_frcd_source_id(c) (c & 0xffff)
> #define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
>
> +/* Interrupt remapping transition faults */
> +#define VTD_FR_IR_REQ_RSVD 0x20
> +#define VTD_FR_IR_INDEX_OVER 0x21
> +#define VTD_FR_IR_ENTRY_P 0x22
> +#define VTD_FR_IR_ROOT_INVAL 0x23
> +#define VTD_FR_IR_IRTE_RSVD 0x24
> +#define VTD_FR_IR_REQ_COMPAT 0x25
> +#define VTD_FR_IR_SID_ERR 0x26
> +
> /*
> * 0: Present
> * 1-11: Reserved
> @@ -356,6 +365,12 @@ struct iremap_entry {
> };
>
> /*
> + * When VT-d doesn't enable extended interrupt mode, hardware interprets
> + * only 8 bits ([15:8]) of the Destination-ID field in the IRTEs.
> + */
> +#define IRTE_xAPIC_DEST_MASK 0xff00
> +
> +/*
> * Posted-interrupt descriptor address is 64 bits wide and 64-byte aligned;
> * only the upper 26 bits of the least significant 32 bits are available.
> */
> diff --git a/xen/drivers/passthrough/vtd/vvtd.c b/xen/drivers/passthrough/vtd/vvtd.c
> index 06e522a..927e715 100644
> --- a/xen/drivers/passthrough/vtd/vvtd.c
> +++ b/xen/drivers/passthrough/vtd/vvtd.c
> @@ -22,11 +22,15 @@
> #include <xen/types.h>
> #include <xen/viommu.h>
> #include <xen/xmalloc.h>
> +#include <asm/apic.h>
> #include <asm/current.h>
> +#include <asm/event.h>
> +#include <asm/io_apic.h>
> #include <asm/hvm/domain.h>
> #include <asm/p2m.h>
>
> #include "iommu.h"
> +#include "vtd.h"
>
> /* Supported capabilities by vvtd */
> #define VVTD_MAX_CAPS VIOMMU_CAP_IRQ_REMAPPING
> @@ -52,6 +56,8 @@ struct vvtd {
> uint64_t base_addr;
> /* Point back to the owner domain */
> struct domain *domain;
> + /* # of in-flight interrupts */
> + atomic_t inflight_intr;
>
> struct hvm_hw_vvtd hw;
> void *irt_base;
> @@ -181,6 +187,109 @@ static void unmap_guest_pages(void *va, uint32_t nr)
> put_page_and_type(mfn_to_page(mfn[i]));
> }
>
> +static int vvtd_delivery(struct domain *d, uint8_t vector,
> + uint32_t dest, bool dest_mode,
> + uint8_t delivery_mode, uint8_t trig_mode)
> +{
> + struct vlapic *target;
> + struct vcpu *v;
> +
> + switch ( delivery_mode )
> + {
> + case dest_LowestPrio:
> + target = vlapic_lowest_prio(d, NULL, 0, dest, dest_mode);
> + if ( target != NULL )
> + {
> + vvtd_debug("d%d: dest=v%d dlm=%x vector=%d trig_mode=%d\n",
> + vlapic_domain(target)->domain_id,
> + vlapic_vcpu(target)->vcpu_id,
> + delivery_mode, vector, trig_mode);
> + vlapic_set_irq(target, vector, trig_mode);
> + break;
> + }
> + vvtd_debug("d%d: null round robin: vector=%02x\n",
> + d->domain_id, vector);
> + break;
> +
> + case dest_Fixed:
> + for_each_vcpu ( d, v )
> + if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) )
> + {
> + vvtd_debug("d%d: dest=v%d dlm=%x vector=%d trig_mode=%d\n",
> + v->domain->domain_id, v->vcpu_id,
> + delivery_mode, vector, trig_mode);
> + vlapic_set_irq(vcpu_vlapic(v), vector, trig_mode);
> + }
> + break;
> +
> + case dest_NMI:
> + for_each_vcpu ( d, v )
> + if ( vlapic_match_dest(vcpu_vlapic(v), NULL, 0, dest, dest_mode) &&
> + !test_and_set_bool(v->nmi_pending) )
> + vcpu_kick(v);
Doing these loops here seems quite bad from a performance PoV,
especially taking into account that this code is going to be used with
more than 128 vCPUs.
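For instance, something along these lines might avoid the full scan for
physical destination mode (a sketch only, untested; it assumes Xen's
default APIC ID == vcpu_id * 2 layout, which would need checking):

    /* Hypothetical: resolve a physical-mode destination directly,
     * instead of iterating over every vCPU of the domain. */
    static struct vcpu *vvtd_lookup_dest(struct domain *d, uint32_t dest)
    {
        unsigned int vcpu_id = dest / 2;

        return vcpu_id < d->max_vcpus ? d->vcpu[vcpu_id] : NULL;
    }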
> + break;
> +
> + default:
> + gdprintk(XENLOG_WARNING, "Unsupported VTD delivery mode %d\n",
> + delivery_mode);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Compute the IRTE index for a given interrupt request. On success, return
> + * 0 and set index to reference the corresponding IRTE. Otherwise, return
> + * < 0, i.e. -1 when the irq request isn't in remapping format.
> + */
> +static int irq_remapping_request_index(
> + const struct arch_irq_remapping_request *irq, uint32_t *index)
> +{
> + switch ( irq->type )
> + {
> + case VIOMMU_REQUEST_IRQ_MSI:
> + {
> + struct msi_msg_remap_entry msi_msg =
> + {
> + .address_lo = { .val = irq->msg.msi.addr },
Can't you just use .address_lo.val = irq->...
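I.e. something like this (a sketch, using C99 nested designated
initializers):

    struct msi_msg_remap_entry msi_msg = {
        .address_lo.val = irq->msg.msi.addr,
        .data = irq->msg.msi.data,
    };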
> + .data = irq->msg.msi.data,
> + };
> +
> + if ( !msi_msg.address_lo.format )
> + return -1;
In all the other functions you already return some kind of meaningful
error code, please do so here also.
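E.g. (a sketch; -EINVAL picked just for illustration):

    if ( !msi_msg.address_lo.format )
        return -EINVAL;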
> +
> + *index = (msi_msg.address_lo.index_15 << 15) +
> + msi_msg.address_lo.index_0_14;
> + if ( msi_msg.address_lo.SHV )
> + *index += (uint16_t)msi_msg.data;
> + break;
> + }
> +
> + case VIOMMU_REQUEST_IRQ_APIC:
> + {
> + struct IO_APIC_route_remap_entry remap_rte = { .val = irq->msg.rte };
> +
> + if ( !remap_rte.format )
> + return -1;
> +
> + *index = (remap_rte.index_15 << 15) + remap_rte.index_0_14;
> + break;
> + }
> +
> + default:
> + ASSERT_UNREACHABLE();
> + }
> +
> + return 0;
> +}
> +
> +static inline uint32_t irte_dest(struct vvtd *vvtd, uint32_t dest)
> +{
> + /* In xAPIC mode, only 8 bits ([15:8]) are valid */
> + return vvtd->hw.eim_enabled ? dest
> + : MASK_EXTR(dest, IRTE_xAPIC_DEST_MASK);
> +}
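(For reference: Xen's MASK_EXTR(v, m) is ((v) & (m)) / ((m) & -(m)), so
with IRTE_xAPIC_DEST_MASK == 0xff00 the expression above is just
(dest & 0xff00) >> 8, e.g.:

    MASK_EXTR(0x1234, IRTE_xAPIC_DEST_MASK) == 0x12  /* bits [15:8] */
)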
> +
> static void write_gcmd_ire(struct vvtd *vvtd, uint32_t val)
> {
> bool set = val & DMA_GCMD_IRE;
> @@ -323,6 +432,115 @@ static const struct hvm_mmio_ops vvtd_mmio_ops = {
> .write = vvtd_write
> };
>
> +static void vvtd_handle_fault(struct vvtd *vvtd,
> + const struct arch_irq_remapping_request *irq,
> + struct iremap_entry *irte,
> + unsigned int fault)
> +{
> + switch ( fault )
> + {
> + case VTD_FR_IR_SID_ERR:
> + case VTD_FR_IR_IRTE_RSVD:
> + case VTD_FR_IR_ENTRY_P:
> + if ( qinval_fault_disable(*irte) )
> + break;
> + /* fall through */
> + case VTD_FR_IR_REQ_RSVD:
> + case VTD_FR_IR_INDEX_OVER:
> + case VTD_FR_IR_ROOT_INVAL:
> + /* TODO: handle fault (e.g. record and report this fault to VM) */
> + break;
> +
> + default:
> + vvtd_debug("d%d can't handle VT-d fault %x\n",
> vvtd->domain->domain_id,
> + fault);
> + }
> + return;
> +}
> +
> +static bool vvtd_irq_request_sanity_check(const struct vvtd *vvtd,
> + const struct arch_irq_remapping_request *irq)
> +{
> + switch ( irq->type )
> + {
> + case VIOMMU_REQUEST_IRQ_APIC:
> + {
> + struct IO_APIC_route_remap_entry rte = { .val = irq->msg.rte };
> +
> + return !rte.reserved;
> + }
> +
> + case VIOMMU_REQUEST_IRQ_MSI:
> + return true;
> + }
> +
> + ASSERT_UNREACHABLE();
> + return false;
> +}
> +
> +static int vvtd_get_entry(struct vvtd *vvtd,
> + const struct arch_irq_remapping_request *irq,
> + struct iremap_entry *dest)
const for both vvtd and dest?
> +{
> + uint32_t entry;
> + struct iremap_entry irte;
> + int ret = irq_remapping_request_index(irq, &entry);
> +
> + ASSERT(!ret);
> +
> + vvtd_debug("d%d: interpret a request with index %x\n",
> + vvtd->domain->domain_id, entry);
> +
> + if ( !vvtd_irq_request_sanity_check(vvtd, irq) )
> + return VTD_FR_IR_REQ_RSVD;
> + else if ( entry > vvtd->hw.irt_max_entry )
> + return VTD_FR_IR_INDEX_OVER;
> + else if ( !vvtd->irt_base )
No need for the 'else', since you are already using return.
> + return VTD_FR_IR_ROOT_INVAL;
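I.e. a sketch:

    if ( !vvtd_irq_request_sanity_check(vvtd, irq) )
        return VTD_FR_IR_REQ_RSVD;
    if ( entry > vvtd->hw.irt_max_entry )
        return VTD_FR_IR_INDEX_OVER;
    if ( !vvtd->irt_base )
        return VTD_FR_IR_ROOT_INVAL;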
> +
> + irte = ((struct iremap_entry*)vvtd->irt_base)[entry];
> +
> + if ( !qinval_present(irte) )
> + ret = VTD_FR_IR_ENTRY_P;
> + else if ( (irte.remap.res_1 || irte.remap.res_2 || irte.remap.res_3 ||
> + irte.remap.res_4) )
> + ret = VTD_FR_IR_IRTE_RSVD;
> +
> + /* FIXME: We don't check against the source ID */
> +
> + dest->val = irte.val;
> +
> + return ret;
> +}
> +
> +static int vvtd_handle_irq_request(const struct domain *d,
Constifying domain here is not the best practice IMHO. In the function
you are actually modifying vvtd, which is fine because it's a pointer,
but conceptually it's part of the domain.
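I.e. drop the const (sketch):

    static int vvtd_handle_irq_request(struct domain *d,
                                       const struct arch_irq_remapping_request *irq)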
> + const struct arch_irq_remapping_request *irq)
> +{
> + struct iremap_entry irte;
> + int ret;
> + struct vvtd *vvtd = domain_vvtd(d);
> +
> + if ( !vvtd || !vvtd->hw.intremap_enabled )
> + return -ENODEV;
> +
> + atomic_inc(&vvtd->inflight_intr);
> + ret = vvtd_get_entry(vvtd, irq, &irte);
> + if ( ret )
> + {
> + vvtd_handle_fault(vvtd, irq, &irte, ret);
> + goto out;
> + }
> +
> + ret = vvtd_delivery(vvtd->domain, irte.remap.vector,
> + irte_dest(vvtd, irte.remap.dst),
> + irte.remap.dm, irte.remap.dlm,
> + irte.remap.tm);
> +
> + out:
> + atomic_dec(&vvtd->inflight_intr);
So inflight_intr seems quite pointless: you only use it in this
function, and it's never read AFAICT.
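If a later patch is meant to drain by polling it, something like this
minimal sketch (hypothetical, not in the series as posted) would be the
consumer:

    /* Wait for in-flight interrupts to be delivered before reporting
     * completion of IRE enabling or of an invalidation request. */
    while ( atomic_read(&vvtd->inflight_intr) )
        cpu_relax();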
Thanks, Roger.
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxx
https://lists.xenproject.org/mailman/listinfo/xen-devel