|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [PATCH v3 5/7] x86/dmop: Add XEN_DMOP_{bind,unbind}_pt_msi_irq DM ops
On Mon, Mar 09, 2026 at 12:31:03PM +0000, Julian Vetter wrote:
> Add two DM ops for MSI passthrough IRQs. These new DM ops take the raw
> MSI address and data fields rather than a pre-decoded gflags value. Xen
> decodes the destination ID via msi_addr_to_gflags(), including any
> extended destination bits in address[11:5]. This means the device model
> does not need to understand the extended destination ID encoding, and
> simply forwards the MSI address it observes from the guest.
>
> Signed-off-by: Julian Vetter <julian.vetter@xxxxxxxxxx>
> ---
> Changes in V3:
> - New patch addressing feedback from Roger
> ---
> tools/include/xendevicemodel.h | 37 ++++++++++++
> tools/libs/devicemodel/core.c | 44 ++++++++++++++
> xen/arch/x86/hvm/dm.c | 102 +++++++++++++++++++++++++++++++++
> xen/include/public/hvm/dm_op.h | 23 ++++++++
> xen/include/xlat.lst | 1 +
> 5 files changed, 207 insertions(+)
>
> diff --git a/tools/include/xendevicemodel.h b/tools/include/xendevicemodel.h
> index 227e7fd810..0d5d7b0ff1 100644
> --- a/tools/include/xendevicemodel.h
> +++ b/tools/include/xendevicemodel.h
> @@ -375,6 +375,43 @@ int xendevicemodel_nr_vcpus(
> */
> int xendevicemodel_restrict(xendevicemodel_handle *dmod, domid_t domid);
>
> +/**
> + * This function binds a passthrough physical IRQ to a guest MSI vector
> + * using raw MSI address/data fields. Unlike XEN_DOMCTL_bind_pt_irq,
> + * this interface supports extended (15-bit) destination IDs by having
> + * Xen decode the MSI address internally.
> + *
> + * @parm dmod a handle to an open devicemodel interface.
> + * @parm domid the domain id to be serviced.
> + * @parm machine_irq the physical IRQ number (pirq).
> + * @parm gvec the guest interrupt vector.
> + * @parm msi_addr the MSI address (0xfeexxxxx, includes extended dest ID).
> + * @parm msi_data the MSI data (vector, delivery mode, trigger mode).
> + * @parm gtable the MSI-X table base GFN, or 0 for plain MSI.
> + * @parm unmasked if non-zero, leave the IRQ unmasked after binding.
> + * @return 0 on success, -1 on failure.
> + */
> +int xendevicemodel_bind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> + uint8_t gvec, uint64_t msi_addr, uint32_t msi_data, uint64_t gtable,
> + int unmasked);
> +
> +/**
> + * This function unbinds a passthrough physical IRQ previously bound
> + * with xendevicemodel_bind_pt_msi_irq.
> + *
> + * @parm dmod a handle to an open devicemodel interface.
> + * @parm domid the domain id to be serviced.
> + * @parm machine_irq the physical IRQ number (pirq).
> + * @parm gvec the guest interrupt vector.
> + * @parm msi_addr the MSI address.
> + * @parm msi_data the MSI data.
> + * @return 0 on success, -1 on failure.
> + */
> +int xendevicemodel_unbind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> + uint8_t gvec, uint64_t msi_addr, uint32_t msi_data);
> +
> #endif /* XENDEVICEMODEL_H */
>
> /*
> diff --git a/tools/libs/devicemodel/core.c b/tools/libs/devicemodel/core.c
> index 8e619eeb0a..4a52fe4750 100644
> --- a/tools/libs/devicemodel/core.c
> +++ b/tools/libs/devicemodel/core.c
> @@ -645,6 +645,50 @@ int xendevicemodel_nr_vcpus(
> return 0;
> }
>
> +int xendevicemodel_bind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> + uint8_t gvec, uint64_t msi_addr, uint32_t msi_data, uint64_t gtable,
> + int unmasked)
> +{
> + struct xen_dm_op op;
> + struct xen_dm_op_bind_pt_msi_irq *data;
> +
> + memset(&op, 0, sizeof(op));
> +
> + op.op = XEN_DMOP_bind_pt_msi_irq;
> + data = &op.u.bind_pt_msi_irq;
> +
> + data->machine_irq = machine_irq;
> + data->gvec = gvec;
> + data->data = msi_data;
> + data->addr = msi_addr;
> + data->gtable = gtable;
> + if ( unmasked )
> + data->flags |= XEN_DMOP_MSI_FLAG_UNMASKED;
> +
> + return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
> +}
> +
> +int xendevicemodel_unbind_pt_msi_irq(
> + xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> + uint8_t gvec, uint64_t msi_addr, uint32_t msi_data)
> +{
> + struct xen_dm_op op;
> + struct xen_dm_op_bind_pt_msi_irq *data;
> +
> + memset(&op, 0, sizeof(op));
> +
> + op.op = XEN_DMOP_unbind_pt_msi_irq;
> + data = &op.u.bind_pt_msi_irq;
> +
> + data->machine_irq = machine_irq;
> + data->gvec = gvec;
> + data->data = msi_data;
> + data->addr = msi_addr;
> +
> + return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
> +}
> +
> int xendevicemodel_restrict(xendevicemodel_handle *dmod, domid_t domid)
> {
> return osdep_xendevicemodel_restrict(dmod, domid);
> diff --git a/xen/arch/x86/hvm/dm.c b/xen/arch/x86/hvm/dm.c
> index 3b53471af0..3d530d948f 100644
> --- a/xen/arch/x86/hvm/dm.c
> +++ b/xen/arch/x86/hvm/dm.c
> @@ -7,12 +7,15 @@
> #include <xen/guest_access.h>
> #include <xen/dm.h>
> #include <xen/hypercall.h>
> +#include <xen/iocap.h>
> +#include <xen/iommu.h>
> #include <xen/ioreq.h>
> #include <xen/nospec.h>
> #include <xen/sched.h>
>
> #include <asm/hap.h>
> #include <asm/hvm/cacheattr.h>
> +#include <asm/msi.h>
> #include <asm/shadow.h>
>
> #include <xsm/xsm.h>
> @@ -322,6 +325,25 @@ static int inject_event(struct domain *d,
> return 0;
> }
>
> +static uint32_t msi_addr_to_gflags(uint64_t addr, uint32_t data, bool masked)
> +{
> + uint32_t tmp = (uint32_t)addr;
> +
> + return MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_DEST_ID_MASK),
> + XEN_DOMCTL_VMSI_X86_DEST_ID_MASK) |
> + MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_EXT_DEST_ID_MASK),
> + XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK) |
> + MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_REDIRECTION_MASK),
> + XEN_DOMCTL_VMSI_X86_RH_MASK) |
> + MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_DESTMODE_MASK),
> + XEN_DOMCTL_VMSI_X86_DM_MASK) |
> + MASK_INSR(MASK_EXTR(data, MSI_DATA_DELIVERY_MODE_MASK),
> + XEN_DOMCTL_VMSI_X86_DELIV_MASK) |
> + MASK_INSR(MASK_EXTR(data, MSI_DATA_TRIGGER_MASK),
> + XEN_DOMCTL_VMSI_X86_TRIG_MASK) |
> + (masked ? 0 : XEN_DOMCTL_VMSI_X86_UNMASKED);
> +}
> +
> int dm_op(const struct dmop_args *op_args)
> {
> struct domain *d;
> @@ -350,6 +372,8 @@ int dm_op(const struct dmop_args *op_args)
> [XEN_DMOP_relocate_memory] = sizeof(struct
> xen_dm_op_relocate_memory),
> [XEN_DMOP_pin_memory_cacheattr] = sizeof(struct
> xen_dm_op_pin_memory_cacheattr),
> [XEN_DMOP_nr_vcpus] = sizeof(struct
> xen_dm_op_nr_vcpus),
> + [XEN_DMOP_bind_pt_msi_irq] = sizeof(struct
> xen_dm_op_bind_pt_msi_irq),
> + [XEN_DMOP_unbind_pt_msi_irq] = sizeof(struct
> xen_dm_op_bind_pt_msi_irq),
> };
>
> rc = rcu_lock_remote_domain_by_id(op_args->domid, &d);
> @@ -607,6 +631,83 @@ int dm_op(const struct dmop_args *op_args)
> break;
> }
>
> + case XEN_DMOP_bind_pt_msi_irq:
> + {
> + const struct xen_dm_op_bind_pt_msi_irq *data =
> + &op.u.bind_pt_msi_irq;
> + struct xen_domctl_bind_pt_irq bind = {
> + .machine_irq = data->machine_irq,
> + .irq_type = PT_IRQ_TYPE_MSI,
> + };
> + int irq;
> +
> + rc = -EINVAL;
> + if ( data->pad0 || data->pad1 )
> + break;
> +
> + if ( data->flags & ~XEN_DMOP_MSI_FLAG_UNMASKED )
> + break;
> +
> + irq = domain_pirq_to_irq(d, bind.machine_irq);
> +
> + rc = -EPERM;
> + if ( irq <= 0 || !irq_access_permitted(current->domain, irq) )
> + break;
> +
> + bind.u.msi.gvec = data->gvec;
> + bind.u.msi.gflags =
> + msi_addr_to_gflags(data->addr, data->data,
> + !(data->flags & XEN_DMOP_MSI_FLAG_UNMASKED));
> + bind.u.msi.gtable = data->gtable;
Retrofitting the new interface into the old one seems weird. I would
do it the other way around - implement the old bind domctl on top of
an interface that's more suited for the new DM op.
That way you avoid having to expand gflags with the extended
destination field.
> +
> + rc = -ESRCH;
> + if ( is_iommu_enabled(d) )
> + {
> + pcidevs_lock();
> + rc = pt_irq_create_bind(d, &bind);
> + pcidevs_unlock();
> + }
> + if ( rc < 0 )
> + printk(XENLOG_G_ERR
> + "pt_irq_create_bind failed (%ld) for dom%d\n",
> + rc, d->domain_id);
> + break;
> + }
> +
> + case XEN_DMOP_unbind_pt_msi_irq:
> + {
> + const struct xen_dm_op_bind_pt_msi_irq *data =
> + &op.u.bind_pt_msi_irq;
> + struct xen_domctl_bind_pt_irq bind = {
> + .machine_irq = data->machine_irq,
> + .irq_type = PT_IRQ_TYPE_MSI,
> + };
> + int irq;
> +
> + rc = -EINVAL;
> + if ( data->pad0 || data->pad1 )
> + break;
> +
> + irq = domain_pirq_to_irq(d, bind.machine_irq);
> +
> + rc = -EPERM;
> + if ( irq <= 0 || !irq_access_permitted(current->domain, irq) )
> + break;
> +
> + rc = -ESRCH;
> + if ( is_iommu_enabled(d) )
> + {
> + pcidevs_lock();
> + rc = pt_irq_destroy_bind(d, &bind);
> + pcidevs_unlock();
> + }
> + if ( rc < 0 )
> + printk(XENLOG_G_ERR
> + "pt_irq_destroy_bind failed (%ld) for dom%d\n",
> + rc, d->domain_id);
> + break;
> + }
> +
> default:
> rc = ioreq_server_dm_op(&op, d, &const_op);
> break;
> @@ -643,6 +744,7 @@ CHECK_dm_op_remote_shutdown;
> CHECK_dm_op_relocate_memory;
> CHECK_dm_op_pin_memory_cacheattr;
> CHECK_dm_op_nr_vcpus;
> +CHECK_dm_op_bind_pt_msi_irq;
>
> int compat_dm_op(
> domid_t domid, unsigned int nr_bufs, XEN_GUEST_HANDLE_PARAM(void) bufs)
> diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h
> index 2bf0fdc1ae..fd0f3a6a99 100644
> --- a/xen/include/public/hvm/dm_op.h
> +++ b/xen/include/public/hvm/dm_op.h
> @@ -444,6 +444,28 @@ struct xen_dm_op_nr_vcpus {
> };
> typedef struct xen_dm_op_nr_vcpus xen_dm_op_nr_vcpus_t;
>
> +#define XEN_DMOP_bind_pt_msi_irq 21
> +#define XEN_DMOP_unbind_pt_msi_irq 22
> +
> +struct xen_dm_op_bind_pt_msi_irq {
> + /* IN - physical IRQ (pirq) */
> + uint32_t machine_irq;
> + /* IN - guest vector */
> + uint8_t gvec;
If you pass the MSI address and data fields there's no need to also
pass the vector; can't this be obtained by Xen from the MSI data field?
> + uint8_t pad0;
> + uint16_t pad1;
> + /* IN - MSI data (vector, delivery, trigger) */
> + uint32_t data;
> + /* IN - flags */
> + uint32_t flags;
> +#define XEN_DMOP_MSI_FLAG_UNMASKED (1u << 0)
> + /* IN - MSI address (0xfeexxxxx, includes ext dest) */
> + uint64_aligned_t addr;
> + /* IN - MSI-X table base GFN, 0 for MSI */
> + uint64_aligned_t gtable;
> +};
> +typedef struct xen_dm_op_bind_pt_msi_irq xen_dm_op_bind_pt_msi_irq_t;
> +
> struct xen_dm_op {
> uint32_t op;
> uint32_t pad;
> @@ -468,6 +490,7 @@ struct xen_dm_op {
> xen_dm_op_relocate_memory_t relocate_memory;
> xen_dm_op_pin_memory_cacheattr_t pin_memory_cacheattr;
> xen_dm_op_nr_vcpus_t nr_vcpus;
> + xen_dm_op_bind_pt_msi_irq_t bind_pt_msi_irq;
> } u;
> };
>
> diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
> index 9d08dcc4bb..9236394786 100644
> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -107,6 +107,7 @@
> ? dm_op_map_mem_type_to_ioreq_server hvm/dm_op.h
> ? dm_op_modified_memory hvm/dm_op.h
> ? dm_op_nr_vcpus hvm/dm_op.h
> +? dm_op_bind_pt_msi_irq hvm/dm_op.h
> ? dm_op_pin_memory_cacheattr hvm/dm_op.h
> ? dm_op_relocate_memory hvm/dm_op.h
> ? dm_op_remote_shutdown hvm/dm_op.h
> --
> 2.51.0
>
>
>
> --
> Julian Vetter | Vates Hypervisor & Kernel Developer
>
> XCP-ng & Xen Orchestra - Vates solutions
>
> web: https://vates.tech
>
|
![]() |
Lists.xenproject.org is hosted with RackSpace, monitoring our |