[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 5/7] x86/dmop: Add XEN_DMOP_{bind,unbind}_pt_msi_irq DM ops


  • To: Julian Vetter <julian.vetter@xxxxxxxxxx>
  • From: Roger Pau Monné <roger.pau@xxxxxxxxxx>
  • Date: Mon, 9 Mar 2026 14:38:34 +0100
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=citrix.com; dmarc=pass action=none header.from=citrix.com; dkim=pass header.d=citrix.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=N3Kjmze1PvonsWyS6ZRnDjxJM008BAiAB/e5vCFFx6s=; b=gvZ1Cd5o/KOKbvMJxIGcvLjLlrQ3aHXEYrhn3C9YJmtnwbZMAHcFP2SGtrlJvVv9ynSzWsCJag1CgE7Trisi6+SI10SskfFku9qZd/0iFOfO0ZbOdT3fBlbs1n8Js/lXIuuCfWjmM0tMKydYUJRtaiG6LEeF8kG0wztUBoqZcK8wJfIITq9Z//dxBx6A9NyW3YIErdRcT+gl4yjpklwV2cbmfCAfMOXJK9Dng5mQs1MCuA/ZZNizHUiGKhM5yMSRdc7Sj4OnGBU1stsRnf22ps10SAsGwajgN3M9PRSR6iAL4awW0lW6r0/G0K+4ykHx4kQ4U8KVVgYGCIeE9V8W4w==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=Jhp+xVVhAZo2OX+UivxzzwGfZOmP9aacCe5it+UQEt3ShqeRA71Dvq6NDCtmJmaSZQozDuxT3Jfc20oGGF9X02PvykoTGGve3O/STervJ1t7eL6u86SmXFAbNaG75v+cjm1JTQraJuts9DSS3UATc5EB+9mAMkpg10rUWyBqGRBCMuNz4yi/JYQEIUxzWciJDy/jyV1jUvomfWbK5UWc+UZ3kfBV5V/dMfCrCXcYIcr9o4NNyKMkPUONtovToBlmYQPddDgWvmh4XCuKDdEe6DCE9q0cbz4hHh4LoAxtg1XRCxBXcuZkQOPtnVil6Ia19p/FlFJf5EBveBtRx9D/Zg==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=citrix.com;
  • Cc: xen-devel@xxxxxxxxxxxxxxxxxxxx, Jan Beulich <jbeulich@xxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Anthony PERARD <anthony.perard@xxxxxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Juergen Gross <jgross@xxxxxxxx>, Julien Grall <julien@xxxxxxx>
  • Delivery-date: Mon, 09 Mar 2026 13:38:51 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>

On Mon, Mar 09, 2026 at 12:31:03PM +0000, Julian Vetter wrote:
> Add two DM ops for MSI passthrough IRQs. These new DM ops take the raw
> MSI address and data fields rather than a pre-decoded gflags value. Xen
> decodes the destination ID via msi_addr_to_gflags(), including any
> extended destination bits in address[11:5]. This means the device model
> does not need to understand the extended destination ID encoding, and
> simply forwards the MSI address it observes from the guest.
> 
> Signed-off-by: Julian Vetter <julian.vetter@xxxxxxxxxx>
> ---
> Changes in V3:
> - New patch addressing feedback from Roger
> ---
>  tools/include/xendevicemodel.h |  37 ++++++++++++
>  tools/libs/devicemodel/core.c  |  44 ++++++++++++++
>  xen/arch/x86/hvm/dm.c          | 102 +++++++++++++++++++++++++++++++++
>  xen/include/public/hvm/dm_op.h |  23 ++++++++
>  xen/include/xlat.lst           |   1 +
>  5 files changed, 207 insertions(+)
> 
> diff --git a/tools/include/xendevicemodel.h b/tools/include/xendevicemodel.h
> index 227e7fd810..0d5d7b0ff1 100644
> --- a/tools/include/xendevicemodel.h
> +++ b/tools/include/xendevicemodel.h
> @@ -375,6 +375,43 @@ int xendevicemodel_nr_vcpus(
>   */
>  int xendevicemodel_restrict(xendevicemodel_handle *dmod, domid_t domid);
>  
> +/**
> + * This function binds a passthrough physical IRQ to a guest MSI vector
> + * using raw MSI address/data fields. Unlike XEN_DOMCTL_bind_pt_irq,
> + * this interface supports extended (15-bit) destination IDs by having
> + * Xen decode the MSI address internally.
> + *
> + * @parm dmod a handle to an open devicemodel interface.
> + * @parm domid the domain id to be serviced.
> + * @parm machine_irq the physical IRQ number (pirq).
> + * @parm gvec the guest interrupt vector.
> + * @parm msi_addr the MSI address (0xfeexxxxx, includes extended dest ID).
> + * @parm msi_data the MSI data (vector, delivery mode, trigger mode).
> + * @parm gtable the MSI-X table base GFN, or 0 for plain MSI.
> + * @parm unmasked if non-zero, leave the IRQ unmasked after binding.
> + * @return 0 on success, -1 on failure.
> + */
> +int xendevicemodel_bind_pt_msi_irq(
> +    xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> +    uint8_t gvec, uint64_t msi_addr, uint32_t msi_data, uint64_t gtable,
> +    int unmasked);
> +
> +/**
> + * This function unbinds a passthrough physical IRQ previously bound
> + * with xendevicemodel_bind_pt_msi_irq.
> + *
> + * @parm dmod a handle to an open devicemodel interface.
> + * @parm domid the domain id to be serviced.
> + * @parm machine_irq the physical IRQ number (pirq).
> + * @parm gvec the guest interrupt vector.
> + * @parm msi_addr the MSI address.
> + * @parm msi_data the MSI data.
> + * @return 0 on success, -1 on failure.
> + */
> +int xendevicemodel_unbind_pt_msi_irq(
> +    xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> +    uint8_t gvec, uint64_t msi_addr, uint32_t msi_data);
> +
>  #endif /* XENDEVICEMODEL_H */
>  
>  /*
> diff --git a/tools/libs/devicemodel/core.c b/tools/libs/devicemodel/core.c
> index 8e619eeb0a..4a52fe4750 100644
> --- a/tools/libs/devicemodel/core.c
> +++ b/tools/libs/devicemodel/core.c
> @@ -645,6 +645,50 @@ int xendevicemodel_nr_vcpus(
>      return 0;
>  }
>  
> +int xendevicemodel_bind_pt_msi_irq(
> +    xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> +    uint8_t gvec, uint64_t msi_addr, uint32_t msi_data, uint64_t gtable,
> +    int unmasked)
> +{
> +    struct xen_dm_op op;
> +    struct xen_dm_op_bind_pt_msi_irq *data;
> +
> +    memset(&op, 0, sizeof(op));
> +
> +    op.op = XEN_DMOP_bind_pt_msi_irq;
> +    data = &op.u.bind_pt_msi_irq;
> +
> +    data->machine_irq = machine_irq;
> +    data->gvec = gvec;
> +    data->data = msi_data;
> +    data->addr = msi_addr;
> +    data->gtable = gtable;
> +    if ( unmasked )
> +        data->flags |= XEN_DMOP_MSI_FLAG_UNMASKED;
> +
> +    return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
> +}
> +
> +int xendevicemodel_unbind_pt_msi_irq(
> +    xendevicemodel_handle *dmod, domid_t domid, uint32_t machine_irq,
> +    uint8_t gvec, uint64_t msi_addr, uint32_t msi_data)
> +{
> +    struct xen_dm_op op;
> +    struct xen_dm_op_bind_pt_msi_irq *data;
> +
> +    memset(&op, 0, sizeof(op));
> +
> +    op.op = XEN_DMOP_unbind_pt_msi_irq;
> +    data = &op.u.bind_pt_msi_irq;
> +
> +    data->machine_irq = machine_irq;
> +    data->gvec = gvec;
> +    data->data = msi_data;
> +    data->addr = msi_addr;
> +
> +    return xendevicemodel_op(dmod, domid, 1, &op, sizeof(op));
> +}
> +
>  int xendevicemodel_restrict(xendevicemodel_handle *dmod, domid_t domid)
>  {
>      return osdep_xendevicemodel_restrict(dmod, domid);
> diff --git a/xen/arch/x86/hvm/dm.c b/xen/arch/x86/hvm/dm.c
> index 3b53471af0..3d530d948f 100644
> --- a/xen/arch/x86/hvm/dm.c
> +++ b/xen/arch/x86/hvm/dm.c
> @@ -7,12 +7,15 @@
>  #include <xen/guest_access.h>
>  #include <xen/dm.h>
>  #include <xen/hypercall.h>
> +#include <xen/iocap.h>
> +#include <xen/iommu.h>
>  #include <xen/ioreq.h>
>  #include <xen/nospec.h>
>  #include <xen/sched.h>
>  
>  #include <asm/hap.h>
>  #include <asm/hvm/cacheattr.h>
> +#include <asm/msi.h>
>  #include <asm/shadow.h>
>  
>  #include <xsm/xsm.h>
> @@ -322,6 +325,25 @@ static int inject_event(struct domain *d,
>      return 0;
>  }
>  
> +static uint32_t msi_addr_to_gflags(uint64_t addr, uint32_t data, bool masked)
> +{
> +    uint32_t tmp = (uint32_t)addr;
> +
> +    return MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_DEST_ID_MASK),
> +                     XEN_DOMCTL_VMSI_X86_DEST_ID_MASK) |
> +           MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_EXT_DEST_ID_MASK),
> +                     XEN_DOMCTL_VMSI_X86_EXT_DEST_ID_MASK) |
> +           MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_REDIRECTION_MASK),
> +                     XEN_DOMCTL_VMSI_X86_RH_MASK) |
> +           MASK_INSR(MASK_EXTR(tmp, MSI_ADDR_DESTMODE_MASK),
> +                     XEN_DOMCTL_VMSI_X86_DM_MASK) |
> +           MASK_INSR(MASK_EXTR(data, MSI_DATA_DELIVERY_MODE_MASK),
> +                     XEN_DOMCTL_VMSI_X86_DELIV_MASK) |
> +           MASK_INSR(MASK_EXTR(data, MSI_DATA_TRIGGER_MASK),
> +                     XEN_DOMCTL_VMSI_X86_TRIG_MASK) |
> +           (masked ? 0 : XEN_DOMCTL_VMSI_X86_UNMASKED);
> +}
> +
>  int dm_op(const struct dmop_args *op_args)
>  {
>      struct domain *d;
> @@ -350,6 +372,8 @@ int dm_op(const struct dmop_args *op_args)
>          [XEN_DMOP_relocate_memory]                  = sizeof(struct 
> xen_dm_op_relocate_memory),
>          [XEN_DMOP_pin_memory_cacheattr]             = sizeof(struct 
> xen_dm_op_pin_memory_cacheattr),
>          [XEN_DMOP_nr_vcpus]                         = sizeof(struct 
> xen_dm_op_nr_vcpus),
> +        [XEN_DMOP_bind_pt_msi_irq]                  = sizeof(struct 
> xen_dm_op_bind_pt_msi_irq),
> +        [XEN_DMOP_unbind_pt_msi_irq]                = sizeof(struct 
> xen_dm_op_bind_pt_msi_irq),
>      };
>  
>      rc = rcu_lock_remote_domain_by_id(op_args->domid, &d);
> @@ -607,6 +631,83 @@ int dm_op(const struct dmop_args *op_args)
>          break;
>      }
>  
> +    case XEN_DMOP_bind_pt_msi_irq:
> +    {
> +        const struct xen_dm_op_bind_pt_msi_irq *data =
> +            &op.u.bind_pt_msi_irq;
> +        struct xen_domctl_bind_pt_irq bind = {
> +            .machine_irq = data->machine_irq,
> +            .irq_type = PT_IRQ_TYPE_MSI,
> +        };
> +        int irq;
> +
> +        rc = -EINVAL;
> +        if ( data->pad0 || data->pad1 )
> +            break;
> +
> +        if ( data->flags & ~XEN_DMOP_MSI_FLAG_UNMASKED )
> +            break;
> +
> +        irq = domain_pirq_to_irq(d, bind.machine_irq);
> +
> +        rc = -EPERM;
> +        if ( irq <= 0 || !irq_access_permitted(current->domain, irq) )
> +            break;
> +
> +        bind.u.msi.gvec = data->gvec;
> +        bind.u.msi.gflags =
> +            msi_addr_to_gflags(data->addr, data->data,
> +                               !(data->flags & XEN_DMOP_MSI_FLAG_UNMASKED));
> +        bind.u.msi.gtable = data->gtable;

Retrofitting the new interface into the old one seems weird.  I would
do it the other way around - implement the old bind domctl on top of
an interface that's more suited for the new DM op.

That way you avoid having to expand gflags with extended destination
field.

> +
> +        rc = -ESRCH;
> +        if ( is_iommu_enabled(d) )
> +        {
> +            pcidevs_lock();
> +            rc = pt_irq_create_bind(d, &bind);
> +            pcidevs_unlock();
> +        }
> +        if ( rc < 0 )
> +            printk(XENLOG_G_ERR
> +                   "pt_irq_create_bind failed (%ld) for dom%d\n",
> +                   rc, d->domain_id);
> +        break;
> +    }
> +
> +    case XEN_DMOP_unbind_pt_msi_irq:
> +    {
> +        const struct xen_dm_op_bind_pt_msi_irq *data =
> +            &op.u.bind_pt_msi_irq;
> +        struct xen_domctl_bind_pt_irq bind = {
> +            .machine_irq = data->machine_irq,
> +            .irq_type = PT_IRQ_TYPE_MSI,
> +        };
> +        int irq;
> +
> +        rc = -EINVAL;
> +        if ( data->pad0 || data->pad1 )
> +            break;
> +
> +        irq = domain_pirq_to_irq(d, bind.machine_irq);
> +
> +        rc = -EPERM;
> +        if ( irq <= 0 || !irq_access_permitted(current->domain, irq) )
> +            break;
> +
> +        rc = -ESRCH;
> +        if ( is_iommu_enabled(d) )
> +        {
> +            pcidevs_lock();
> +            rc = pt_irq_destroy_bind(d, &bind);
> +            pcidevs_unlock();
> +        }
> +        if ( rc < 0 )
> +            printk(XENLOG_G_ERR
> +                   "pt_irq_destroy_bind failed (%ld) for dom%d\n",
> +                   rc, d->domain_id);
> +        break;
> +    }
> +
>      default:
>          rc = ioreq_server_dm_op(&op, d, &const_op);
>          break;
> @@ -643,6 +744,7 @@ CHECK_dm_op_remote_shutdown;
>  CHECK_dm_op_relocate_memory;
>  CHECK_dm_op_pin_memory_cacheattr;
>  CHECK_dm_op_nr_vcpus;
> +CHECK_dm_op_bind_pt_msi_irq;
>  
>  int compat_dm_op(
>      domid_t domid, unsigned int nr_bufs, XEN_GUEST_HANDLE_PARAM(void) bufs)
> diff --git a/xen/include/public/hvm/dm_op.h b/xen/include/public/hvm/dm_op.h
> index 2bf0fdc1ae..fd0f3a6a99 100644
> --- a/xen/include/public/hvm/dm_op.h
> +++ b/xen/include/public/hvm/dm_op.h
> @@ -444,6 +444,28 @@ struct xen_dm_op_nr_vcpus {
>  };
>  typedef struct xen_dm_op_nr_vcpus xen_dm_op_nr_vcpus_t;
>  
> +#define XEN_DMOP_bind_pt_msi_irq   21
> +#define XEN_DMOP_unbind_pt_msi_irq 22
> +
> +struct xen_dm_op_bind_pt_msi_irq {
> +    /* IN - physical IRQ (pirq) */
> +    uint32_t machine_irq;
> +    /* IN - guest vector */
> +    uint8_t  gvec;

If you pass the address and data MSI fields there's no need to also
pass the vector; it can be obtained by Xen from the MSI fields?

> +    uint8_t  pad0;
> +    uint16_t pad1;
> +    /* IN - MSI data (vector, delivery, trigger) */
> +    uint32_t data;
> +    /* IN - flags */
> +    uint32_t flags;
> +#define XEN_DMOP_MSI_FLAG_UNMASKED (1u << 0)
> +    /* IN - MSI address (0xfeexxxxx, includes ext dest) */
> +    uint64_aligned_t addr;
> +    /* IN - MSI-X table base GFN, 0 for MSI */
> +    uint64_aligned_t gtable;
> +};
> +typedef struct xen_dm_op_bind_pt_msi_irq xen_dm_op_bind_pt_msi_irq_t;
> +
>  struct xen_dm_op {
>      uint32_t op;
>      uint32_t pad;
> @@ -468,6 +490,7 @@ struct xen_dm_op {
>          xen_dm_op_relocate_memory_t relocate_memory;
>          xen_dm_op_pin_memory_cacheattr_t pin_memory_cacheattr;
>          xen_dm_op_nr_vcpus_t nr_vcpus;
> +        xen_dm_op_bind_pt_msi_irq_t bind_pt_msi_irq;
>      } u;
>  };
>  
> diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
> index 9d08dcc4bb..9236394786 100644
> --- a/xen/include/xlat.lst
> +++ b/xen/include/xlat.lst
> @@ -107,6 +107,7 @@
>  ?    dm_op_map_mem_type_to_ioreq_server hvm/dm_op.h
>  ?    dm_op_modified_memory           hvm/dm_op.h
>  ?    dm_op_nr_vcpus                  hvm/dm_op.h
> +?    dm_op_bind_pt_msi_irq           hvm/dm_op.h
>  ?    dm_op_pin_memory_cacheattr      hvm/dm_op.h
>  ?    dm_op_relocate_memory           hvm/dm_op.h
>  ?    dm_op_remote_shutdown           hvm/dm_op.h
> -- 
> 2.51.0
> 
> 
> 
> --
> Julian Vetter | Vates Hypervisor & Kernel Developer
> 
> XCP-ng & Xen Orchestra - Vates solutions
> 
> web: https://vates.tech
> 



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.