[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v1 4/6] vpci: add SR-IOV support for PVH Dom0


  • To: "xen-devel@xxxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxxx>
  • From: Mykyta Poturai <Mykyta_Poturai@xxxxxxxx>
  • Date: Fri, 25 Jul 2025 14:24:33 +0000
  • Accept-language: en-US
  • Arc-authentication-results: i=1; mx.microsoft.com 1; spf=pass smtp.mailfrom=epam.com; dmarc=pass action=none header.from=epam.com; dkim=pass header.d=epam.com; arc=none
  • Arc-message-signature: i=1; a=rsa-sha256; c=relaxed/relaxed; d=microsoft.com; s=arcselector10001; h=From:Date:Subject:Message-ID:Content-Type:MIME-Version:X-MS-Exchange-AntiSpam-MessageData-ChunkCount:X-MS-Exchange-AntiSpam-MessageData-0:X-MS-Exchange-AntiSpam-MessageData-1; bh=OotdfLHKZjhyPJlV+BjTo76JI2XRgyIEJ4WlkkxrWJE=; b=ijJNhM8bng1cqWgeVwEphl0sJ/YUNsj53Z5+vvwTtACIe5XZjx+t9kEc8qCeN9ZydabUymzoA2RzB4v+165+ty5vel4egtx+M3cPnRY+jZW1LsDWkxp2GJD+5vG2TY/Gh8HoPXigIGOn1sV23LuiPv/OuR/hUD12of8JsaOHsZbR4GxuuFNows+TtOtJXv44MHwjJvFxARP8D79n95eczrITU1qUhxgmj+BxR8V712YBI9FUzBoRPVk/5Jwb5aIV2B/1/4P/pW3rCcWf17o44NA08KW5e6iIem24bL4kZokHCP5S+4r4rfpRVaPCecMXxDRosdv9CnFJm0JJ65+tQw==
  • Arc-seal: i=1; a=rsa-sha256; s=arcselector10001; d=microsoft.com; cv=none; b=LKmHS4+cw9mEeRWGjkEH0hm+O9V7DS/w8j40DY7e2L5F2mq1a5YgqsGHATIu1HhJxlRTIHah/AGHkiMDkuSM/in5VOEoal7In/7t0YThFcr69Kf14Ay9cG0zeb1pd8Oer21AVCK329z7asnUtcvUlLVIlHHAbybV6s6Or66GIdLWhOIbeBEA+vZLqftYGR4JAMDS94eHl0UyEmihcl4JH4gr3MP+XGDqWyuBM3zgU2FbWW0dR+Bwsw9+QEhzYPAEMBxAjOznRsS9Vvy/NW19AmnK7hn4OeYnduy7G/eQlVq9S5qXv87Fh3tDj7eAZQ4oMCbCn7LWrnKSMUO8RUHasw==
  • Authentication-results: dkim=none (message not signed) header.d=none;dmarc=none action=none header.from=epam.com;
  • Cc: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>, Oleksii Kurochko <oleksii.kurochko@xxxxxxxxx>, Community Manager <community.manager@xxxxxxxxxxxxxx>, Andrew Cooper <andrew.cooper3@xxxxxxxxxx>, Anthony PERARD <anthony.perard@xxxxxxxxxx>, Michal Orzel <michal.orzel@xxxxxxx>, Jan Beulich <jbeulich@xxxxxxxx>, Julien Grall <julien@xxxxxxx>, Roger Pau Monné <roger.pau@xxxxxxxxxx>, Stefano Stabellini <sstabellini@xxxxxxxxxx>, Mykyta Poturai <Mykyta_Poturai@xxxxxxxx>
  • Delivery-date: Fri, 25 Jul 2025 14:24:45 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xenproject.org>
  • Thread-index: AQHb/W/XkRTIlDBdSUyf2rny+l8q3w==
  • Thread-topic: [PATCH v1 4/6] vpci: add SR-IOV support for PVH Dom0

From: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>

This code is expected to only be used by privileged domains,
unprivileged domains should not get access to the SR-IOV capability.

Implement RW handlers for PCI_SRIOV_CTRL register to dynamically
map/unmap VF BARS. Recalculate BAR sizes before mapping VFs to account
for possible changes in the system page size register.

Relies on dom0 to enable SR-IOV and PHYSDEVOP to inform Xen about
addition/removal of VFs.

Signed-off-by: Stewart Hildebrand <stewart.hildebrand@xxxxxxx>
Signed-off-by: Mykyta Poturai <mykyta_poturai@xxxxxxxx>
---
 CHANGELOG.md              |   3 +-
 SUPPORT.md                |   2 -
 xen/drivers/vpci/Makefile |   2 +-
 xen/drivers/vpci/header.c |   3 +
 xen/drivers/vpci/sriov.c  | 235 ++++++++++++++++++++++++++++++++++++++
 xen/drivers/vpci/vpci.c   |   1 +
 xen/include/xen/vpci.h    |   7 +-
 7 files changed, 247 insertions(+), 6 deletions(-)
 create mode 100644 xen/drivers/vpci/sriov.c

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5f31ca08fe..7b0e8beb76 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,8 +23,7 @@ The format is based on [Keep a 
Changelog](https://keepachangelog.com/en/1.0.0/)
  - On x86:
    - Option to attempt to fixup p2m page-faults on PVH dom0.
    - Resizable BARs is supported for PVH dom0.
-   - Support PCI passthrough for HVM domUs when dom0 is PVH (note SR-IOV
-     capability usage is not yet supported on PVH dom0).
+   - Support PCI passthrough for HVM domUs when dom0 is PVH.
    - Smoke tests for the FreeBSD Xen builds in Cirrus CI.
 
  - On Arm:
diff --git a/SUPPORT.md b/SUPPORT.md
index 6a82a92189..830b598714 100644
--- a/SUPPORT.md
+++ b/SUPPORT.md
@@ -170,8 +170,6 @@ unexpected behavior or issues on some hardware.
 
 At least the following features are missing on a PVH dom0:
 
-  * PCI SR-IOV.
-
   * Native NMI forwarding (nmi=dom0 command line option).
 
   * MCE handling.
diff --git a/xen/drivers/vpci/Makefile b/xen/drivers/vpci/Makefile
index a7c8a30a89..fe1e57b64d 100644
--- a/xen/drivers/vpci/Makefile
+++ b/xen/drivers/vpci/Makefile
@@ -1,2 +1,2 @@
-obj-y += vpci.o header.o rebar.o
+obj-y += vpci.o header.o rebar.o sriov.o
 obj-$(CONFIG_HAS_PCI_MSI) += msi.o msix.o
diff --git a/xen/drivers/vpci/header.c b/xen/drivers/vpci/header.c
index f947f652cd..0a840c6dcc 100644
--- a/xen/drivers/vpci/header.c
+++ b/xen/drivers/vpci/header.c
@@ -839,6 +839,9 @@ static int cf_check init_header(struct pci_dev *pdev)
 
     ASSERT(rw_is_write_locked(&pdev->domain->pci_lock));
 
+    if ( pdev->info.is_virtfn )
+        return 0;
+
     switch ( pci_conf_read8(pdev->sbdf, PCI_HEADER_TYPE) & 0x7f )
     {
     case PCI_HEADER_TYPE_NORMAL:
diff --git a/xen/drivers/vpci/sriov.c b/xen/drivers/vpci/sriov.c
new file mode 100644
index 0000000000..640430e3e9
--- /dev/null
+++ b/xen/drivers/vpci/sriov.c
@@ -0,0 +1,235 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Handlers for accesses to the SR-IOV capability structure.
+ *
+ * Copyright (C) 2018 Citrix Systems R&D
+ */
+
+#include <xen/sched.h>
+#include <xen/vpci.h>
+
+static int vf_init_bars(const struct pci_dev *vf_pdev)
+{
+    unsigned int i, sriov_pos;
+    int vf_idx, rc;
+    const struct pci_dev *pf_pdev = vf_pdev->pf_pdev;
+    uint16_t offset, stride;
+    struct vpci_bar *bars = vf_pdev->vpci->header.bars;
+    struct vpci_bar *physfn_vf_bars = pf_pdev->vpci->sriov->vf_bars;
+
+    sriov_pos = pci_find_ext_capability(pf_pdev->sbdf, PCI_EXT_CAP_ID_SRIOV);
+    offset = pci_conf_read16(pf_pdev->sbdf, sriov_pos + PCI_SRIOV_VF_OFFSET);
+    stride = pci_conf_read16(pf_pdev->sbdf, sriov_pos + PCI_SRIOV_VF_STRIDE);
+
+    vf_idx = vf_pdev->sbdf.sbdf;
+    vf_idx -= pf_pdev->sbdf.sbdf + offset;
+    if ( vf_idx < 0 )
+        return -EINVAL;
+    if ( stride )
+    {
+        if ( vf_idx % stride )
+            return -EINVAL;
+        vf_idx /= stride;
+    }
+
+    /*
+     * Set up BARs for this VF out of PF's VF BARs taking into account
+     * the index of the VF.
+     */
+    for ( i = 0; i < PCI_SRIOV_NUM_BARS; i++ )
+    {
+        bars[i].addr = physfn_vf_bars[i].addr + vf_idx * 
physfn_vf_bars[i].size;
+        bars[i].guest_addr = bars[i].addr;
+        bars[i].size = physfn_vf_bars[i].size;
+        bars[i].type = physfn_vf_bars[i].type;
+        bars[i].prefetchable = physfn_vf_bars[i].prefetchable;
+        rc = vpci_bar_add_rangeset(vf_pdev, &bars[i], i);
+        if ( rc )
+            return rc;
+    }
+
+    return 0;
+}
+
+static int map_vf(const struct pci_dev *vf_pdev, uint16_t cmd)
+{
+    int rc;
+
+    ASSERT(rw_is_write_locked(&vf_pdev->domain->pci_lock));
+
+    rc = vf_init_bars(vf_pdev);
+    if ( rc )
+        return rc;
+
+    return vpci_modify_bars(vf_pdev, cmd, false);
+}
+
+static int size_vf_bars(struct pci_dev *pf_pdev, unsigned int sriov_pos)
+{
+    /*
+     * NB: a non-const pci_dev of the PF is needed in order to update
+     * vf_rlen.
+     */
+    struct vpci_bar *bars;
+    unsigned int i;
+    int rc = 0;
+
+    ASSERT(rw_is_write_locked(&pf_pdev->domain->pci_lock));
+    ASSERT(!pf_pdev->info.is_virtfn);
+
+    if ( !pf_pdev->vpci->sriov )
+        return -EINVAL;
+
+    /* Read BARs for VFs out of PF's SR-IOV extended capability. */
+    bars = pf_pdev->vpci->sriov->vf_bars;
+    /* Set the BARs addresses and size. */
+    for ( i = 0; i < PCI_SRIOV_NUM_BARS; i += rc )
+    {
+        unsigned int idx = sriov_pos + PCI_SRIOV_BAR + i * 4;
+        uint32_t bar;
+        uint64_t addr, size;
+
+        bar = pci_conf_read32(pf_pdev->sbdf, idx);
+
+        rc = pci_size_mem_bar(pf_pdev->sbdf, idx, &addr, &size,
+                              PCI_BAR_VF |
+                              ((i == PCI_SRIOV_NUM_BARS - 1) ? PCI_BAR_LAST
+                                                             : 0));
+
+        /*
+         * Update vf_rlen on the PF. According to the spec the size of
+         * the BARs can change if the system page size register is
+         * modified, so always update rlen when enabling VFs.
+         */
+        pf_pdev->physfn.vf_rlen[i] = size;
+
+        if ( !size )
+        {
+            bars[i].type = VPCI_BAR_EMPTY;
+            continue;
+        }
+
+        bars[i].addr = addr;
+        bars[i].guest_addr = addr;
+        bars[i].size = size;
+        bars[i].prefetchable = bar & PCI_BASE_ADDRESS_MEM_PREFETCH;
+
+        switch ( rc )
+        {
+        case 1:
+            bars[i].type = VPCI_BAR_MEM32;
+            break;
+
+        case 2:
+            bars[i].type = VPCI_BAR_MEM64_LO;
+            bars[i + 1].type = VPCI_BAR_MEM64_HI;
+            break;
+
+        default:
+            ASSERT_UNREACHABLE();
+        }
+    }
+
+    rc = rc > 0 ? 0 : rc;
+
+    return rc;
+}
+
+static void cf_check control_write(const struct pci_dev *pdev, unsigned int 
reg,
+                                   uint32_t val, void *data)
+{
+    unsigned int sriov_pos = reg - PCI_SRIOV_CTRL;
+    uint16_t control = pci_conf_read16(pdev->sbdf, reg);
+    bool mem_enabled = control & PCI_SRIOV_CTRL_MSE;
+    bool new_mem_enabled = val & PCI_SRIOV_CTRL_MSE;
+
+    ASSERT(!pdev->info.is_virtfn);
+
+    if ( new_mem_enabled != mem_enabled )
+    {
+        struct pci_dev *vf_pdev;
+        if ( new_mem_enabled )
+        {
+            /* FIXME casting away const-ness to modify vf_rlen */
+            size_vf_bars((struct pci_dev *)pdev, sriov_pos);
+
+            list_for_each_entry(vf_pdev, &pdev->vf_list, vf_list)
+                map_vf(vf_pdev, PCI_COMMAND_MEMORY);
+        }
+        else
+        {
+            list_for_each_entry(vf_pdev, &pdev->vf_list, vf_list)
+                map_vf(vf_pdev, 0);
+        }
+    }
+
+    pci_conf_write16(pdev->sbdf, reg, val);
+}
+
+static int vf_init_header(struct pci_dev *vf_pdev)
+{
+    const struct pci_dev *pf_pdev;
+    unsigned int sriov_pos;
+    int rc = 0;
+    uint16_t ctrl;
+
+    ASSERT(rw_is_write_locked(&vf_pdev->domain->pci_lock));
+
+    if ( !vf_pdev->info.is_virtfn )
+        return 0;
+
+    pf_pdev = vf_pdev->pf_pdev;
+    ASSERT(pf_pdev);
+
+    sriov_pos = pci_find_ext_capability(pf_pdev->sbdf, PCI_EXT_CAP_ID_SRIOV);
+    ctrl = pci_conf_read16(pf_pdev->sbdf, sriov_pos + PCI_SRIOV_CTRL);
+
+    if ( (pf_pdev->domain == vf_pdev->domain) && (ctrl & PCI_SRIOV_CTRL_MSE) )
+    {
+        rc = map_vf(vf_pdev, PCI_COMMAND_MEMORY);
+        if ( rc )
+            return rc;
+    }
+
+    return rc;
+}
+
+static int init_sriov(struct pci_dev *pdev)
+{
+    unsigned int pos;
+
+    if ( pdev->info.is_virtfn )
+        return vf_init_header(pdev);
+
+    pos = pci_find_ext_capability(pdev->sbdf, PCI_EXT_CAP_ID_SRIOV);
+
+    if ( !pos )
+        return 0;
+
+    if ( !is_hardware_domain(pdev->domain) )
+    {
+        printk(XENLOG_ERR
+               "%pp: SR-IOV configuration unsupported for unpriv %pd\n",
+               &pdev->sbdf, pdev->domain);
+        return 0;
+    }
+
+    pdev->vpci->sriov = xzalloc(struct vpci_sriov);
+    if ( !pdev->vpci->sriov )
+        return -ENOMEM;
+
+    return vpci_add_register(pdev->vpci, vpci_hw_read16, control_write,
+                             pos + PCI_SRIOV_CTRL, 2, NULL);
+}
+
+REGISTER_VPCI_INIT(init_sriov, VPCI_PRIORITY_LOW);
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/drivers/vpci/vpci.c b/xen/drivers/vpci/vpci.c
index 09988f04c2..7af6651831 100644
--- a/xen/drivers/vpci/vpci.c
+++ b/xen/drivers/vpci/vpci.c
@@ -120,6 +120,7 @@ void vpci_deassign_device(struct pci_dev *pdev)
     for ( i = 0; i < ARRAY_SIZE(pdev->vpci->header.bars); i++ )
         rangeset_destroy(pdev->vpci->header.bars[i].mem);
 
+    xfree(pdev->vpci->sriov);
     xfree(pdev->vpci->msix);
     xfree(pdev->vpci->msi);
     xfree(pdev->vpci);
diff --git a/xen/include/xen/vpci.h b/xen/include/xen/vpci.h
index 06f7039f20..9e8dcab17e 100644
--- a/xen/include/xen/vpci.h
+++ b/xen/include/xen/vpci.h
@@ -138,7 +138,6 @@ struct vpci {
          * upon to know whether BARs are mapped into the guest p2m.
          */
         bool bars_mapped      : 1;
-        /* FIXME: currently there's no support for SR-IOV. */
     } header;
 
     /* MSI data. */
@@ -192,6 +191,12 @@ struct vpci {
             struct vpci_arch_msix_entry arch;
         } entries[];
     } *msix;
+
+    struct vpci_sriov {
+        /* PF only */
+        struct vpci_bar vf_bars[PCI_SRIOV_NUM_BARS];
+    } *sriov;
+
 #ifdef CONFIG_HAS_VPCI_GUEST_SUPPORT
     /* Guest SBDF of the device. */
 #define INVALID_GUEST_SBDF ((pci_sbdf_t){ .sbdf = ~0U })
-- 
2.34.1



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.