# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1260258690 0
# Node ID 66ff18dd385841b534efd24fe3a3e33abdbd98ad
# Parent ab0d71f7f596048194c4389bd4fc8a25221ac380
VT-d: per-iommu domain-id
Currently, Xen shares iommu domain-ids across all the VT-d units in
the platform. The number of iommu domain-ids (NR_DID, e.g. 256)
supported by each VT-d unit is reported in its Capability register.
The limitation of the current implementation is that it can support
at most NR_DID domains with VT-d in the entire platform, even though
the platform could support N * NR_DID (where N is the number of VT-d
units). Imagine a platform with several SR-IOV NICs, each supporting
128 VFs: the total can easily exceed NR_DID.
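
For reference, NR_DID comes from the ND field (bits 2:0) of each
unit's Capability register; the following one-liner sketches the
cap_ndoms() computation per the VT-d spec (illustration only, not
part of this patch):

    /* A unit with ND=n supports 2^(4 + 2n) domain-ids; e.g. ND=2 -> 256. */
    #define cap_ndoms(cap) (1UL << (4 + 2 * ((cap) & 0x7)))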
This patch implements iommu domain-id management per iommu (VT-d
unit), which removes the above limitation. It drops the global
domain-id bitmap in favour of a per-iommu domain-id bitmap in struct
iommu, plus an array that maps each iommu domain-id to a guest
domain-id; the array is used to look up the iommu domain-id when
flushing the context cache or IOTLB. When a device is assigned to a
guest, an available iommu domain-id is chosen from the device's iommu
and the guest domain-id is recorded in the mapping array. When a
device is deassigned from a guest, the domain-id bit in the bitmap
and the corresponding entry in the mapping array are cleared,
provided no other device under the same iommu is owned by the guest.
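
As a minimal sketch of the assignment-time bookkeeping described
above (the helper name is hypothetical; the real logic lives in
context_set_domain_id() in the diff below):

    /* Sketch only: claim a free domain-id on one VT-d unit for a guest
     * and record the owning guest in the per-iommu mapping array. */
    static int iommu_domid_alloc(struct iommu *iommu, domid_t gdomid)
    {
        unsigned long nr_dom = cap_ndoms(iommu->cap);
        unsigned long i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);

        if ( i >= nr_dom )
            return -1;                    /* this unit's ids are exhausted */
        set_bit(i, iommu->domid_bitmap);  /* claim the hardware domain-id */
        iommu->domid_map[i] = gdomid;     /* map it back to the guest */
        return i;                         /* programmed into the context
                                             entry's DID field */
    }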
Signed-off-by: Weidong Han <weidong.han@xxxxxxxxx>
---
xen/drivers/passthrough/vtd/iommu.c | 213 +++++++++++++++++++++++-------------
xen/include/xen/hvm/iommu.h | 1
xen/include/xen/iommu.h | 2
3 files changed, 143 insertions(+), 73 deletions(-)
diff -r ab0d71f7f596 -r 66ff18dd3858 xen/drivers/passthrough/vtd/iommu.c
--- a/xen/drivers/passthrough/vtd/iommu.c Tue Dec 08 07:49:54 2009 +0000
+++ b/xen/drivers/passthrough/vtd/iommu.c Tue Dec 08 07:51:30 2009 +0000
@@ -38,46 +38,70 @@
#include "extern.h"
#include "vtd.h"
-#define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
-
int nr_iommus;
-static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
-static int domid_bitmap_size; /* domain id bitmap size in bits */
-static unsigned long *domid_bitmap; /* iommu domain id bitmap */
static bool_t rwbf_quirk;
static void setup_dom0_devices(struct domain *d);
static void setup_dom0_rmrr(struct domain *d);
+static int domain_iommu_domid(struct domain *d,
+                              struct iommu *iommu)
+{
+    unsigned long nr_dom, i;
+
+    nr_dom = cap_ndoms(iommu->cap);
+    i = find_first_bit(iommu->domid_bitmap, nr_dom);
+    while ( i < nr_dom )
+    {
+        if ( iommu->domid_map[i] == d->domain_id )
+            return i;
+
+        i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
+    }
+
+    gdprintk(XENLOG_ERR VTDPREFIX,
+             "Cannot get valid iommu domid: domid=%d iommu->index=%d\n",
+             d->domain_id, iommu->index);
+    return -1;
+}
+
 #define DID_FIELD_WIDTH 16
 #define DID_HIGH_OFFSET 8
-static void context_set_domain_id(struct context_entry *context,
-                                  struct domain *d)
-{
-    domid_t iommu_domid = domain_iommu_domid(d);
-
-    if ( iommu_domid == 0 )
-    {
-        spin_lock(&domid_bitmap_lock);
-        iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
-        set_bit(iommu_domid, domid_bitmap);
-        spin_unlock(&domid_bitmap_lock);
-        d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
-    }
-
-    context->hi &= (1 << DID_HIGH_OFFSET) - 1;
-    context->hi |= iommu_domid << DID_HIGH_OFFSET;
-}
-
-static void iommu_domid_release(struct domain *d)
-{
-    domid_t iommu_domid = domain_iommu_domid(d);
-
-    if ( iommu_domid != 0 )
-    {
-        d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
-        clear_bit(iommu_domid, domid_bitmap);
-    }
+static int context_set_domain_id(struct context_entry *context,
+                                 struct domain *d,
+                                 struct iommu *iommu)
+{
+    unsigned long nr_dom, i;
+    int found = 0;
+
+    ASSERT(spin_is_locked(&iommu->lock));
+
+    nr_dom = cap_ndoms(iommu->cap);
+    i = find_first_bit(iommu->domid_bitmap, nr_dom);
+    while ( i < nr_dom )
+    {
+        if ( iommu->domid_map[i] == d->domain_id )
+        {
+            found = 1;
+            break;
+        }
+        i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
+    }
+
+    if ( found == 0 )
+    {
+        i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
+        if ( i >= nr_dom )
+        {
+            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
+            return -EFAULT;
+        }
+        iommu->domid_map[i] = d->domain_id;
+    }
+
+    set_bit(i, iommu->domid_bitmap);
+    context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
+    return 0;
 }

 static struct intel_iommu *alloc_intel_iommu(void)
@@ -526,6 +550,7 @@ static void dma_pte_clear_one(struct dom
     struct dma_pte *page = NULL, *pte = NULL;
     u64 pg_maddr;
     int flush_dev_iotlb;
+    int iommu_domid;

     spin_lock(&hd->mapping_lock);
     /* get last level pte */
@@ -557,7 +582,10 @@ static void dma_pte_clear_one(struct dom
         if ( test_bit(iommu->index, &hd->iommu_bitmap) )
         {
             flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-            if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
+            iommu_domid = domain_iommu_domid(domain, iommu);
+            if ( iommu_domid == -1 )
+                continue;
+            if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
                                        addr, 1, 0, flush_dev_iotlb) )
                 iommu_flush_write_buffer(iommu);
         }
@@ -982,7 +1010,7 @@ static int iommu_alloc(struct acpi_drhd_
 static int iommu_alloc(struct acpi_drhd_unit *drhd)
 {
     struct iommu *iommu;
-    unsigned long sagaw;
+    unsigned long sagaw, nr_dom;
     int agaw;

     if ( nr_iommus > MAX_IOMMUS )
@@ -1033,6 +1061,25 @@ static int iommu_alloc(struct acpi_drhd_
     if ( !ecap_coherent(iommu->ecap) )
         iommus_incoherent = 1;

+    /* Allocate domain id bitmap. */
+    nr_dom = cap_ndoms(iommu->cap);
+    iommu->domid_bitmap = xmalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
+    if ( !iommu->domid_bitmap )
+        return -ENOMEM;
+    memset(iommu->domid_bitmap, 0, nr_dom / 8);
+
+    /*
+     * If Caching mode is set, then invalid translations are tagged with
+     * domain id 0. Hence reserve bit 0 for it.
+     */
+    if ( cap_caching_mode(iommu->cap) )
+        set_bit(0, iommu->domid_bitmap);
+
+    iommu->domid_map = xmalloc_array(u16, nr_dom);
+    if ( !iommu->domid_map )
+        return -ENOMEM;
+    memset(iommu->domid_map, 0, nr_dom * sizeof(*iommu->domid_map));
+
     spin_lock_init(&iommu->lock);
     spin_lock_init(&iommu->register_lock);
@@ -1055,6 +1102,9 @@ static void iommu_free(struct acpi_drhd_
     if ( iommu->reg )
         iounmap(iommu->reg);
+
+    xfree(iommu->domid_bitmap);
+    xfree(iommu->domid_map);

     free_intel_iommu(iommu->intel);
     destroy_irq(iommu->irq);
@@ -1174,7 +1224,12 @@ static int domain_context_mapping_one(
         spin_unlock(&hd->mapping_lock);
     }

-    context_set_domain_id(context, domain);
+    if ( context_set_domain_id(context, domain, iommu) )
+    {
+        spin_unlock(&iommu->lock);
+        return -EFAULT;
+    }
+
     context_set_address_width(*context, agaw);
     context_set_fault_enable(*context);
     context_set_present(*context);
@@ -1292,6 +1347,10 @@ static int domain_context_unmap_one(
 {
     struct context_entry *context, *context_entries;
     u64 maddr;
+    int iommu_domid;
+    struct pci_dev *pdev;
+    struct acpi_drhd_unit *drhd;
+    int found = 0;

     ASSERT(spin_is_locked(&pcidevs_lock));
     spin_lock(&iommu->lock);
@@ -1311,14 +1370,50 @@ static int domain_context_unmap_one(
     context_clear_entry(*context);
     iommu_flush_cache_entry(context, sizeof(struct context_entry));

-    if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
+    iommu_domid = domain_iommu_domid(domain, iommu);
+    if ( iommu_domid == -1 )
+    {
+        spin_unlock(&iommu->lock);
+        unmap_vtd_domain_page(context_entries);
+        return -EINVAL;
+    }
+
+    if ( iommu_flush_context_device(iommu, iommu_domid,
                                     (((u16)bus) << 8) | devfn,
                                     DMA_CCMD_MASK_NOBIT, 0) )
         iommu_flush_write_buffer(iommu);
     else
     {
         int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-        iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0,
-                              flush_dev_iotlb);
+        iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb);
+    }
+
+    /*
+     * If no other device under the same iommu is owned by this domain,
+     * clear the iommu bit in iommu_bitmap and the domain_id bit in
+     * domid_bitmap.
+     */
+    for_each_pdev ( domain, pdev )
+    {
+        if ( pdev->bus == bus && pdev->devfn == devfn )
+            continue;
+
+        drhd = acpi_find_matched_drhd_unit(pdev);
+        if ( drhd && drhd->iommu == iommu )
+        {
+            found = 1;
+            break;
+        }
+    }
+
+    if ( found == 0 )
+    {
+        struct hvm_iommu *hd = domain_hvm_iommu(domain);
+
+        clear_bit(iommu->index, &hd->iommu_bitmap);
+
+        clear_bit(iommu_domid, iommu->domid_bitmap);
+        iommu->domid_map[iommu_domid] = 0;
     }

     spin_unlock(&iommu->lock);
@@ -1397,11 +1492,8 @@ static int reassign_device_ownership(
     struct domain *target,
     u8 bus, u8 devfn)
 {
-    struct hvm_iommu *source_hd = domain_hvm_iommu(source);
     struct pci_dev *pdev;
-    struct acpi_drhd_unit *drhd;
-    struct iommu *pdev_iommu;
-    int ret, found = 0;
+    int ret;

     ASSERT(spin_is_locked(&pcidevs_lock));

     pdev = pci_get_pdev_by_domain(source, bus, devfn);
@@ -1409,10 +1501,9 @@ static int reassign_device_ownership(
     if (!pdev)
         return -ENODEV;

-    if ( (drhd = acpi_find_matched_drhd_unit(pdev)) == NULL )
-        return -ENODEV;
-    pdev_iommu = drhd->iommu;
-    domain_context_unmap(source, bus, devfn);
+    ret = domain_context_unmap(source, bus, devfn);
+    if ( ret )
+        return ret;

     ret = domain_context_mapping(target, bus, devfn);
     if ( ret )
@@ -1420,19 +1511,6 @@ static int reassign_device_ownership(
     list_move(&pdev->domain_list, &target->arch.pdev_list);
     pdev->domain = target;
-
-    for_each_pdev ( source, pdev )
-    {
-        drhd = acpi_find_matched_drhd_unit(pdev);
-        if ( drhd && drhd->iommu == pdev_iommu )
-        {
-            found = 1;
-            break;
-        }
-    }
-
-    if ( !found )
-        clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);

     return ret;
 }
@@ -1448,8 +1526,6 @@ void iommu_domain_teardown(struct domain
     iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
     hd->pgd_maddr = 0;
     spin_unlock(&hd->mapping_lock);
-
-    iommu_domid_release(d);
 }

 static int intel_iommu_map_page(
@@ -1462,6 +1538,7 @@ static int intel_iommu_map_page(
     u64 pg_maddr;
     int pte_present;
     int flush_dev_iotlb;
+    int iommu_domid;

     /* do nothing if dom0 and iommu supports pass thru */
     if ( iommu_passthrough && (d->domain_id == 0) )
@@ -1501,7 +1578,10 @@ static int intel_iommu_map_page(
             continue;

         flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
-        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
+        iommu_domid = domain_iommu_domid(d, iommu);
+        if ( iommu_domid == -1 )
+            continue;
+        if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
                                    (paddr_t)gfn << PAGE_SHIFT_4K, 1,
                                    !pte_present, flush_dev_iotlb) )
             iommu_flush_write_buffer(iommu);
@@ -1780,7 +1860,6 @@ int intel_vtd_setup(void)
     platform_quirks();

-    spin_lock_init(&domid_bitmap_lock);
     clflush_size = get_cache_line_size();

     irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
@@ -1827,16 +1906,6 @@ int intel_vtd_setup(void)
     P(iommu_qinval, "Queued Invalidation");
     P(iommu_intremap, "Interrupt Remapping");
 #undef P
-
-    /* Allocate domain id bitmap, and set bit 0 as reserved. */
-    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
-    domid_bitmap_size = cap_ndoms(drhd->iommu->cap);
-    domid_bitmap = xmalloc_array(unsigned long,
-                                 BITS_TO_LONGS(domid_bitmap_size));
-    if ( domid_bitmap == NULL )
-        goto error;
-    memset(domid_bitmap, 0, domid_bitmap_size / 8);
-    __set_bit(0, domid_bitmap);

     scan_pci_devices();
diff -r ab0d71f7f596 -r 66ff18dd3858 xen/include/xen/hvm/iommu.h
--- a/xen/include/xen/hvm/iommu.h Tue Dec 08 07:49:54 2009 +0000
+++ b/xen/include/xen/hvm/iommu.h Tue Dec 08 07:51:30 2009 +0000
@@ -34,7 +34,6 @@ struct hvm_iommu {
     spinlock_t mapping_lock;            /* io page table lock */
     int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
     struct list_head g2m_ioport_list;   /* guest to machine ioport mapping */
-    domid_t iommu_domid;                /* domain id stored in iommu */
     u64 iommu_bitmap;        /* bitmap of iommu(s) that the domain uses */

     /* amd iommu support */
diff -r ab0d71f7f596 -r 66ff18dd3858 xen/include/xen/iommu.h
--- a/xen/include/xen/iommu.h Tue Dec 08 07:49:54 2009 +0000
+++ b/xen/include/xen/iommu.h Tue Dec 08 07:51:30 2009 +0000
@@ -55,6 +55,8 @@ struct iommu {
     u64 root_maddr; /* root entry machine address */
     int irq;
     struct intel_iommu *intel;
+    unsigned long *domid_bitmap;  /* domain id bitmap */
+    u16 *domid_map;               /* domain id mapping array */
 };

 int iommu_setup(void);