# HG changeset patch
# User Wei Wang <wei.wang2@xxxxxxx>
# Date 1297241832 0
# Node ID 3c5990fabb79cbc73fcc4d589ef5b54618aaf739
# Parent 9f96906ec72452390180c30ea96f3d3006943040
amd iommu: dynamic page table depth adjustment.
IO page table growth is triggered by amd_iommu_map_page(): the table grows upward
by adding higher-level tables above the current root. I have tested this with
different devices (NIC and GFX) and different guests (Linux and Win7) using
different guest memory sizes (512M, 1G, 4G and above).
Signed-off-by: Wei Wang <wei.wang2@xxxxxxx>
---
xen/drivers/passthrough/amd/iommu_map.c | 107 +++++++++++++++++++++++++++
xen/drivers/passthrough/amd/pci_amd_iommu.c | 8 +-
xen/include/asm-x86/hvm/svm/amd-iommu-defs.h | 2
3 files changed, 111 insertions(+), 6 deletions(-)
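Note for readers, not part of the patch: below is a minimal standalone sketch of the
growth rule described above, assuming 9 translation bits per page table level
(PTE_PER_TABLE_SHIFT == 9, i.e. 512 entries per table) as in the AMD IOMMU page
table format. required_iommu_level() is a hypothetical helper that mirrors the
loop added in update_paging_mode() further down; it is an illustration, not code
from the tree.

    /* Standalone sketch: how many page table levels a given gfn needs.
     * Assumes 9 address bits are resolved per level, so a table holds
     * 512 (PTE_PER_TABLE_SIZE) entries. */
    #include <stdio.h>

    #define PTE_PER_TABLE_SHIFT 9
    #define PTE_PER_TABLE_SIZE  (1UL << PTE_PER_TABLE_SHIFT)

    static unsigned int required_iommu_level(unsigned long gfn, unsigned int level)
    {
        /* Index that 'gfn' would use in the current top-level table. */
        unsigned long offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));

        /* While the index does not fit, another level must be stacked on top. */
        while ( offset >= PTE_PER_TABLE_SIZE )
        {
            level++;
            offset >>= PTE_PER_TABLE_SHIFT;
        }
        return level;
    }

    int main(void)
    {
        /* A 2-level table covers gfns below 2^18; 4G of guest memory
         * (gfn up to ~2^20) therefore needs a third level. */
        printf("%u\n", required_iommu_level(0x3ffff, 2));  /* prints 2 */
        printf("%u\n", required_iommu_level(0x40000, 2));  /* prints 3 */
        return 0;
    }
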
diff -r 9f96906ec724 -r 3c5990fabb79 xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c Wed Feb 09 08:54:37 2011 +0000
+++ b/xen/drivers/passthrough/amd/iommu_map.c Wed Feb 09 08:57:12 2011 +0000
@@ -472,6 +472,89 @@ static u64 iommu_l2e_from_pfn(struct pag
return next_table_maddr;
}
+static int update_paging_mode(struct domain *d, unsigned long gfn)
+{
+ u16 bdf;
+ void *device_entry;
+ unsigned int req_id, level, offset;
+ unsigned long flags;
+ struct pci_dev *pdev;
+ struct amd_iommu *iommu = NULL;
+ struct page_info *new_root = NULL;
+ struct page_info *old_root = NULL;
+ void *new_root_vaddr;
+ u64 old_root_maddr;
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+
+ level = hd->paging_mode;
+ old_root = hd->root_table;
+ offset = gfn >> (PTE_PER_TABLE_SHIFT * (level - 1));
+
+ ASSERT(spin_is_locked(&hd->mapping_lock) && is_hvm_domain(d));
+
+ while ( offset >= PTE_PER_TABLE_SIZE )
+ {
+ /* Allocate and install a new root table.
+ * Only the upper I/O page table levels grow; no need to fix next level bits. */
+ new_root = alloc_amd_iommu_pgtable();
+ if ( new_root == NULL )
+ {
+ AMD_IOMMU_DEBUG("%s Cannot allocate I/O page table\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ new_root_vaddr = __map_domain_page(new_root);
+ old_root_maddr = page_to_maddr(old_root);
+ amd_iommu_set_page_directory_entry((u32 *)new_root_vaddr,
+ old_root_maddr, level);
+ level++;
+ old_root = new_root;
+ offset >>= PTE_PER_TABLE_SHIFT;
+ }
+
+ if ( new_root != NULL )
+ {
+ hd->paging_mode = level;
+ hd->root_table = new_root;
+
+ if ( !spin_is_locked(&pcidevs_lock) )
+ AMD_IOMMU_DEBUG("%s Try to access pdev_list "
+ "without aquiring pcidevs_lock.\n", __func__);
+
+ /* Update device table entries using new root table and paging mode */
+ for_each_pdev( d, pdev )
+ {
+ bdf = (pdev->bus << 8) | pdev->devfn;
+ req_id = get_dma_requestor_id(bdf);
+ iommu = find_iommu_for_device(bdf);
+ if ( !iommu )
+ {
+ AMD_IOMMU_DEBUG("%s Fail to find iommu.\n", __func__);
+ return -ENODEV;
+ }
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ device_entry = iommu->dev_table.buffer +
+ (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+
+ /* valid = 0 only works for dom0 passthrough mode */
+ amd_iommu_set_root_page_table((u32 *)device_entry,
+ page_to_maddr(hd->root_table),
+ hd->domain_id,
+ hd->paging_mode, 1);
+
+ invalidate_dev_table_entry(iommu, req_id);
+ flush_command_buffer(iommu);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+
+ /* For safety, invalidate all entries */
+ invalidate_all_iommu_pages(d);
+ }
+ return 0;
+}
+
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
unsigned int flags)
{
@@ -481,6 +564,18 @@ int amd_iommu_map_page(struct domain *d,
BUG_ON( !hd->root_table );
spin_lock(&hd->mapping_lock);
+
+ /* Since the HVM domain is initialized with a 2 level IO page table,
+ * we might need a deeper page table for a larger gfn now */
+ if ( is_hvm_domain(d) )
+ {
+ if ( update_paging_mode(d, gfn) )
+ {
+ AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
+ domain_crash(d);
+ return -EFAULT;
+ }
+ }
iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
if ( iommu_l2e == 0 )
@@ -509,6 +604,18 @@ int amd_iommu_unmap_page(struct domain *
BUG_ON( !hd->root_table );
spin_lock(&hd->mapping_lock);
+
+ /* Since the HVM domain is initialized with a 2 level IO page table,
+ * we might need a deeper page table for a larger gfn now */
+ if ( is_hvm_domain(d) )
+ {
+ if ( update_paging_mode(d, gfn) )
+ {
+ AMD_IOMMU_DEBUG("Update page mode failed gfn = %lx\n", gfn);
+ domain_crash(d);
+ return -EFAULT;
+ }
+ }
iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
diff -r 9f96906ec724 -r 3c5990fabb79 xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Feb 09 08:54:37 2011 +0000
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Wed Feb 09 08:57:12 2011 +0000
@@ -214,8 +214,11 @@ static int amd_iommu_domain_init(struct
return -ENOMEM;
}
+ /* For pv guests and dom0, stick with get_paging_mode(max_page).
+ * For HVM guests, use a 2 level page table at first. */
hd->paging_mode = is_hvm_domain(d) ?
- IOMMU_PAGE_TABLE_LEVEL_4 : get_paging_mode(max_page);
+ IOMMU_PAGING_MODE_LEVEL_2 :
+ get_paging_mode(max_page);
hd->domain_id = d->domain_id;
@@ -297,9 +300,6 @@ static int reassign_device( struct domai
list_move(&pdev->domain_list, &target->arch.pdev_list);
pdev->domain = target;
-
- if ( target->max_pages > 0 )
- t->paging_mode = get_paging_mode(target->max_pages);
/* IO page tables might be destroyed after pci-detach the last device
* In this case, we have to re-allocate root table for next pci-attach.*/
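Aside, not part of the patch: with the initialization change above, PV domains and
dom0 keep sizing the root table for max_page up front, while HVM domains now start
at IOMMU_PAGING_MODE_LEVEL_2 and grow on demand via update_paging_mode(). The
helper below is a rough, hypothetical stand-in for get_paging_mode(), assumed to
pick the smallest level whose reach covers the given frame count; the real
implementation may differ in its details.

    #include <stdio.h>

    #define PTE_PER_TABLE_SHIFT 9

    /* Hypothetical stand-in: smallest level whose reach (2^(9*level) frames
     * with 4K pages) covers the given number of frames. */
    static unsigned int sketch_get_paging_mode(unsigned long max_frames)
    {
        unsigned int level = 1;

        while ( max_frames > (1UL << (PTE_PER_TABLE_SHIFT * level)) )
            level++;
        return level;
    }

    int main(void)
    {
        /* 4G of guest memory = 2^20 4K frames -> a 3-level table. */
        printf("%u\n", sketch_get_paging_mode(1UL << 20));  /* prints 3 */
        /* With this patch, HVM domains skip this sizing at init and instead
         * start at level 2, growing later through update_paging_mode(). */
        return 0;
    }
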
diff -r 9f96906ec724 -r 3c5990fabb79 xen/include/asm-x86/hvm/svm/amd-iommu-defs.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Feb 09 08:54:37 2011 +0000
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-defs.h Wed Feb 09 08:57:12 2011 +0000
@@ -386,8 +386,6 @@
#define IOMMU_PAGES (MMIO_PAGES_PER_IOMMU * MAX_AMD_IOMMUS)
#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
#define MAX_AMD_IOMMUS 32
-#define IOMMU_PAGE_TABLE_LEVEL_3 3
-#define IOMMU_PAGE_TABLE_LEVEL_4 4
/* interrupt remapping table */
#define INT_REMAP_INDEX_DM_MASK 0x1C00