diff -r 1f6773300a7b xen/drivers/passthrough/amd/iommu_map.c
--- a/xen/drivers/passthrough/amd/iommu_map.c	Thu Dec 02 16:06:30 2010 +0100
+++ b/xen/drivers/passthrough/amd/iommu_map.c	Fri Dec 03 15:48:50 2010 +0100
@@ -71,11 +71,36 @@ int send_iommu_command(struct amd_iommu
     return 0;
 }
 
-static void invalidate_iommu_page(struct amd_iommu *iommu,
-                                  u64 io_addr, u16 domain_id)
+static bool_t check_order(int order)
+{
+    return ( order == 0 || order == 9 || order == 18 );
+}
+
+void invalidate_iommu_pages(struct amd_iommu *iommu, u64 io_addr,
+                            u16 domain_id, int order)
 {
     u64 addr_lo, addr_hi;
     u32 cmd[4], entry;
+    u64 mask = 0;
+    int sflag = 0, pde = 0;
+
+    BUG_ON( !check_order(order) );
+
+    /* If sflag == 1, the size of the invalidate command is determined
+       by the first zero bit in the address starting from Address[12] */
+    if ( order == 9 || order == 18 )
+    {
+        mask = ((1ULL << (order - 1)) - 1) << PAGE_SHIFT;
+        io_addr |= mask;
+        sflag = 1;
+    }
+
+    /* All pages associated with the domainID are invalidated */
+    else if ( io_addr == 0x7FFFFFFFFFFFF000ULL )
+    {
+        sflag = 1;
+        pde = 1;
+    }
 
     addr_lo = io_addr & DMA_32BIT_MASK;
     addr_hi = io_addr >> 32;
@@ -88,10 +113,10 @@ static void invalidate_iommu_page(struct
                          &entry);
     cmd[1] = entry;
 
-    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, 0,
+    set_field_in_reg_u32(sflag, 0,
                          IOMMU_INV_IOMMU_PAGES_S_FLAG_MASK,
                          IOMMU_INV_IOMMU_PAGES_S_FLAG_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+    set_field_in_reg_u32(pde, entry,
                          IOMMU_INV_IOMMU_PAGES_PDE_FLAG_MASK,
                          IOMMU_INV_IOMMU_PAGES_PDE_FLAG_SHIFT, &entry);
     set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, entry,
@@ -149,37 +174,40 @@ void flush_command_buffer(struct amd_iom
         AMD_IOMMU_DEBUG("Warning: ComWaitInt bit did not assert!\n");
 }
 
-static void clear_iommu_l1e_present(u64 l2e, unsigned long gfn)
-{
-    u32 *l1e;
+static void clear_iommu_pte_present(u64 pde, unsigned long gfn, int order)
+{
+    u32 *pte;
     int offset;
-    void *l1_table;
-
-    l1_table = map_domain_page(l2e >> PAGE_SHIFT);
-
-    offset = gfn & (~PTE_PER_TABLE_MASK);
-    l1e = (u32*)(l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
+    void *pd_table;
+
+    BUG_ON( !check_order(order) );
+
+    pd_table = map_domain_page(pde >> PAGE_SHIFT);
+
+    offset = (gfn >> order) & (~PTE_PER_TABLE_MASK);
+    pte = (u32*)(pd_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
 
     /* clear l1 entry */
-    l1e[0] = l1e[1] = 0;
-
-    unmap_domain_page(l1_table);
-}
-
-static void set_iommu_l1e_present(u64 l2e, unsigned long gfn,
-                                  u64 maddr, int iw, int ir)
+    pte[0] = pte[1] = 0;
+
+    unmap_domain_page(pd_table);
+}
+
+static void set_iommu_pte_present(u64 pde, unsigned long gfn,
+                                  u64 maddr, int iw, int ir, int order)
 {
     u64 addr_lo, addr_hi;
     u32 entry;
-    void *l1_table;
+    void *pd_table;
     int offset;
-    u32 *l1e;
-
-    l1_table = map_domain_page(l2e >> PAGE_SHIFT);
-
-    offset = gfn & (~PTE_PER_TABLE_MASK);
-    l1e = (u32*)((u8*)l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
-
+    u32 *pte;
+
+    BUG_ON( !check_order(order) );
+
+    pd_table = map_domain_page(pde >> PAGE_SHIFT);
+
+    offset = (gfn >> order) & (~PTE_PER_TABLE_MASK);
+    pte = (u32*)((u8*)pd_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
 
     addr_lo = maddr & DMA_32BIT_MASK;
     addr_hi = maddr >> 32;
@@ -194,7 +222,7 @@ static void set_iommu_l1e_present(u64 l2
                          IOMMU_CONTROL_DISABLED, entry,
                          IOMMU_PTE_IO_READ_PERMISSION_MASK,
                          IOMMU_PTE_IO_READ_PERMISSION_SHIFT, &entry);
-    l1e[1] = entry;
+    pte[1] = entry;
 
     set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
                          IOMMU_PTE_ADDR_LOW_MASK,
@@ -205,13 +233,12 @@ static void set_iommu_l1e_present(u64 l2
     set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
                          IOMMU_PTE_PRESENT_MASK,
                          IOMMU_PTE_PRESENT_SHIFT, &entry);
-    l1e[0] = entry;
-
-    unmap_domain_page(l1_table);
-}
-
-static void amd_iommu_set_page_directory_entry(u32 *pde,
-                                               u64 next_ptr, u8 next_level)
+    pte[0] = entry;
+
+    unmap_domain_page(pd_table);
+}
+
+static void set_iommu_pde_present(u32 *pde, u64 next_ptr, u8 next_level)
 {
     u64 addr_lo, addr_hi;
     u32 entry;
@@ -360,29 +387,11 @@ void amd_iommu_add_dev_table_entry(
     dte[3] = entry;
 }
 
-u64 amd_iommu_get_next_table_from_pte(u32 *entry)
-{
-    u64 addr_lo, addr_hi, ptr;
-
-    addr_lo = get_field_from_reg_u32(
-        entry[0],
-        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
-        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT);
-
-    addr_hi = get_field_from_reg_u32(
-        entry[1],
-        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
-        IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT);
-
-    ptr = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
-    return ptr;
-}
-
-static int amd_iommu_is_pte_present(u32 *entry)
-{
-    return (get_field_from_reg_u32(entry[0],
-                                   IOMMU_PDE_PRESENT_MASK,
-                                   IOMMU_PDE_PRESENT_SHIFT));
+static int is_pde_present(u32 *entry)
+{
+    return get_field_from_reg_u32(entry[0],
+                                  IOMMU_PDE_PRESENT_MASK,
+                                  IOMMU_PDE_PRESENT_SHIFT);
 }
 
 void invalidate_dev_table_entry(struct amd_iommu *iommu,
@@ -404,17 +413,27 @@ void invalidate_dev_table_entry(struct a
     send_iommu_command(iommu, cmd);
 }
 
-static u64 iommu_l2e_from_pfn(struct page_info *table, int level,
-                              unsigned long io_pfn)
+static u64 iommu_pt_from_pfn(struct page_info *table, int level,
+                             unsigned long io_pfn,
+                             int order, struct domain *d)
 {
     unsigned long offset;
     void *pde = NULL;
     void *table_vaddr;
     u64 next_table_maddr = 0;
-
-    BUG_ON( table == NULL || level == 0 );
-
-    while ( level > 1 )
+    unsigned long flags;
+    struct hvm_iommu *hd = domain_hvm_iommu(d);
+    struct amd_iommu *iommu;
+    u64 spfn, mask;
+    unsigned int lowest = order / PTE_PER_TABLE_SHIFT + 1;
+
+    BUG_ON( table == NULL || level == 0 ||
+            level < lowest || !check_order(order) );
+
+    if ( level == lowest )
+        return page_to_maddr(table);
+
+    while ( level > lowest )
     {
         offset = io_pfn >> ((PTE_PER_TABLE_SHIFT *
                              (level - IOMMU_PAGING_MODE_LEVEL_1)));
@@ -422,9 +441,9 @@ static u64 iommu_l2e_from_pfn(struct pag
 
         table_vaddr = __map_domain_page(table);
         pde = table_vaddr + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE);
-        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
-
-        if ( !amd_iommu_is_pte_present(pde) )
+        next_table_maddr = get_next_table_from_pte(pde);
+
+        if ( !is_pde_present(pde) )
         {
             if ( next_table_maddr == 0 )
             {
@@ -435,13 +454,41 @@ static u64 iommu_l2e_from_pfn(struct pag
                     return 0;
                 }
                 next_table_maddr = page_to_maddr(table);
-                amd_iommu_set_page_directory_entry(
-                    (u32 *)pde, next_table_maddr, level - 1);
+                set_iommu_pde_present((u32 *)pde,
+                                      next_table_maddr, level - 1);
             }
             else /* should never reach here */
                 return 0;
         }
+        /* The same gfn has been remapped as smaller page size */
+        else if ( next_table_maddr && is_pte((u32 *)pde) )
+        {
+            table = alloc_amd_iommu_pgtable();
+            if ( table == NULL )
+            {
+                printk("AMD-Vi: Cannot allocate I/O page table\n");
+                return 0;
+            }
+            next_table_maddr = page_to_maddr(table);
+            set_iommu_pde_present((u32 *)pde, next_table_maddr, level - 1);
+
+            /* The entire super page must be invalidated */
+            for_each_amd_iommu ( iommu )
+            {
+                spin_lock_irqsave(&iommu->lock, flags);
+
+                /* Round io_pfn to super page boundary */
+                mask = ~((1ULL << ((level - 1) * PTE_PER_TABLE_SHIFT)) - 1);
+                spfn = io_pfn & mask;
+                invalidate_iommu_pages(iommu, spfn << PAGE_SHIFT,
+                                       hd->domain_id,
+                                       (level - 1) * PTE_PER_TABLE_SHIFT);
+                flush_command_buffer(iommu);
+                spin_unlock_irqrestore(&iommu->lock, flags);
+            }
+        }
+
         unmap_domain_page(table_vaddr);
         table = maddr_to_page(next_table_maddr);
         level--;
@@ -453,60 +500,76 @@ int amd_iommu_map_page(struct domain *d,
 int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                        unsigned int flags)
 {
-    u64 iommu_l2e;
+    return amd_iommu_map_pages(d, gfn, mfn, 0, flags);
+}
+
+int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
+{
+    return amd_iommu_unmap_pages(d, gfn, 0);
+}
+
+int amd_iommu_map_pages(struct domain *d, unsigned long gfn,
+                        unsigned long mfn, unsigned int order,
+                        unsigned int flags)
+{
+    u64 iommu_pt;
     struct hvm_iommu *hd = domain_hvm_iommu(d);
 
-    BUG_ON( !hd->root_table );
+    BUG_ON( !hd->root_table || !check_order(order) );
 
     spin_lock(&hd->mapping_lock);
 
-    iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
-    if ( iommu_l2e == 0 )
+    iommu_pt = iommu_pt_from_pfn(hd->root_table,
+                                 hd->paging_mode, gfn, order, d);
+    if ( iommu_pt == 0 )
     {
         spin_unlock(&hd->mapping_lock);
-        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
+        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx, order = %d\n",
+                        gfn, order);
         domain_crash(d);
         return -EFAULT;
     }
-
-    set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT,
+    set_iommu_pte_present(iommu_pt, gfn, (u64)mfn << PAGE_SHIFT,
                           !!(flags & IOMMUF_writable),
-                          !!(flags & IOMMUF_readable));
+                          !!(flags & IOMMUF_readable), order);
 
     spin_unlock(&hd->mapping_lock);
     return 0;
 }
 
-int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
-{
-    u64 iommu_l2e;
+int amd_iommu_unmap_pages(struct domain *d, unsigned long gfn,
+                          unsigned int order)
+{
+    u64 iommu_pt;
     unsigned long flags;
     struct amd_iommu *iommu;
     struct hvm_iommu *hd = domain_hvm_iommu(d);
 
-    BUG_ON( !hd->root_table );
+    BUG_ON( !hd->root_table || !check_order(order) );
 
     spin_lock(&hd->mapping_lock);
 
-    iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
-
-    if ( iommu_l2e == 0 )
+    iommu_pt = iommu_pt_from_pfn(hd->root_table,
+                                 hd->paging_mode, gfn, 0, d);
+    if ( iommu_pt == 0 )
    {
         spin_unlock(&hd->mapping_lock);
-        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx\n", gfn);
+        AMD_IOMMU_DEBUG("Invalid IO pagetable entry gfn = %lx, order = %d\n",
+                        gfn, order);
         domain_crash(d);
         return -EFAULT;
     }
 
     /* mark PTE as 'page not present' */
-    clear_iommu_l1e_present(iommu_l2e, gfn);
+    clear_iommu_pte_present(iommu_pt, gfn, order);
     spin_unlock(&hd->mapping_lock);
 
     /* send INVALIDATE_IOMMU_PAGES command */
     for_each_amd_iommu ( iommu )
     {
         spin_lock_irqsave(&iommu->lock, flags);
-        invalidate_iommu_page(iommu, (u64)gfn << PAGE_SHIFT, hd->domain_id);
+        invalidate_iommu_pages(iommu, (u64)gfn << PAGE_SHIFT,
+                               hd->domain_id, order);
         flush_command_buffer(iommu);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
@@ -514,80 +577,38 @@ int amd_iommu_unmap_page(struct domain *
     return 0;
 }
 
-int amd_iommu_reserve_domain_unity_map(
-    struct domain *domain,
-    unsigned long phys_addr,
-    unsigned long size, int iw, int ir)
-{
-    u64 iommu_l2e;
+int amd_iommu_reserve_domain_unity_map(struct domain *domain,
+                                       u64 phys_addr,
+                                       unsigned long size, int iw, int ir)
+{
     unsigned long npages, i;
-    struct hvm_iommu *hd = domain_hvm_iommu(domain);
+    unsigned long gfn;
+    unsigned int flags = !!ir;
+    int rt = 0;
 
     npages = region_to_pages(phys_addr, size);
-
-    spin_lock(&hd->mapping_lock);
-    for ( i = 0; i < npages; ++i )
-    {
-        iommu_l2e = iommu_l2e_from_pfn(
-            hd->root_table, hd->paging_mode, phys_addr >> PAGE_SHIFT);
-
-        if ( iommu_l2e == 0 )
-        {
-            spin_unlock(&hd->mapping_lock);
-            AMD_IOMMU_DEBUG("Invalid IO pagetable entry phys_addr = %lx\n",
-                            phys_addr);
-            domain_crash(domain);
-            return -EFAULT;
-        }
-
-        set_iommu_l1e_present(iommu_l2e,
-            (phys_addr >> PAGE_SHIFT), phys_addr, iw, ir);
-
-        phys_addr += PAGE_SIZE;
-    }
-    spin_unlock(&hd->mapping_lock);
+    if ( iw )
+        flags |= IOMMUF_writable;
+    gfn = phys_addr >> PAGE_SHIFT;
+    for ( i = 0; i < npages; i++ )
+    {
+        rt = amd_iommu_map_pages(domain, gfn + i, gfn + i, 0, flags);
+        if ( rt != 0 )
+            return rt;
+    }
     return 0;
 }
 
 void invalidate_all_iommu_pages(struct domain *d)
 {
-    u32 cmd[4], entry;
     unsigned long flags;
     struct amd_iommu *iommu;
-    int domain_id = d->domain_id;
-    u64 addr_lo = 0x7FFFFFFFFFFFF000ULL & DMA_32BIT_MASK;
-    u64 addr_hi = 0x7FFFFFFFFFFFF000ULL >> 32;
-
-    set_field_in_reg_u32(domain_id, 0,
-                         IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_MASK,
-                         IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CMD_INVALIDATE_IOMMU_PAGES, entry,
-                         IOMMU_CMD_OPCODE_MASK, IOMMU_CMD_OPCODE_SHIFT,
-                         &entry);
-    cmd[1] = entry;
-
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
-                         IOMMU_INV_IOMMU_PAGES_S_FLAG_MASK,
-                         IOMMU_INV_IOMMU_PAGES_S_FLAG_SHIFT, &entry);
-    set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
-                         IOMMU_INV_IOMMU_PAGES_PDE_FLAG_MASK,
-                         IOMMU_INV_IOMMU_PAGES_PDE_FLAG_SHIFT, &entry);
-    set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, entry,
-                         IOMMU_INV_IOMMU_PAGES_ADDR_LOW_MASK,
-                         IOMMU_INV_IOMMU_PAGES_ADDR_LOW_SHIFT, &entry);
-    cmd[2] = entry;
-
-    set_field_in_reg_u32((u32)addr_hi, 0,
-                         IOMMU_INV_IOMMU_PAGES_ADDR_HIGH_MASK,
-                         IOMMU_INV_IOMMU_PAGES_ADDR_HIGH_SHIFT, &entry);
-    cmd[3] = entry;
-
-    cmd[0] = 0;
 
     for_each_amd_iommu ( iommu )
     {
         spin_lock_irqsave(&iommu->lock, flags);
-        send_iommu_command(iommu, cmd);
+        invalidate_iommu_pages(iommu, 0x7FFFFFFFFFFFF000ULL,
+                               d->domain_id, 0);
         flush_command_buffer(iommu);
         spin_unlock_irqrestore(&iommu->lock, flags);
     }
diff -r 1f6773300a7b xen/drivers/passthrough/amd/pci_amd_iommu.c
--- a/xen/drivers/passthrough/amd/pci_amd_iommu.c	Thu Dec 02 16:06:30 2010 +0100
+++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c	Fri Dec 03 15:48:50 2010 +0100
@@ -224,9 +224,7 @@ static int amd_iommu_domain_init(struct
         return -ENOMEM;
     }
 
-    hd->paging_mode = is_hvm_domain(d) ?
-        IOMMU_PAGE_TABLE_LEVEL_4 : get_paging_mode(max_page);
-
+    hd->paging_mode = get_paging_mode(max_page);
     hd->domain_id = d->domain_id;
 
     return 0;
@@ -337,8 +335,8 @@ static void deallocate_next_page_table(s
     for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
     {
         pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
-        next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
-        if ( next_table_maddr != 0 )
+        next_table_maddr = get_next_table_from_pte((u32*)pde);
+        if ( next_table_maddr != 0 && !is_pte((u32*)pde) )
         {
             deallocate_next_page_table(
                 maddr_to_page(next_table_maddr), level - 1);
@@ -362,7 +360,6 @@ static void deallocate_iommu_page_tables
     }
     spin_unlock(&hd->mapping_lock);
 }
-
 
 static void amd_iommu_domain_destroy(struct domain *d)
 {
diff -r 1f6773300a7b xen/include/asm-x86/hvm/svm/amd-iommu-proto.h
--- a/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Thu Dec 02 16:06:30 2010 +0100
+++ b/xen/include/asm-x86/hvm/svm/amd-iommu-proto.h	Fri Dec 03 15:48:50 2010 +0100
@@ -51,9 +51,8 @@ int amd_iommu_map_page(struct domain *d,
 int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn,
                        unsigned int flags);
 int amd_iommu_unmap_page(struct domain *d, unsigned long gfn);
-u64 amd_iommu_get_next_table_from_pte(u32 *entry);
 int amd_iommu_reserve_domain_unity_map(struct domain *domain,
-        unsigned long phys_addr, unsigned long size, int iw, int ir);
+        u64 phys_addr, unsigned long size, int iw, int ir);
 void invalidate_all_iommu_pages(struct domain *d);
 int amd_iommu_map_pages(struct domain *d, unsigned long gfn, unsigned long mfn,
@@ -158,4 +157,28 @@ static inline void __free_amd_iommu_tabl
     free_xenheap_pages(table, order);
 }
 
+static inline bool_t is_pte(u32 *pde)
+{
+    u32 next_level = get_field_from_reg_u32(pde[0],
+                                            IOMMU_PDE_NEXT_LEVEL_MASK,
+                                            IOMMU_PDE_NEXT_LEVEL_SHIFT);
+    return ( next_level == 7 || next_level == 0 );
+}
+
+static inline u64 get_next_table_from_pte(u32 *entry)
+{
+    u64 addr_lo, addr_hi, ptr;
+
+    addr_lo = get_field_from_reg_u32(entry[0],
+                                     IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_MASK,
+                                     IOMMU_DEV_TABLE_PAGE_TABLE_PTR_LOW_SHIFT);
+
+    addr_hi = get_field_from_reg_u32(entry[1],
+                                     IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_MASK,
+                                     IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT);
+
+    ptr = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
+    return ptr;
+}
+
 #endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */