# HG changeset patch
# User yamahata@xxxxxxxxxxxxx
# Date 1180581248 -32400
# Node ID 8ebdf907115d39def9ec226c99fd85f919f55398
# Parent  2b14a1f22eecc73807578ceb78fa994449911d8b
foreign domain p2m table exposure. unfinished. xen side and linux side.

PATCHNAME: foreign_domain_p2m_exposure

Signed-off-by: Isaku Yamahata
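For reference, the intended caller is user space in the privileged domain, going through the privcmd driver, with the argument layout documented in xen_foreign_p2m_expose() below (arg0 = cmd, arg1 = va of a read-only privcmd mapping, arg2 = domid, arg3 = user pointer to xen_ia64_memmap_info, arg4 = flags). The sketch below is illustrative only and not part of the patch: the header names, IOCTL_PRIVCMD_HYPERCALL and the privcmd_hypercall_t layout are assumed from the privcmd driver of this tree, and error handling is minimal.

/* Hypothetical dom0 user-space caller -- a sketch only, not part of the patch.
 * Assumes the privcmd driver and its hypercall ioctl; header names may differ. */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <xen/public/privcmd.h>       /* privcmd_hypercall_t, IOCTL_PRIVCMD_HYPERCALL (assumed path) */
#include <xen/interface/arch-ia64.h>  /* IA64_DOM0VP_*, __HYPERVISOR_ia64_dom0vp_op (assumed path) */

/* Map the foreign domain's p2m table; returns the mapping or MAP_FAILED. */
static void *expose_foreign_p2m(int privcmd_fd, domid_t domid,
                                struct xen_ia64_memmap_info *memmap_info,
                                size_t map_size, unsigned long flags)
{
    privcmd_hypercall_t hc;
    void *va;

    /* The Linux side requires arg1 to be the start of a read-only,
     * single-shot privcmd mapping (see xen_foreign_p2m_expose() below). */
    va = mmap(NULL, map_size, PROT_READ, MAP_SHARED, privcmd_fd, 0);
    if (va == MAP_FAILED)
        return MAP_FAILED;

    memset(&hc, 0, sizeof(hc));
    hc.op     = __HYPERVISOR_ia64_dom0vp_op;
    hc.arg[0] = IA64_DOM0VP_expose_foreign_p2m;   /* cmd */
    hc.arg[1] = (unsigned long)va;                /* va of the privcmd mapping */
    hc.arg[2] = domid;                            /* foreign domain */
    hc.arg[3] = (unsigned long)memmap_info;       /* __user memmap_info */
    hc.arg[4] = flags;                            /* e.g. IA64_DOM0VP_EFP_ALLOC_PTE */

    if (ioctl(privcmd_fd, IOCTL_PRIVCMD_HYPERCALL, &hc) < 0) {
        munmap(va, map_size);
        return MAP_FAILED;
    }
    return va;
}

Teardown is handled by the vma close callback added below; see also the notes at the end of the patch.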
diff -r 2b14a1f22eec -r 8ebdf907115d linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Fri May 25 09:43:21 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/hypervisor.c	Thu May 31 12:14:08 2007 +0900
@@ -580,6 +580,10 @@ struct xen_ia64_privcmd_range {
     unsigned long pgoff; // in PAGE_SIZE
     struct resource* res;
 
+    // for foreign domain p2m mapping
+    void* private;
+    void (*callback)(struct xen_ia64_privcmd_range* range, void* arg);
+
     unsigned long num_entries;
     struct xen_ia64_privcmd_entry entries[0];
 };
@@ -761,6 +765,9 @@ xen_ia64_privcmd_vma_close(struct vm_are
             BUG_ON(entry->gpfn != INVALID_GPFN);
         }
 #endif
+        if (privcmd_range->callback)
+            (*privcmd_range->callback)(privcmd_range,
+                                       privcmd_range->private);
         release_resource(privcmd_range->res);
         kfree(privcmd_range->res);
         vfree(privcmd_range);
@@ -821,6 +828,8 @@ privcmd_mmap(struct file * file, struct 
     atomic_set(&privcmd_range->ref_count, 1);
     privcmd_range->pgoff = vma->vm_pgoff;
     privcmd_range->num_entries = num_entries;
+    privcmd_range->private = NULL;
+    privcmd_range->callback = NULL;
     for (i = 0; i < privcmd_range->num_entries; i++) {
         xen_ia64_privcmd_init_entry(&privcmd_range->entries[i]);
     }
@@ -1197,6 +1206,195 @@ EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
 EXPORT_SYMBOL_GPL(p2m_convert_max_pfn);
 EXPORT_SYMBOL_GPL(p2m_pte);
 EXPORT_SYMBOL_GPL(p2m_phystomach);
+
+///////////////////////////////////////////////////////////////////////////
+// foreign domain p2m mapping
+#include
+#include
+
+struct foreign_p2m_private {
+    unsigned long gpfn;
+    domid_t domid;
+};
+
+static void
+xen_foreign_p2m_unexpose(struct xen_ia64_privcmd_range* privcmd_range,
+                         void* arg)
+{
+    struct foreign_p2m_private* private = (struct foreign_p2m_private*)arg;
+    int ret;
+
+    privcmd_range->private = NULL;
+    privcmd_range->callback = NULL;
+
+    ret = HYPERVISOR_unexpose_foreign_p2m(private->gpfn, private->domid);
+    if (ret)
+        printk(KERN_WARNING
+               "unexpose_foreign_p2m hypercall failed.\n");
+    kfree(private);
+}
+
+int
+xen_foreign_p2m_expose(privcmd_hypercall_t* hypercall)
+{
+    // hypercall->
+    // arg0: cmd = IA64_DOM0VP_expose_foreign_p2m
+    // arg1: va
+    // arg2: domid
+    // arg3: __user* memmap_info
+    // arg4: flags
+
+    int ret = 0;
+    struct mm_struct* mm = current->mm;
+
+    unsigned long vaddr = hypercall->arg[1];
+    domid_t domid = hypercall->arg[2];
+    struct xen_ia64_memmap_info __user *u_memmap_info =
+        (struct xen_ia64_memmap_info* __user)hypercall->arg[3];
+
+    struct xen_ia64_memmap_info memmap_info;
+    size_t memmap_size;
+    struct xen_ia64_memmap_info* k_memmap_info = NULL;
+    unsigned long p2m_size;
+    struct resource* res;
+    unsigned long gpfn;
+
+    struct vm_area_struct* vma;
+    void* p;
+    unsigned long prev_src_gpfn_end;
+
+    struct xen_ia64_privcmd_vma* privcmd_vma;
+    struct xen_ia64_privcmd_range* privcmd_range;
+    struct foreign_p2m_private* private = NULL;
+
+    BUG_ON(hypercall->arg[0] != IA64_DOM0VP_expose_foreign_p2m);
+
+    private = kmalloc(sizeof(*private), GFP_KERNEL);
+    if (private == NULL)
+        return -ENOMEM;
+
+    if (copy_from_user(&memmap_info, u_memmap_info,
+                       sizeof(memmap_info)))
+        return -EFAULT;
+    memmap_size = sizeof(*k_memmap_info) + memmap_info.efi_memmap_size;
+    k_memmap_info = kmalloc(memmap_size, GFP_KERNEL);
+    if (k_memmap_info == NULL)
+        return -ENOMEM;
+    if (copy_from_user(k_memmap_info, u_memmap_info, memmap_size)) {
+        ret = -EFAULT;
+        goto kfree_out;
+    }
+    /* XXX integrity check k_memmap_info */
+
+    p2m_size = HYPERVISOR_memory_op(XENMEM_maximum_gpfn, &domid);
+    if (p2m_size < 0) {
+        ret = p2m_size;
+        goto kfree_out;
+    }
+    p2m_size = (p2m_size + PAGE_SIZE - 1) & PAGE_MASK;
+
+    down_read(&mm->mmap_sem);
+
+    vma = find_vma(mm, vaddr);
+    if (vma == NULL || vma->vm_ops != &xen_ia64_privcmd_vm_ops ||
+        vaddr != vma->vm_start ||
+        (vma->vm_flags & VM_WRITE) || (vma->vm_flags & VM_EXEC) ||
+        !privcmd_enforce_singleshot_mapping(vma))
+        goto mmap_out;
+
+    privcmd_vma = (struct xen_ia64_privcmd_vma*)vma->vm_private_data;
+    res = privcmd_vma->range->res;
+    if (p2m_size > (res->end - res->start) ||
+        p2m_size > vma->vm_end - vma->vm_start) {
+        ret = -EINVAL;
+        goto mmap_out;
+    }
+
+    gpfn = res->start >> PAGE_SHIFT;
+    // arg0: dest_gpfn
+    // arg1: domid
+    // arg2: XEN_GUEST_HANDLE(char) buffer: memmap_info
+    // arg3: flags
+    ret = xencomm_arch_expose_foreign_p2m(gpfn, domid,
+              xencomm_create_inline(k_memmap_info), hypercall->arg[4]);
+    if (ret)
+        goto mmap_out;
+
+    privcmd_range = (struct xen_ia64_privcmd_range*)privcmd_vma->range;
+    prev_src_gpfn_end = 0;
+    for (p = k_memmap_info->memdesc;
+         p < (void*)k_memmap_info->memdesc + k_memmap_info->efi_memmap_size;
+         p += k_memmap_info->efi_memdesc_size) {
+        efi_memory_desc_t* md = p;
+        unsigned long src_gpfn = md->phys_addr >> PAGE_SHIFT;
+        unsigned long src_gpfn_end =
+            (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >>
+            PAGE_SHIFT;
+        unsigned long num_src_gpfn;
+        unsigned long gpfn_offset;
+        unsigned long size;
+        unsigned int i;
+
+        if (src_gpfn <= prev_src_gpfn_end)
+            src_gpfn = prev_src_gpfn_end;
+        if (src_gpfn_end <= prev_src_gpfn_end)
+            continue;
+
+        src_gpfn &= ~(PTRS_PER_PTE - 1);
+        num_src_gpfn = src_gpfn_end - src_gpfn;
+        num_src_gpfn |= PTRS_PER_PTE - 1;
+
+        gpfn_offset = src_gpfn / PTRS_PER_PTE;
+        gpfn_offset &= ~(PTRS_PER_PTE - 1);
+
+        size = ((num_src_gpfn + PTRS_PER_PTE - 1) / PTRS_PER_PTE) <<
+            PAGE_SHIFT;
+
+        prev_src_gpfn_end = src_gpfn + num_src_gpfn;
+        ret = remap_pfn_range(vma,
+                              vaddr + (gpfn_offset << PAGE_SHIFT),
+                              gpfn + gpfn_offset, size,
+                              vma->vm_page_prot);
+        if (ret) {
+            for (i = 0; i < gpfn + gpfn_offset; i++) {
+                struct xen_ia64_privcmd_entry* entry =
+                    &privcmd_range->entries[i];
+                BUG_ON(atomic_read(&entry->map_count) != 1 ||
+                       atomic_read(&entry->map_count) != 0);
+                atomic_set(&entry->map_count, 0);
+                entry->gpfn = INVALID_GPFN;
+            }
+            (void)HYPERVISOR_unexpose_foreign_p2m(gpfn, domid);
+            goto mmap_out;
+        }
+
+        for (i = gpfn_offset;
+             i < gpfn_offset + (size >> PAGE_SHIFT);
+             i++) {
+            struct xen_ia64_privcmd_entry* entry =
+                &privcmd_range->entries[i];
+            BUG_ON(atomic_read(&entry->map_count) != 0);
+            BUG_ON(entry->gpfn != INVALID_GPFN);
+            atomic_inc(&entry->map_count);
+            entry->gpfn = gpfn + i;
+        }
+    }
+
+    private->gpfn = gpfn;
+    private->domid = domid;
+
+    privcmd_range->callback = &xen_foreign_p2m_unexpose;
+    privcmd_range->private = private;
+
+mmap_out:
+    up_read(&mm->mmap_sem);
+kfree_out:
+    kfree(k_memmap_info);
+    kfree(private);
+    return ret;
+}
+
 #endif
 
 ///////////////////////////////////////////////////////////////////////////
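On success, the privcmd mapping set up above contains the foreign domain's p2m pte pages, read-only and indexed by the foreign gpfn (one 8-byte pte per frame), in the same way as the existing local dom0vp_expose_p2m interface. A hypothetical reader might look like the sketch below, where p2m is the va returned by the user-space sketch earlier; the pte field constants are placeholders for the ia64 definitions and are not taken from this patch.

/* Hypothetical reader of an exposed foreign p2m mapping -- a sketch only. */
#include <stdint.h>

/* Assumed ia64 pte fields -- placeholders, not names from this patch. */
#define FP2M_PTE_PRESENT   0x1UL                 /* bit 0: present */
#define FP2M_PTE_PPN_MASK  0x0003fffffffff000UL  /* physical page number field */

/* Look up the machine address backing a foreign gpfn in the exposed p2m
 * mapping.  Returns 0 on success and fills *maddr, -1 if not populated. */
static int foreign_p2m_lookup(const volatile uint64_t *p2m, unsigned long gpfn,
                              uint64_t *maddr)
{
    uint64_t pte = p2m[gpfn];   /* one 8-byte pte per foreign page frame */

    if (!(pte & FP2M_PTE_PRESENT))
        return -1;
    *maddr = pte & FP2M_PTE_PPN_MASK;
    return 0;
}

Entries that are backed only by p2m_pte_zero_page read as not-present.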
diff -r 2b14a1f22eec -r 8ebdf907115d linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c	Fri May 25 09:43:21 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_hcall.c	Thu May 31 12:14:08 2007 +0900
@@ -239,7 +239,10 @@ xencomm_hypercall_memory_op(unsigned int
         xen_guest_handle(xmr->extent_start);
         xencommize_memory_reservation((xen_memory_reservation_t *)arg);
         break;
-
+
+    case XENMEM_maximum_gpfn:
+        break;
+
     case XENMEM_maximum_ram_page:
         break;
 
diff -r 2b14a1f22eec -r 8ebdf907115d linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c
--- a/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c	Fri May 25 09:43:21 2007 -0600
+++ b/linux-2.6-xen-sparse/arch/ia64/xen/xcom_privcmd.c	Thu May 31 12:14:08 2007 +0900
@@ -682,6 +682,11 @@ xencomm_privcmd_ia64_dom0vp_op(privcmd_h
         ret = -EFAULT;
         break;
     }
+    case IA64_DOM0VP_expose_foreign_p2m: {
+        int xen_foreign_p2m_expose(privcmd_hypercall_t* hypercall);
+        ret = xen_foreign_p2m_expose(hypercall);
+        break;
+    }
     default:
         printk("%s: unknown IA64 DOM0VP op %d\n", __func__, cmd);
         ret = -EINVAL;
diff -r 2b14a1f22eec -r 8ebdf907115d linux-2.6-xen-sparse/include/asm-ia64/hypercall.h
--- a/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h	Fri May 25 09:43:21 2007 -0600
+++ b/linux-2.6-xen-sparse/include/asm-ia64/hypercall.h	Thu May 31 12:14:08 2007 +0900
@@ -370,6 +370,23 @@ HYPERVISOR_expose_p2m(unsigned long conv
     return _hypercall5(unsigned long, ia64_dom0vp_op, IA64_DOM0VP_expose_p2m,
                        conv_start_gpfn, assign_start_gpfn,
                        expose_size, granule_pfn);
+}
+
+static inline int
+xencomm_arch_expose_foreign_p2m(unsigned long gpfn,
+                                domid_t domid, struct xencomm_handle *arg,
+                                unsigned long flags)
+{
+    return _hypercall5(int, ia64_dom0vp_op,
+                       IA64_DOM0VP_expose_foreign_p2m,
+                       gpfn, domid, arg, flags);
+}
+
+static inline int
+HYPERVISOR_unexpose_foreign_p2m(unsigned long gpfn, domid_t domid)
+{
+    return _hypercall3(int, ia64_dom0vp_op,
+                       IA64_DOM0VP_unexpose_foreign_p2m, gpfn, domid);
 }
 
 #endif
diff -r 2b14a1f22eec -r 8ebdf907115d xen/arch/ia64/xen/dom0_ops.c
--- a/xen/arch/ia64/xen/dom0_ops.c	Fri May 25 09:43:21 2007 -0600
+++ b/xen/arch/ia64/xen/dom0_ops.c	Thu May 31 12:14:08 2007 +0900
@@ -419,6 +419,15 @@ do_dom0vp_op(unsigned long cmd,
         ret = dom0vp_fpswa_revision(hnd);
         break;
     }
+    case IA64_DOM0VP_expose_foreign_p2m: {
+        XEN_GUEST_HANDLE(char) hnd;
+        set_xen_guest_handle(hnd, (char*)arg2);
+        ret = dom0vp_expose_foreign_p2m(d, arg0, (domid_t)arg1, hnd, arg3);
+        break;
+    }
+    case IA64_DOM0VP_unexpose_foreign_p2m:
+        ret = dom0vp_unexpose_foreign_p2m(d, arg0, arg1);
+        break;
     default:
         ret = -1;
         printk("unknown dom0_vp_op 0x%lx\n", cmd);
diff -r 2b14a1f22eec -r 8ebdf907115d xen/arch/ia64/xen/domain.c
--- a/xen/arch/ia64/xen/domain.c	Fri May 25 09:43:21 2007 -0600
+++ b/xen/arch/ia64/xen/domain.c	Thu May 31 12:14:08 2007 +0900
@@ -530,6 +530,7 @@ int arch_domain_create(struct domain *d)
     if (is_idle_domain(d))
         return 0;
 
+    foreign_p2m_init(d);
 #ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
     d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;
     dprintk(XENLOG_INFO, "%s:%d domain %d pervcpu_vhpt %d\n",
diff -r 2b14a1f22eec -r 8ebdf907115d xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c	Fri May 25 09:43:21 2007 -0600
+++ b/xen/arch/ia64/xen/mm.c	Thu May 31 12:14:08 2007 +0900
@@ -303,6 +303,8 @@ mm_teardown(struct domain* d)
             continue;
         mm_teardown_pgd(d, pgd, i << PGDIR_SHIFT);
     }
+
+    foreign_p2m_destroy(d);
 }
 
 static void
@@ -808,7 +810,7 @@ assign_new_domain0_page(struct domain *d
     if (pte_none(*pte)) {
         struct page_info *p = __assign_new_domain_page(d, mpaddr, pte);
         if (p == NULL) {
-            panic("%s: can't allocate page for dom0", __func__);
+            panic("%s: can't allocate page for dom0\n", __func__);
         }
     }
 }
@@ -1390,6 +1392,43 @@ expose_p2m_init(void)
     p2m_pte_zero_page = virt_to_page(pte);
 }
 
+// allocate pgd, pmd of dest_dom if necessary
+static int
+allocate_pgd_pmd(struct domain* dest_dom, unsigned long dest_gpfn,
+                 struct domain* src_dom,
+                 unsigned long src_gpfn, unsigned long num_src_gpfn)
+{
+    unsigned long i;
+    BUG_ON((src_gpfn % PTRS_PER_PTE) != 0);
+    BUG_ON((num_src_gpfn % PTRS_PER_PTE) != 0);
+
+    i = 0;
+    while (i < num_src_gpfn) {
+        volatile pte_t* src_pte;
+        volatile pte_t* dest_pte;
+        src_pte = lookup_noalloc_domain_pte(src_dom,
+                                            (src_gpfn + i) << PAGE_SHIFT);
+        if (src_pte == NULL) {
+            i++;
+            continue;
+        }
+
+        dest_pte = lookup_alloc_domain_pte(dest_dom,
+                                           (dest_gpfn << PAGE_SHIFT) +
+                                           i * sizeof(pte_t));
+        if (dest_pte == NULL) {
+            gdprintk(XENLOG_INFO, "%s failed to allocate pte page\n",
+                     __func__);
+            return -ENOMEM;
+        }
+
+        // skip to next pte page
+        i += PTRS_PER_PTE;
+        i &= ~(PTRS_PER_PTE - 1);
+    }
+    return 0;
+}
+
 static int
 expose_p2m_page(struct domain* d, unsigned long mpaddr, struct page_info* page)
 {
@@ -1404,6 +1443,91 @@ expose_p2m_page(struct domain* d, unsign
                            ASSIGN_readonly);
 }
 
+// expose pte page
+static int
+expose_p2m_range(struct domain* dest_dom, unsigned long dest_gpfn,
+                 struct domain* src_dom,
+                 unsigned long src_gpfn, unsigned long num_src_gpfn)
+{
+    unsigned long i;
+    BUG_ON((src_gpfn % PTRS_PER_PTE) != 0);
+    BUG_ON((num_src_gpfn % PTRS_PER_PTE) != 0);
+
+    i = 0;
+    while (i < num_src_gpfn) {
+        volatile pte_t* pte;
+        pte = lookup_noalloc_domain_pte(src_dom, (src_gpfn + i) << PAGE_SHIFT);
+        if (pte == NULL) {
+            i++;
+            continue;
+        }
+
+        if (expose_p2m_page(dest_dom,
+                            (dest_gpfn << PAGE_SHIFT) + i * sizeof(pte_t),
+                            virt_to_page(pte)) < 0) {
+            gdprintk(XENLOG_INFO, "%s failed to assign page\n", __func__);
+            return -EAGAIN;
+        }
+
+        // skip to next pte page
+        i += PTRS_PER_PTE;
+        i &= ~(PTRS_PER_PTE - 1);
+    }
+
+    return 0;
+}
+
+// expose p2m_pte_zero_page
+static int
+expose_zero_page(struct domain* dest_dom, unsigned long dest_gpfn,
+                 unsigned long num_src_gpfn)
+{
+    unsigned long i;
+
+    for (i = 0; i < num_src_gpfn / PTRS_PER_PTE + 1; i++) {
+        volatile pte_t* pte;
+        pte = lookup_noalloc_domain_pte(dest_dom,
+                                        (dest_gpfn + i) << PAGE_SHIFT);
+        if (pte == NULL || pte_present(*pte))
+            continue;
+
+        if (expose_p2m_page(dest_dom, (dest_gpfn + i) << PAGE_SHIFT,
+                            p2m_pte_zero_page) < 0) {
+            gdprintk(XENLOG_INFO, "%s failed to assign zero-pte page\n",
+                     __func__);
+            return -EAGAIN;
+        }
+    }
+    return 0;
+}
+
+static int
+expose_p2m(struct domain* dest_dom, unsigned long dest_gpfn,
+           struct domain* src_dom,
+           unsigned long src_gpfn, unsigned long num_src_gpfn)
+{
+    if (allocate_pgd_pmd(dest_dom, dest_gpfn,
+                         src_dom, src_gpfn, num_src_gpfn))
+        return -ENOMEM;
+    if (expose_p2m_range(dest_dom, dest_gpfn,
+                         src_dom, src_gpfn, num_src_gpfn))
+        return -EAGAIN;
+    if (expose_zero_page(dest_dom, dest_gpfn, num_src_gpfn))
+        return -EAGAIN;
+
+    return 0;
+}
+
+static void
+unexpose_p2m(struct domain* dest_dom,
+             unsigned long dest_gpfn, unsigned long num_dest_gpfn)
+{
+    unsigned long i;
+    for (i = 0; i < num_dest_gpfn; i++) {
+        zap_domain_page_one(dest_dom, (dest_gpfn + i) << PAGE_SHIFT,
+                            0, INVALID_MFN, INVALID_VADDR);
+    }
+}
+
 // It is possible to optimize loop, But this isn't performance critical.
 unsigned long
 dom0vp_expose_p2m(struct domain* d,
@@ -1411,10 +1535,8 @@ dom0vp_expose_p2m(struct domain* d,
                   unsigned long assign_start_gpfn,
                   unsigned long expose_size, unsigned long granule_pfn)
 {
+    unsigned long ret;
     unsigned long expose_num_pfn = expose_size >> PAGE_SHIFT;
-    unsigned long i;
-    volatile pte_t* conv_pte;
-    volatile pte_t* assign_pte;
 
     if ((expose_size % PAGE_SIZE) != 0 ||
         (granule_pfn % PTRS_PER_PTE) != 0 ||
@@ -1436,64 +1558,361 @@ dom0vp_expose_p2m(struct domain* d,
         return -ENOSYS;
     }
 
-    // allocate pgd, pmd.
-    i = conv_start_gpfn;
-    while (i < expose_num_pfn) {
-        conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) <<
-                                             PAGE_SHIFT);
-        if (conv_pte == NULL) {
-            i++;
+    ret = expose_p2m(d, assign_start_gpfn,
+                     d, conv_start_gpfn, expose_num_pfn);
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
+    return ret;
+}
+
+static int
+memmap_info_copy_from_guest(struct xen_ia64_memmap_info* memmap_info,
+                            char** memmap_p,
+                            XEN_GUEST_HANDLE(char) buffer)
+{
+    char* memmap;
+
+    if (copy_from_guest((char*)memmap_info, buffer, sizeof(*memmap_info)))
+        return -EFAULT;
+    if (memmap_info->efi_memmap_size == 0)
+        return -EINVAL;
+
+    memmap = _xmalloc(memmap_info->efi_memmap_size,
+                      __alignof__(efi_memory_desc_t));
+    if (memmap == NULL)
+        return -ENOMEM;
+    if (copy_from_guest_offset(memmap, buffer, sizeof(*memmap_info),
+                               memmap_info->efi_memmap_size)) {
+        xfree(memmap);
+        return -EFAULT;
+    }
+
+    if (memmap_info->efi_memmap_size == 0 ||
+        memmap_info->efi_memdesc_size < sizeof(efi_memory_desc_t) ||
+        memmap_info->efi_memmap_size < memmap_info->efi_memdesc_size) {
+        xfree(memmap);
+        return -EINVAL;
+    }
+
+    *memmap_p = memmap;
+    return 0;
+}
+
+static int
+foreign_p2m_allocate_pte(struct domain* d,
+                         const struct xen_ia64_memmap_info* memmap_info,
+                         const void* memmap)
+{
+    const void* memmap_end = memmap + memmap_info->efi_memmap_size;
+    const void* p;
+
+    for (p = memmap; p < memmap_end; p += memmap_info->efi_memdesc_size) {
+        const efi_memory_desc_t* md = p;
+        unsigned long start = md->phys_addr;
+        unsigned long end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT);
+        unsigned long gpaddr;
+
+        for (gpaddr = start; gpaddr < end; gpaddr += PAGE_SIZE) {
+            if (lookup_alloc_domain_pte(d, gpaddr) == NULL) {
+                return -ENOMEM;
+            }
+        }
+    }
+
+    return 0;
+}
+
+struct foreign_p2m_region {
+    unsigned long gpfn;
+    unsigned long num_gpfn;
+};
+
+struct foreign_p2m_entry {
+    struct list_head list;
+    int busy;
+
+    /* src domain */
+    struct domain* src_dom;
+
+    /* region into which foreign p2m table is mapped */
+    unsigned long gpfn;
+    unsigned long num_gpfn;
+    unsigned int num_region;
+    struct foreign_p2m_region region[];
+};
+
+/* caller must increment the reference count of src_dom */
+static int
+foreign_p2m_alloc(struct foreign_p2m* foreign_p2m,
+                  unsigned long dest_gpfn, struct domain* src_dom,
+                  struct xen_ia64_memmap_info* memmap_info, void* memmap,
+                  struct foreign_p2m_entry** entryp)
+{
+    void* memmap_end = memmap + memmap_info->efi_memmap_size;
+    efi_memory_desc_t* md;
+    unsigned long dest_gpfn_end;
+    unsigned long src_gpfn;
+    unsigned long src_gpfn_end;
+
+    unsigned int num_region;
+    struct foreign_p2m_entry* entry;
+    struct foreign_p2m_entry* pos;
+
+    num_region = (memmap_end - memmap) / memmap_info->efi_memdesc_size;
+
+    md = memmap;
+    src_gpfn = md->phys_addr >> PAGE_SHIFT;
+    src_gpfn &= ~(PTRS_PER_PTE - 1);
+
+    md = memmap + (num_region - 1) * memmap_info->efi_memdesc_size;
+    src_gpfn_end =
+        (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
+    if (src_gpfn_end > (src_dom->arch.convmem_end >> PAGE_SHIFT))
+        return -EINVAL;
+
+    src_gpfn_end |= PTRS_PER_PTE - 1;
+    dest_gpfn_end = dest_gpfn + (src_gpfn_end - src_gpfn) / PTRS_PER_PTE;
+
+    entry = _xmalloc(sizeof(*entry) + num_region * sizeof(entry->region[0]),
+                     __alignof__(*entry));
+    if (entry == NULL)
+        return -ENOMEM;
+
+    entry->busy = 1;
+    entry->gpfn = dest_gpfn;
+    entry->num_gpfn = dest_gpfn_end - dest_gpfn + 1;
+    entry->src_dom = src_dom;
+    entry->num_region = 0;
+    memset(entry->region, 0, sizeof(entry->region[0]) * num_region);
+
+    spin_lock(&foreign_p2m->lock);
+    list_for_each_entry(pos, &foreign_p2m->head, list) {
+        if (pos->gpfn + pos->num_gpfn < dest_gpfn)
             continue;
-        }
-
-        assign_pte = lookup_alloc_domain_pte(d, (assign_start_gpfn <<
-                     PAGE_SHIFT) + i * sizeof(pte_t));
-        if (assign_pte == NULL) {
-            gdprintk(XENLOG_INFO, "%s failed to allocate pte page\n", __func__);
-            return -ENOMEM;
-        }
-
-        // skip to next pte page
-        i += PTRS_PER_PTE;
-        i &= ~(PTRS_PER_PTE - 1);
-    }
-
-    // expose pte page
-    i = 0;
-    while (i < expose_num_pfn) {
-        conv_pte = lookup_noalloc_domain_pte(d, (conv_start_gpfn + i) <<
-                                             PAGE_SHIFT);
-        if (conv_pte == NULL) {
-            i++;
+
+        if (dest_gpfn_end < pos->gpfn) {
+            list_add(&entry->list, &pos->list);
+            spin_unlock(&foreign_p2m->lock);
+            *entryp = entry;
+            return 0;
+        }
+
+        /* overlap */
+        break;
+    }
+    spin_unlock(&foreign_p2m->lock);
+    xfree(entry);
+    return -EBUSY;
+}
+
+static void
+foreign_p2m_unexpose(struct domain* dest_dom, struct foreign_p2m_entry* entry)
+{
+    unsigned int i;
+    BUG_ON(!entry->busy);
+    for (i = 0; i < entry->num_region; i++)
+        unexpose_p2m(dest_dom,
+                     entry->region[i].gpfn, entry->region[i].num_gpfn);
+}
+
+static void
+foreign_p2m_unbusy(struct foreign_p2m* foreign_p2m,
+                   struct foreign_p2m_entry* entry)
+{
+    spin_lock(&foreign_p2m->lock);
+    BUG_ON(!entry->busy);
+    entry->busy = 0;
+    spin_unlock(&foreign_p2m->lock);
+}
+
+static void
+foreign_p2m_free(struct foreign_p2m* foreign_p2m,
+                 struct foreign_p2m_entry* entry)
+{
+    spin_lock(&foreign_p2m->lock);
+    BUG_ON(!entry->busy);
+    list_del(&entry->list);
+    spin_unlock(&foreign_p2m->lock);
+
+    put_domain(entry->src_dom);
+    xfree(entry);
+}
+
+void
+foreign_p2m_init(struct domain* d)
+{
+    struct foreign_p2m* foreign_p2m = &d->arch.foreign_p2m;
+    INIT_LIST_HEAD(&foreign_p2m->head);
+    spin_lock_init(&foreign_p2m->lock);
+}
+
+void
+foreign_p2m_destroy(struct domain* d)
+{
+    struct foreign_p2m* foreign_p2m = &d->arch.foreign_p2m;
+    struct foreign_p2m_entry* entry;
+    struct foreign_p2m_entry* n;
+
+    spin_lock(&foreign_p2m->lock);
+    list_for_each_entry_safe(entry, n, &foreign_p2m->head, list) {
+        /* mm_teardown() cleared p2m table already */
+        /* foreign_p2m_unexpose(d, entry);*/
+        list_del(&entry->list);
+        put_domain(entry->src_dom);
+        xfree(entry);
+    }
+    spin_unlock(&foreign_p2m->lock);
+}
+
+unsigned long
+dom0vp_expose_foreign_p2m(struct domain* dest_dom,
+                          unsigned long dest_gpfn,
+                          domid_t domid,
+                          XEN_GUEST_HANDLE(char) buffer,
+                          unsigned long flags)
+{
+    unsigned long ret = 0;
+    struct domain* src_dom;
+    struct xen_ia64_memmap_info memmap_info;
+    char* memmap;
+    void* memmap_end;
+    void* p;
+
+    struct foreign_p2m_entry* entry;
+    unsigned long prev_src_gpfn_end;
+
+    ret = memmap_info_copy_from_guest(&memmap_info, &memmap, buffer);
+    if (ret != 0)
+        return ret;
+
+    dest_dom = rcu_lock_domain(dest_dom);
+    if (dest_dom == NULL) {
+        ret = -EINVAL;
+        goto out;
+    }
+#if 1
+    // Self foreign domain p2m exposure isn't allowed.
+    // Otherwise the domain can't be destroyed because
+    // no one decrements the domain reference count.
+    if (domid == dest_dom->domain_id) {
+        ret = -EINVAL;
+        goto out;
+    }
+#endif
+
+    src_dom = get_domain_by_id(domid);
+    if (src_dom == NULL) {
+        ret = -EINVAL;
+        goto out_unlock;
+    }
+
+    if (flags & IA64_DOM0VP_EFP_ALLOC_PTE) {
+        ret = foreign_p2m_allocate_pte(src_dom, &memmap_info, memmap);
+        if (ret != 0)
+            goto out_unlock;
+    }
+
+    ret = foreign_p2m_alloc(&dest_dom->arch.foreign_p2m, dest_gpfn,
+                            src_dom, &memmap_info, memmap, &entry);
+    if (ret != 0)
+        goto out_unlock;
+
+    prev_src_gpfn_end = 0;
+    memmap_end = memmap + memmap_info.efi_memmap_size;
+    for (p = memmap; p < memmap_end; p += memmap_info.efi_memdesc_size) {
+        efi_memory_desc_t* md = p;
+        unsigned long src_gpfn = md->phys_addr >> PAGE_SHIFT;
+        unsigned long src_gpfn_end =
+            (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> PAGE_SHIFT;
+        unsigned long num_src_gpfn;
+
+        if (src_gpfn <= prev_src_gpfn_end)
+            src_gpfn = prev_src_gpfn_end;
+        if (src_gpfn_end <= prev_src_gpfn_end)
             continue;
-        }
-
-        if (expose_p2m_page(d, (assign_start_gpfn << PAGE_SHIFT) +
-                            i * sizeof(pte_t), virt_to_page(conv_pte)) < 0) {
-            gdprintk(XENLOG_INFO, "%s failed to assign page\n", __func__);
-            return -EAGAIN;
-        }
-
-        // skip to next pte page
-        i += PTRS_PER_PTE;
-        i &= ~(PTRS_PER_PTE - 1);
-    }
-
-    // expose p2m_pte_zero_page
-    for (i = 0; i < expose_num_pfn / PTRS_PER_PTE + 1; i++) {
-        assign_pte = lookup_noalloc_domain_pte(d, (assign_start_gpfn + i) <<
-                                               PAGE_SHIFT);
-        if (assign_pte == NULL || pte_present(*assign_pte))
+        if (src_gpfn_end >= (src_dom->arch.convmem_end >> PAGE_SHIFT)) {
+            ret = -EINVAL;
+            break;
+        }
+
+        src_gpfn &= ~(PTRS_PER_PTE - 1);
+        num_src_gpfn = src_gpfn_end - src_gpfn;
+        num_src_gpfn |= PTRS_PER_PTE - 1;
+
+        prev_src_gpfn_end = src_gpfn + num_src_gpfn;
+        ret = expose_p2m(dest_dom, dest_gpfn + src_gpfn / PTRS_PER_PTE,
+                         src_dom, src_gpfn, num_src_gpfn);
+        if (ret != 0)
+            break;
+
+        entry->region[entry->num_region].gpfn =
+            dest_gpfn + src_gpfn / PTRS_PER_PTE;
+        entry->region[entry->num_region].num_gpfn =
+            num_src_gpfn / PTRS_PER_PTE + 1;
+        entry->num_region++;
+    }
+
+    if (ret == 0) {
+        foreign_p2m_unbusy(&dest_dom->arch.foreign_p2m, entry);
+    } else {
+        foreign_p2m_unexpose(dest_dom, entry);
+        foreign_p2m_free(&dest_dom->arch.foreign_p2m, entry);
+    }
+
+ out_unlock:
+    rcu_unlock_domain(dest_dom);
+ out:
+    xfree(memmap);
+
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+    dfree_flush(&current->arch.dfree);
+#endif
+    return ret;
+}
+
+unsigned long
+dom0vp_unexpose_foreign_p2m(struct domain* dest_dom,
+                            unsigned long dest_gpfn,
+                            domid_t domid)
+{
+    int ret = -ENOENT;
+    struct foreign_p2m* foreign_p2m = &dest_dom->arch.foreign_p2m;
+    struct foreign_p2m_entry* entry;
+
+    dest_dom = rcu_lock_domain(dest_dom);
+    if (dest_dom == NULL)
+        return ret;
+    spin_lock(&foreign_p2m->lock);
+    list_for_each_entry(entry, &foreign_p2m->head, list) {
+        if (entry->gpfn < dest_gpfn)
             continue;
-
-        if (expose_p2m_page(d, (assign_start_gpfn + i) << PAGE_SHIFT,
-                            p2m_pte_zero_page) < 0) {
-            gdprintk(XENLOG_INFO, "%s failed to assign zero-pte page\n", __func__);
-            return -EAGAIN;
-        }
-    }
-
-    return 0;
+        if (dest_gpfn < entry->gpfn)
+            break;
+
+        if (domid == entry->src_dom->domain_id)
+            ret = 0;
+        else
+            ret = -EINVAL;
+        break;
+    }
+    if (ret == 0) {
+        if (entry->busy == 0)
+            entry->busy = 1;
+        else
+            ret = -EBUSY;
+    }
+    spin_unlock(&foreign_p2m->lock);
+
+    if (ret == 0) {
+        foreign_p2m_unexpose(dest_dom, entry);
+        foreign_p2m_free(&dest_dom->arch.foreign_p2m, entry);
+#ifdef CONFIG_XEN_IA64_DEFERRED_FREE
+        dfree_flush(&current->arch.dfree);
+#endif
+    }
+    rcu_unlock_domain(dest_dom);
+    return ret;
 }
 #endif
diff -r 2b14a1f22eec -r 8ebdf907115d xen/include/asm-ia64/domain.h
--- a/xen/include/asm-ia64/domain.h	Fri May 25 09:43:21 2007 -0600
+++ b/xen/include/asm-ia64/domain.h	Thu May 31 12:14:08 2007 +0900
@@ -46,6 +46,16 @@ struct mm_struct {
 struct mm_struct {
     volatile pgd_t * pgd;
     // atomic_t mm_users;   /* How many users with user space? */
+};
+
+struct foreign_p2m {
+    spinlock_t lock;
+    /*
+     * List sorted by entry->gpfn.
+     * It is expected that only a small number of foreign domain p2m
+     * mappings exist at the same time.
+     */
+    struct list_head head;
 };
 
 struct last_vcpu {
@@ -127,6 +137,9 @@ struct arch_domain {
     /* Number of faults.  */
     atomic64_t shadow_fault_count;
 
+    /* for foreign domain p2m table mapping */
+    struct foreign_p2m foreign_p2m;
+
     struct last_vcpu last_vcpu[NR_CPUS];
 
 #ifdef CONFIG_XEN_IA64_TLB_TRACK
diff -r 2b14a1f22eec -r 8ebdf907115d xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h	Fri May 25 09:43:21 2007 -0600
+++ b/xen/include/asm-ia64/mm.h	Thu May 31 12:14:08 2007 +0900
@@ -440,9 +440,17 @@ extern unsigned long dom0vp_add_physmap_
 #ifdef CONFIG_XEN_IA64_EXPOSE_P2M
 extern void expose_p2m_init(void);
 extern unsigned long dom0vp_expose_p2m(struct domain* d, unsigned long conv_start_gpfn, unsigned long assign_start_gpfn, unsigned long expose_size, unsigned long granule_pfn);
+extern void foreign_p2m_init(struct domain* d);
+extern void foreign_p2m_destroy(struct domain* d);
+extern unsigned long dom0vp_expose_foreign_p2m(struct domain* dest_dom, unsigned long dest_gpfn, domid_t domid, XEN_GUEST_HANDLE(char) buffer, unsigned long flags);
+extern unsigned long dom0vp_unexpose_foreign_p2m(struct domain* dest_dom, unsigned long dest_gpfn, domid_t domid);
 #else
 #define expose_p2m_init()       do { } while (0)
 #define dom0vp_expose_p2m(d, conv_start_gpfn, assign_start_gpfn, expose_size, granule_pfn)	(-ENOSYS)
+#define foreign_p2m_init(d)     do { } while (0)
+#define foreign_p2m_destroy(d)  do { } while (0)
+#define dom0vp_expose_foreign_p2m(dest_dom, dest_gpfn, domid, buffer, flags)	(-ENOSYS)
+#define dom0vp_unexpose_foreign_p2m(dest_dom, dest_gpfn, domid)	(-ENOSYS)
 #endif
 
 extern volatile unsigned long *mpt_table;
diff -r 2b14a1f22eec -r 8ebdf907115d xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h	Fri May 25 09:43:21 2007 -0600
+++ b/xen/include/public/arch-ia64.h	Thu May 31 12:14:08 2007 +0900
@@ -516,6 +516,13 @@ DEFINE_XEN_GUEST_HANDLE(vcpu_guest_conte
 /* get fpswa revision */
 #define IA64_DOM0VP_fpswa_revision      10
 
+/* expose the foreign domain's p2m table to the privileged domain */
+#define IA64_DOM0VP_expose_foreign_p2m  11
+#define IA64_DOM0VP_EFP_ALLOC_PTE       0x1 /* allocate p2m table */
+
+/* unexpose the foreign domain's p2m table from the privileged domain */
+#define IA64_DOM0VP_unexpose_foreign_p2m 12
+
 // flags for page assignement to pseudo physical address space
 #define _ASSIGN_readonly                0
 #define ASSIGN_readonly                 (1UL << _ASSIGN_readonly)
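Two closing notes, neither part of the patch itself. First, sizing: expose_p2m() maps one read-only pte page per PTRS_PER_PTE source frames, so the destination window (and the privcmd reservation on the Linux side) needs roughly num_src_gpfn / PTRS_PER_PTE pages. A hypothetical helper mirroring the rounding done in foreign_p2m_alloc(); PTRS_PER_PTE is assumed here to be PAGE_SIZE / sizeof(pte_t), i.e. 2048 with 16KB pages:

/* Hypothetical sizing helper -- a sketch, not part of the patch. */
#define FP2M_PTRS_PER_PTE  (16384 / 8)   /* assumes 16KB pages, 8-byte ptes */

static unsigned long foreign_p2m_dest_pages(unsigned long src_gpfn,
                                            unsigned long src_gpfn_end)
{
    /* Round the source range out to pte-page boundaries, as the patch does. */
    src_gpfn     &= ~(unsigned long)(FP2M_PTRS_PER_PTE - 1);
    src_gpfn_end |=  (unsigned long)(FP2M_PTRS_PER_PTE - 1);
    return (src_gpfn_end - src_gpfn) / FP2M_PTRS_PER_PTE + 1;
}

Second, teardown: from user space it is enough to munmap() the privcmd region; xen_ia64_privcmd_vma_close() then invokes the callback installed by xen_foreign_p2m_expose(), which issues HYPERVISOR_unexpose_foreign_p2m(). The IA64_DOM0VP_unexpose_foreign_p2m op is also available for callers that issued IA64_DOM0VP_expose_foreign_p2m directly.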