[IA64] fix XENMEM_add_to_physmap with XENMAPSPACE_mfn.
This patch fixes HVM domain save/restore.
The tools stack needs to know where memory is populated in the guest
domain, but XENMEM_add_to_physmap with XENMAPSPACE_mfn doesn't update
the guest memory map information. As a result, guest domain
save/dump-core fails to dump the pages that were added by the hypercall.
This patch makes the hypercall update the memory map information
of a given guest domain.
This introduces a race between writers and readers of that
information. A sequence counter is therefore added to the shared_info
page so that the guest memory map information is protected by a
sequence lock. The reader is the tools stack; it will be updated by
the following patch.
Signed-off-by: Isaku Yamahata <yamahata@xxxxxxxxxxxxx>
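
For illustration only (not part of this patch), the intended read side
could look roughly like the sketch below. read_memmap_sequence(),
read_convmem_end() and copy_memmap_from_guest() are hypothetical
placeholders for however the tools stack reads
shared_info->arch.memmap_sequence, XENMEM_maximum_gpfn and the memmap
pages; memory barriers are omitted for brevity.

    /* hypothetical accessors -- placeholders, not real libxc/Xen APIs */
    extern unsigned long read_memmap_sequence(void);
    extern unsigned long read_convmem_end(void);
    extern int copy_memmap_from_guest(void *buf, unsigned long len);

    int read_guest_memmap(void *buf, unsigned long len)
    {
        unsigned long seq, convmem_end;

        for (;;) {
            convmem_end = read_convmem_end();  /* via XENMEM_maximum_gpfn */
            seq = read_memmap_sequence();      /* arch.memmap_sequence */
            if (seq & 1)
                continue;                      /* writer in progress; retry */
            if (copy_memmap_from_guest(buf, len))
                return -1;
            if (read_memmap_sequence() == seq &&
                read_convmem_end() == convmem_end)
                return 0;                      /* consistent snapshot */
            /* a writer ran while we copied; try again */
        }
    }

The odd/even check works because the writer bumps the counter once when
it starts updating memmap_info and once more when it finishes.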
diff -r df56aa8b2e34 xen/arch/ia64/xen/mm.c
--- a/xen/arch/ia64/xen/mm.c Mon Sep 29 21:12:54 2008 +0900
+++ b/xen/arch/ia64/xen/mm.c Mon Sep 29 21:18:21 2008 +0900
@@ -186,6 +186,9 @@
static void domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
volatile pte_t* ptep, pte_t old_pte,
struct page_info* page);
+
+static void __xencomm_mark_dirty(struct domain *d,
+ unsigned long addr, unsigned int len);
extern unsigned long ia64_iobase;
@@ -2141,6 +2144,323 @@
rcu_unlock_domain(dest_dom);
return ret;
}
+
+/* this lock must only be used for memmap_info. domain_lock() is abused here */
+static void
+memmap_lock(struct domain *d)
+{
+ domain_lock(d);
+}
+
+static void
+memmap_unlock(struct domain *d)
+{
+ domain_unlock(d);
+}
+
+static void
+memmap_seqlock(struct domain *d)
+{
+ memmap_lock(d);
+ ++d->shared_info->arch.memmap_sequence;
+ smp_wmb();
+}
+
+static void
+memmap_sequnlock(struct domain *d)
+{
+ smp_wmb();
+ d->shared_info->arch.memmap_sequence++;
+ memmap_unlock(d);
+}
+
+/* copy memory range to domain pseudo physical address space */
+static int
+memmap_copy_to(struct domain *d, unsigned long dest_gpfn,
+ void *src, unsigned long num_pages)
+{
+ BUG_ON(((unsigned long)src & ~PAGE_MASK) != 0);
+
+ while (num_pages > 0) {
+ unsigned long mfn;
+ struct page_info *page;
+ void *virt;
+
+ mfn = gmfn_to_mfn_foreign(d, dest_gpfn);
+ if (mfn == 0 || mfn == INVALID_MFN)
+ return -EFAULT;
+ page = mfn_to_page(mfn);
+ if (get_page(page, d) == 0)
+ return -EFAULT;
+ virt = mfn_to_virt(mfn);
+ copy_page(virt, src);
+ __xencomm_mark_dirty(d, (unsigned long)virt, PAGE_SIZE);
+ put_page(page);
+
+ src += PAGE_SIZE;
+ dest_gpfn++;
+ num_pages--;
+ }
+
+ return 0;
+}
+
+/* copy memory range from domain pseudo physical address space */
+static int
+memmap_copy_from(void *dest, struct domain *d, unsigned long src_gpfn,
+ unsigned long num_pages)
+{
+ BUG_ON(((unsigned long)dest & ~PAGE_MASK) != 0);
+
+ while (num_pages > 0) {
+ unsigned long mfn;
+ struct page_info *page;
+
+ mfn = gmfn_to_mfn_foreign(d, src_gpfn);
+ if (mfn == 0 || mfn == INVALID_MFN)
+ return -EFAULT;
+ page = mfn_to_page(mfn);
+ if (get_page(page, d) == 0)
+ return -EFAULT;
+ copy_page(dest, mfn_to_virt(mfn));
+ put_page(page);
+
+ dest += PAGE_SIZE;
+ src_gpfn++;
+ num_pages--;
+ }
+
+ return 0;
+}
+
+static int
+memdesc_can_expand(const struct xen_ia64_memmap_info *memmap_info,
+ unsigned long num_pages)
+{
+ /* Is there room for one more md? */
+ if ((num_pages << PAGE_SHIFT) <
+ (sizeof(*memmap_info) + memmap_info->efi_memmap_size +
+ memmap_info->efi_memdesc_size))
+ return 0;
+
+ return 1;
+}
+
+static int
+memdesc_can_collapse(const efi_memory_desc_t *lhs,
+ const efi_memory_desc_t *rhs)
+{
+ return (lhs->type == rhs->type && lhs->attribute == rhs->attribute);
+}
+
+static int
+__dom0vp_add_memdesc_one(struct xen_ia64_memmap_info *memmap_info,
+ unsigned long num_pages,
+ const efi_memory_desc_t *md)
+{
+ void* const memmap_end = (void*)memmap_info->memdesc +
+ memmap_info->efi_memmap_size;
+ void *p;
+ efi_memory_desc_t *tmp_md;
+ efi_memory_desc_t *s_md;
+ efi_memory_desc_t *e_md;
+ u64 phys_addr;
+ u64 phys_addr_end;
+
+ /* fast path. appending to the last entry */
+ tmp_md = (efi_memory_desc_t*)(memmap_end - memmap_info->efi_memdesc_size);
+ if (MD_END(tmp_md) < md->phys_addr) {
+ /* append one */
+ if (!memdesc_can_expand(memmap_info, num_pages))
+ return -ENOMEM;
+
+ memcpy(memmap_end, md, memmap_info->efi_memdesc_size);
+ memmap_info->efi_memmap_size += memmap_info->efi_memdesc_size;
+ return 0;
+ }
+ /* fast path. expand the last entry */
+ if (tmp_md->phys_addr <= md->phys_addr) {
+ if (!memdesc_can_collapse(tmp_md, md))
+ return -EINVAL;
+
+ phys_addr_end = max(MD_END(tmp_md), MD_END(md));
+ tmp_md->num_pages =
+ (phys_addr_end - tmp_md->phys_addr) >> EFI_PAGE_SHIFT;
+ return 0;
+ }
+
+ /* slow path */
+ s_md = NULL;
+ e_md = NULL;
+ for (p = memmap_info->memdesc;
+ p < memmap_end;
+ p += memmap_info->efi_memdesc_size) {
+ tmp_md = p;
+
+ if (MD_END(tmp_md) < md->phys_addr)
+ continue;
+
+ if (MD_END(md) < tmp_md->phys_addr) {
+ if (s_md == NULL) {
+ void *next_md = p + memmap_info->efi_memdesc_size;
+ size_t left_size = memmap_end - (void*)tmp_md;
+
+ /* found hole. just insert md here */
+ if (!memdesc_can_expand(memmap_info, num_pages))
+ return -ENOMEM;
+
+ memmove(next_md, tmp_md, left_size);
+ memcpy(tmp_md, md, memmap_info->efi_memdesc_size);
+ memmap_info->efi_memmap_size += memmap_info->efi_memdesc_size;
+ return 0;
+ }
+ break;
+ }
+
+ if (s_md == NULL)
+ s_md = tmp_md;
+ e_md = tmp_md;
+
+ if (!memdesc_can_collapse(tmp_md, md))
+ return -EINVAL;
+ }
+ BUG_ON(s_md == NULL || e_md == NULL);
+
+ /* collapse into one */
+ phys_addr = min(md->phys_addr, s_md->phys_addr);
+ phys_addr_end = max(MD_END(md), MD_END(e_md));
+ s_md->phys_addr = phys_addr;
+ s_md->num_pages = (phys_addr_end - phys_addr) >> EFI_PAGE_SHIFT;
+ if (s_md != e_md) {
+ void *next_s_md = (void*)s_md + memmap_info->efi_memdesc_size;
+ void *next_e_md = (void*)e_md + memmap_info->efi_memdesc_size;
+ size_t left_size = memmap_end - (void*)next_e_md;
+
+ memmap_info->efi_memmap_size -= (void*)e_md - (void*)s_md;
+ if (left_size > 0)
+ memmove(next_s_md, next_e_md, left_size);
+ }
+
+ return 0;
+}
+
+/*
+ * d->arch.convmem_end is mostly read only and sometimes increased.
+ * It is protected by memmap_lock().
+ *
+ * d->arch.convmem_end is also referenced by the guest (self p2m exposure).
+ * d->shared_info.arch.memmap_info_xxx and memmap_info are
+ * referenced by the tools stack (save/dump-core/foreign p2m exposure).
+ * On the read side (tools stack and guest kernel) they are protected
+ * by the sequence counter d->shared_info.arch.memmap_sequence,
+ * which is used as a sequence lock.
+ *
+ * reader side:
+ * - get d->arch.convmem_end (via XENMEM_maximum_gpfn)
+ * - get sequence counter
+ * - copy the memmap into local buffer
+ * - check sequence counter. try again if necessary
+ * - get d->arch.convmem_end. try again if changed.
+ *
+ * writer side:
+ * - increase d->arch.convmem_end first if necessary
+ * - take the write side of the sequence lock:
+ *   - take the memmap_lock spinlock
+ *   - increment the sequence counter
+ * - update memmap_info
+ * - release the write side of the sequence lock (increment the
+ *   counter again and drop memmap_lock)
+ */
+static int
+__dom0vp_add_memdesc(struct domain *targ_d,
+ const struct xen_ia64_memmap_info *u_memmap_info,
+ const char *u_memmap)
+{
+ int ret = 0;
+ const void* const u_memmap_end = u_memmap + u_memmap_info->efi_memmap_size;
+ const efi_memory_desc_t *md;
+
+ unsigned long md_end_max;
+ unsigned long num_pages;
+ unsigned long order;
+ unsigned long memmap_info_pfn;
+
+ struct page_info *page;
+ struct xen_ia64_memmap_info *memmap_info;
+ size_t unused_size;
+
+ const void *p;
+
+ /* update d->arch.convmem_end */
+ md_end_max = 0;
+ for (p = u_memmap; p < u_memmap_end;
+ p += u_memmap_info->efi_memdesc_size) {
+ md = p;
+ if (MD_END(md) > md_end_max)
+ md_end_max = MD_END(md);
+ }
+ memmap_lock(targ_d);
+ /* convmem_end is also protected by memmap_lock() */
+ if (md_end_max > targ_d->arch.convmem_end)
+ targ_d->arch.convmem_end = md_end_max;
+ num_pages = targ_d->shared_info->arch.memmap_info_num_pages;
+ memmap_unlock(targ_d);
+
+ again:
+ order = get_order(num_pages << PAGE_SHIFT);
+ page = alloc_domheap_pages(NULL, order, 0);
+ if (page == NULL)
+ return -ENOMEM;
+ memmap_info = page_to_virt(page);
+
+ /* write-lock the seqlock targ_d->shared_info.arch.memmap_sequence */
+ memmap_seqlock(targ_d);
+ if (targ_d->shared_info->arch.memmap_info_num_pages != num_pages) {
+ num_pages = targ_d->shared_info->arch.memmap_info_num_pages;
+ memmap_sequnlock(targ_d);
+ free_domheap_pages(page, order);
+ goto again;
+ }
+ memmap_info_pfn = targ_d->shared_info->arch.memmap_info_pfn;
+
+ /* copy into a local buffer to make the pages virtually contiguous */
+ ret = memmap_copy_from(memmap_info,
+ targ_d, memmap_info_pfn, num_pages);
+ if (ret != 0)
+ goto out;
+
+ if (memmap_info->efi_memdesc_size != u_memmap_info->efi_memdesc_size ||
+ memmap_info->efi_memdesc_version !=
+ u_memmap_info->efi_memdesc_version) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* update memdesc */
+ for (p = u_memmap;
+ p < u_memmap_end;
+ p += u_memmap_info->efi_memdesc_size) {
+ md = p;
+ ret = __dom0vp_add_memdesc_one(memmap_info, num_pages, md);
+ if (ret != 0)
+ goto out;
+ }
+
+ /* zero out the unused region to avoid leaking hypervisor data */
+ unused_size = (num_pages << PAGE_SHIFT) -
+ (sizeof(*memmap_info) + memmap_info->efi_memmap_size);
+ if (unused_size > 0)
+ memset((void*)memmap_info->memdesc + memmap_info->efi_memmap_size,
+ 0, unused_size);
+
+ /* copy back into domain. */
+ ret = memmap_copy_to(targ_d, memmap_info_pfn, memmap_info, num_pages);
+
+ out:
+ /* write-unlock the seqlock targ_d->shared_info.arch.memmap_sequence */
+ memmap_sequnlock(targ_d);
+
+ free_domheap_pages(page, order);
+ return ret;
+}
#endif
// grant table host mapping
@@ -2863,8 +3183,35 @@
case XENMAPSPACE_mfn:
{
if ( get_page_from_pagenr(xatp.idx, d) ) {
+ struct xen_ia64_memmap_info memmap_info;
+ efi_memory_desc_t md;
+ int ret;
+
mfn = xatp.idx;
page = mfn_to_page(mfn);
+
+ memmap_info.efi_memmap_size = sizeof(md);
+ memmap_info.efi_memdesc_size = sizeof(md);
+ memmap_info.efi_memdesc_version =
+ EFI_MEMORY_DESCRIPTOR_VERSION;
+
+ md.type = EFI_CONVENTIONAL_MEMORY;
+ md.pad = 0;
+ md.phys_addr = xatp.gpfn << PAGE_SHIFT;
+ md.virt_addr = 0;
+ md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
+ md.attribute = EFI_MEMORY_WB;
+
+ ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
+ if (ret != 0) {
+ put_page(page);
+ rcu_unlock_domain(d);
+ gdprintk(XENLOG_DEBUG,
+ "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
+ __func__, __LINE__,
+ d->domain_id, xatp.gpfn, xatp.idx, ret);
+ return ret;
+ }
}
break;
}
@@ -2999,9 +3346,9 @@
return (!mfn_valid(mfn) || (page_get_owner(mfn_to_page(mfn)) == dom_io));
}
-void xencomm_mark_dirty(unsigned long addr, unsigned int len)
+static void __xencomm_mark_dirty(struct domain *d,
+ unsigned long addr, unsigned int len)
{
- struct domain *d = current->domain;
unsigned long gpfn;
unsigned long end_addr = addr + len;
@@ -3011,6 +3358,11 @@
shadow_mark_page_dirty(d, gpfn);
}
}
+}
+
+void xencomm_mark_dirty(unsigned long addr, unsigned int len)
+{
+ __xencomm_mark_dirty(current->domain, addr, len);
}
int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn)
diff -r df56aa8b2e34 xen/include/asm-ia64/mm.h
--- a/xen/include/asm-ia64/mm.h Mon Sep 29 21:12:54 2008 +0900
+++ b/xen/include/asm-ia64/mm.h Mon Sep 29 21:18:21 2008 +0900
@@ -455,6 +455,7 @@
#define foreign_p2m_destroy(d) do { } while (0)
#define dom0vp_expose_foreign_p2m(dest_dom, dest_gpfn, domid, buffer, flags) (-ENOSYS)
#define dom0vp_unexpose_foreign_p2m(dest_dom, dest_gpfn, domid) (-ENOSYS)
+#define __dom0vp_add_memdesc(d, memmap_info, memdesc) (-ENOSYS)
#endif
extern volatile unsigned long *mpt_table;
diff -r df56aa8b2e34 xen/include/public/arch-ia64.h
--- a/xen/include/public/arch-ia64.h Mon Sep 29 21:12:54 2008 +0900
+++ b/xen/include/public/arch-ia64.h Mon Sep 29 21:18:21 2008 +0900
@@ -250,8 +250,9 @@
unsigned int memmap_info_num_pages;/* currently only = 1 case is
supported. */
unsigned long memmap_info_pfn;
+ unsigned long memmap_sequence; /* seqlock to update memmap_info */
- uint64_t pad[31];
+ uint64_t pad[30];
};
typedef struct arch_shared_info arch_shared_info_t;