# HG changeset patch # User yamahata@xxxxxxxxxxxxx # Date 1188869024 -32400 # Node ID ad2d25b057599b3b5c6646d1f222609d7af203a6 # Parent 420fb0f102c22ccb0d0a09ce3ef9c65e17ca3fe2 rewrite ia64 domain save/restore with foreign p2m exposure PATCHNAME: rewrite_ia64_domain_save_restore_foreign_p2m Signed-off-by: Isaku Yamahata diff -r 420fb0f102c2 -r ad2d25b05759 tools/libxc/ia64/xc_ia64_linux_restore.c --- a/tools/libxc/ia64/xc_ia64_linux_restore.c Tue Aug 28 13:10:18 2007 +0900 +++ b/tools/libxc/ia64/xc_ia64_linux_restore.c Tue Sep 04 10:23:44 2007 +0900 @@ -5,12 +5,17 @@ * * Copyright (c) 2003, K A Fraser. * Rewritten for ia64 by Tristan Gingold + * + * Copyright (c) 2007 Isaku Yamahata + * Use foreign p2m exposure. */ #include #include #include "xg_private.h" +#include "xc_ia64_save_restore.h" +#include "xc_ia64.h" #define PFN_TO_KB(_pfn) ((_pfn) << (PAGE_SHIFT - 10)) @@ -40,6 +45,16 @@ read_exact(int fd, void *buf, size_t cou } static int +populate_page_if_necessary(int xc_handle, uint32_t dom, unsigned long gmfn, + struct xen_ia64_p2m_table *p2m_table) +{ + if (xc_ia64_p2m_present(p2m_table, gmfn)) + return 0; + + return xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &gmfn); +} + +static int read_page(int xc_handle, int io_fd, uint32_t dom, unsigned long pfn) { void *mem; @@ -66,7 +81,8 @@ xc_domain_restore(int xc_handle, int io_ unsigned int hvm, unsigned int pae) { DECLARE_DOMCTL; - int rc = 1, i; + int rc = 1; + unsigned int i; unsigned long gmfn; unsigned long ver; @@ -78,10 +94,11 @@ xc_domain_restore(int xc_handle, int io_ /* A copy of the CPU context of the guest. */ vcpu_guest_context_t ctxt; - unsigned long *page_array = NULL; - /* A temporary mapping of the guest's start_info page. 
*/ start_info_t *start_info; + + struct xen_ia64_p2m_table p2m_table; + xc_ia64_p2m_init(&p2m_table); if (hvm) { ERROR("HVM Restore is unsupported"); @@ -102,7 +119,7 @@ xc_domain_restore(int xc_handle, int io_ ERROR("Error when reading version"); goto out; } - if (ver != 1) { + if (ver != XC_IA64_SR_FORMAT_VER_ONE && ver != XC_IA64_SR_FORMAT_VER_TWO) { ERROR("version of save doesn't match"); goto out; } @@ -112,25 +129,6 @@ xc_domain_restore(int xc_handle, int io_ ERROR("Unable to lock_pages ctxt"); return 1; } - - /* Get pages. */ - page_array = malloc(p2m_size * sizeof(unsigned long)); - if (page_array == NULL) { - ERROR("Could not allocate memory"); - goto out; - } - - for ( i = 0; i < p2m_size; i++ ) - page_array[i] = i; - - if ( xc_domain_memory_populate_physmap(xc_handle, dom, p2m_size, - 0, 0, page_array) ) - { - ERROR("Failed to allocate memory for %ld KB to dom %d.\n", - PFN_TO_KB(p2m_size), dom); - goto out; - } - DPRINTF("Allocated memory by %ld KB\n", PFN_TO_KB(p2m_size)); if (!read_exact(io_fd, &domctl.u.arch_setup, sizeof(domctl.u.arch_setup))) { ERROR("read: domain setup"); @@ -155,6 +153,34 @@ xc_domain_restore(int xc_handle, int io_ } shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; + if (ver == XC_IA64_SR_FORMAT_VER_TWO) { + unsigned int memmap_info_num_pages; + unsigned long memmap_size; + xen_ia64_memmap_info_t *memmap_info; + + if (!read_exact(io_fd, &memmap_info_num_pages, + sizeof(memmap_info_num_pages))) { + ERROR("read: memmap_info_num_pages"); + goto out; + } + memmap_size = memmap_info_num_pages * PAGE_SIZE; + memmap_info = malloc(memmap_size); + if (memmap_info == NULL) { + ERROR("Could not allocate memory for memmap_info"); + goto out; + } + if (!read_exact(io_fd, memmap_info, memmap_size)) { + ERROR("read: memmap_info"); + goto out; + } + if (xc_ia64_p2m_map(&p2m_table, xc_handle, + dom, memmap_info, IA64_DOM0VP_EFP_ALLOC_PTE)) { + ERROR("p2m mapping"); + goto out; + } + free(memmap_info); + } + DPRINTF("Reloading memory 
pages: 0%%\n"); while (1) { @@ -165,17 +191,26 @@ xc_domain_restore(int xc_handle, int io_ if (gmfn == INVALID_MFN) break; + if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table) < 0) { + ERROR("can not populate page 0x%lx", gmfn); + goto out; + } if (read_page(xc_handle, io_fd, dom, gmfn) < 0) goto out; } DPRINTF("Received all pages\n"); - /* Get the list of PFNs that are not in the psuedo-phys map */ + /* + * Get the list of PFNs that are not in the psuedo-phys map. + * Although we allocate pages on demand, balloon driver may + * decreased simaltenously. So we have to free the freed + * pages here. + */ { unsigned int count; unsigned long *pfntab; - int rc; + unsigned int nr_frees; if (!read_exact(io_fd, &count, sizeof(count))) { ERROR("Error when reading pfn count"); @@ -190,35 +225,31 @@ xc_domain_restore(int xc_handle, int io_ if (!read_exact(io_fd, pfntab, sizeof(unsigned long)*count)) { ERROR("Error when reading pfntab"); - goto out; - } - - DPRINTF ("Try to free %u pages\n", count); - + free(pfntab); + goto out; + } + + + nr_frees = 0; for (i = 0; i < count; i++) { - - volatile unsigned long pfn; - - struct xen_memory_reservation reservation = { - .nr_extents = 1, - .extent_order = 0, - .domid = dom - }; - set_xen_guest_handle(reservation.extent_start, - (unsigned long *)&pfn); - - pfn = pfntab[i]; - rc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, - &reservation); - if (rc != 1) { + if (xc_ia64_p2m_allocated(&p2m_table, pfntab[i])) { + pfntab[nr_frees] = pfntab[i]; + nr_frees++; + } + } + if (nr_frees > 0) { + if (xc_domain_memory_decrease_reservation(xc_handle, dom, nr_frees, + 0, pfntab) < 0) { ERROR("Could not decrease reservation : %d", rc); + free(pfntab); goto out; } - } - - DPRINTF("Decreased reservation by %d pages\n", count); - } - + else + DPRINTF("Decreased reservation by %d / %d pages\n", + nr_frees, count); + } + free(pfntab); + } if (!read_exact(io_fd, &ctxt, sizeof(ctxt))) { ERROR("Error when reading ctxt"); @@ -274,6 
+305,10 @@ xc_domain_restore(int xc_handle, int io_ munmap (shared_info, PAGE_SIZE); /* Uncanonicalise the suspend-record frame number and poke resume rec. */ + if (populate_page_if_necessary(xc_handle, dom, gmfn, &p2m_table)) { + ERROR("cannot populate page 0x%lx", gmfn); + goto out; + } start_info = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, gmfn); if (start_info == NULL) { @@ -309,8 +344,7 @@ xc_domain_restore(int xc_handle, int io_ if ((rc != 0) && (dom != 0)) xc_domain_destroy(xc_handle, dom); - if (page_array != NULL) - free(page_array); + xc_ia64_p2m_unmap(&p2m_table); unlock_pages(&ctxt, sizeof(ctxt)); diff -r 420fb0f102c2 -r ad2d25b05759 tools/libxc/ia64/xc_ia64_linux_save.c --- a/tools/libxc/ia64/xc_ia64_linux_save.c Tue Aug 28 13:10:18 2007 +0900 +++ b/tools/libxc/ia64/xc_ia64_linux_save.c Tue Sep 04 10:23:44 2007 +0900 @@ -5,6 +5,9 @@ * * Copyright (c) 2003, K A Fraser. * Rewritten for ia64 by Tristan Gingold + * + * Copyright (c) 2007 Isaku Yamahata + * Use foreign p2m exposure. */ #include @@ -14,6 +17,9 @@ #include #include "xg_private.h" +#include "xc_ia64.h" +#include "xc_ia64_save_restore.h" +#include "xc_efi.h" /* ** Default values for important tuning parameters. Can override by passing @@ -151,8 +157,6 @@ xc_domain_save(int xc_handle, int io_fd, /* A copy of the CPU context of the guest. 
*/ vcpu_guest_context_t ctxt; - unsigned long *page_array = NULL; - /* Live mapping of shared info structure */ shared_info_t *live_shinfo = NULL; @@ -180,6 +184,17 @@ xc_domain_save(int xc_handle, int io_fd, unsigned long *to_skip = NULL; char *mem; + + unsigned int memmap_info_num_pages; + unsigned long memmap_size = 0; + xen_ia64_memmap_info_t *memmap_info_live = NULL; + xen_ia64_memmap_info_t *memmap_info = NULL; + void *memmap_desc_start; + void *memmap_desc_end; + void *p; + efi_memory_desc_t *md; + struct xen_ia64_p2m_table p2m_table; + xc_ia64_p2m_init(&p2m_table); if (debug) fprintf(stderr, "xc_linux_save (ia64): started dom=%d\n", dom); @@ -217,12 +232,6 @@ xc_domain_save(int xc_handle, int io_fd, } p2m_size = xc_memory_op(xc_handle, XENMEM_maximum_gpfn, &dom); - - page_array = malloc(p2m_size * sizeof(unsigned long)); - if (page_array == NULL) { - ERROR("Could not allocate memory"); - goto out; - } /* This is expected by xm restore. */ if (!write_exact(io_fd, &p2m_size, sizeof(unsigned long))) { @@ -236,7 +245,7 @@ xc_domain_save(int xc_handle, int io_fd, The version is hard-coded, don't forget to change the restore code too! 
*/ { - unsigned long version = 1; + unsigned long version = XC_IA64_SR_FORMAT_VER_CURRENT; if (!write_exact(io_fd, &version, sizeof(unsigned long))) { ERROR("write: version"); @@ -304,6 +313,38 @@ xc_domain_save(int xc_handle, int io_fd, } + memmap_info_num_pages = live_shinfo->arch.memmap_info_num_pages; + memmap_size = PAGE_SIZE * memmap_info_num_pages; + memmap_info_live = xc_map_foreign_range(xc_handle, info.domid, + memmap_size, PROT_READ, + live_shinfo->arch.memmap_info_pfn); + if (memmap_info_live == NULL) { + PERROR("Could not map memmap info."); + goto out; + } + memmap_info = malloc(memmap_size); + if (memmap_info == NULL) { + PERROR("Could not allocate memmap info memory"); + goto out; + } + memcpy(memmap_info, memmap_info_live, memmap_size); + munmap(memmap_info_live, memmap_size); + memmap_info_live = NULL; + + if (xc_ia64_p2m_map(&p2m_table, xc_handle, dom, memmap_info, 0) < 0) { + PERROR("xc_ia64_p2m_map"); + goto out; + } + if (!write_exact(io_fd, + &memmap_info_num_pages, sizeof(memmap_info_num_pages))) { + PERROR("write: arch.memmap_info_num_pages"); + goto out; + } + if (!write_exact(io_fd, memmap_info, memmap_size)) { + PERROR("write: memmap_info"); + goto out; + } + sent_last_iter = p2m_size; total_sent = 0; @@ -313,13 +354,6 @@ xc_domain_save(int xc_handle, int io_fd, sent_this_iter = 0; skip_this_iter = 0; - - /* Get the pfn list, as it may change. */ - if (xc_ia64_get_pfn_list(xc_handle, dom, page_array, - 0, p2m_size) != p2m_size) { - ERROR("Could not get the page frame list"); - goto out; - } /* Dirtied pages won't be saved. slightly wasteful to peek the whole array evey time, @@ -334,45 +368,64 @@ xc_domain_save(int xc_handle, int io_fd, } /* Start writing out the saved-domain record. 
*/ - for (N = 0; N < p2m_size; N++) { - if (page_array[N] == INVALID_MFN) + memmap_desc_start = &memmap_info->memdesc; + memmap_desc_end = memmap_desc_start + memmap_info->efi_memmap_size; + for (p = memmap_desc_start; + p < memmap_desc_end; + p += memmap_info->efi_memdesc_size) { + md = p; + if (md->type != EFI_CONVENTIONAL_MEMORY || + md->attribute != EFI_MEMORY_WB || + md->num_pages == 0) continue; - if (!last_iter) { - if (test_bit(N, to_skip) && test_bit(N, to_send)) - skip_this_iter++; - if (test_bit(N, to_skip) || !test_bit(N, to_send)) + + for (N = md->phys_addr >> PAGE_SHIFT; + N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> + PAGE_SHIFT; + N++) { + + if (!xc_ia64_p2m_allocated(&p2m_table, N)) continue; - } - - if (debug) - fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n", - page_array[N], N, p2m_size); - - mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, - PROT_READ|PROT_WRITE, N); - if (mem == NULL) { - /* The page may have move. - It will be remarked dirty. - FIXME: to be tracked. */ - fprintf(stderr, "cannot map mfn page %lx gpfn %lx: %s\n", - page_array[N], N, safe_strerror(errno)); - continue; - } - - if (!write_exact(io_fd, &N, sizeof(N))) { - ERROR("write: p2m_size"); + + if (!last_iter) { + if (test_bit(N, to_skip) && test_bit(N, to_send)) + skip_this_iter++; + if (test_bit(N, to_skip) || !test_bit(N, to_send)) + continue; + } + + if (debug) + fprintf(stderr, "xc_linux_save: page %lx (%lu/%lu)\n", + xc_ia64_p2m_mfn(&p2m_table, N), + N, p2m_size); + + mem = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, + PROT_READ|PROT_WRITE, N); + if (mem == NULL) { + /* The page may have move. + It will be remarked dirty. + FIXME: to be tracked. 
*/ + fprintf(stderr, "cannot map mfn page %lx gpfn %lx: %s\n", + xc_ia64_p2m_mfn(&p2m_table, N), + N, safe_strerror(errno)); + continue; + } + + if (!write_exact(io_fd, &N, sizeof(N))) { + ERROR("write: p2m_size"); + munmap(mem, PAGE_SIZE); + goto out; + } + + if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { + ERROR("Error when writing to state file (5)"); + munmap(mem, PAGE_SIZE); + goto out; + } munmap(mem, PAGE_SIZE); - goto out; - } - - if (write(io_fd, mem, PAGE_SIZE) != PAGE_SIZE) { - ERROR("Error when writing to state file (5)"); - munmap(mem, PAGE_SIZE); - goto out; - } - munmap(mem, PAGE_SIZE); - sent_this_iter++; - total_sent++; + sent_this_iter++; + total_sent++; + } } if (last_iter) @@ -420,36 +473,69 @@ xc_domain_save(int xc_handle, int io_fd, } } - /* Send through a list of all the PFNs that were not in map at the close */ + /* + * Send through a list of all the PFNs that were not in map at the close. + * We send pages which was allocated. However balloon driver may + * decreased after sending page. So we have to check the freed + * page after pausing the domain. 
+ */ { - unsigned int i,j; + unsigned long N; unsigned long pfntab[1024]; - - for (i = 0, j = 0; i < p2m_size; i++) { - if (page_array[i] == INVALID_MFN) - j++; - } - + unsigned int j; + + j = 0; + for (p = memmap_desc_start; + p < memmap_desc_end; + p += memmap_info->efi_memdesc_size) { + md = p; + if (md->type != EFI_CONVENTIONAL_MEMORY || + md->attribute != EFI_MEMORY_WB || + md->num_pages == 0) + continue; + for (N = md->phys_addr >> PAGE_SHIFT; + N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> + PAGE_SHIFT; + N++) { + if (!xc_ia64_p2m_allocated(&p2m_table, N)) + j++; + } + } if (!write_exact(io_fd, &j, sizeof(unsigned int))) { ERROR("Error when writing to state file (6a)"); goto out; } - - for (i = 0, j = 0; i < p2m_size; ) { - - if (page_array[i] == INVALID_MFN) - pfntab[j++] = i; - - i++; - if (j == 1024 || i == p2m_size) { - if (!write_exact(io_fd, &pfntab, sizeof(unsigned long)*j)) { - ERROR("Error when writing to state file (6b)"); - goto out; + + j = 0; + for (p = memmap_desc_start; + p < memmap_desc_end; + p += memmap_info->efi_memdesc_size) { + md = p; + if (md->type != EFI_CONVENTIONAL_MEMORY || + md->attribute != EFI_MEMORY_WB || + md->num_pages == 0) + continue; + for (N = md->phys_addr >> PAGE_SHIFT; + N < (md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT)) >> + PAGE_SHIFT; + N++) { + if (!xc_ia64_p2m_allocated(&p2m_table, N)) + pfntab[j++] = N; + if (j == sizeof(pfntab)/sizeof(pfntab[0])) { + if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) { + ERROR("Error when writing to state file (6b)"); + goto out; + } + j = 0; } - j = 0; - } - } - + } + } + if (j > 0) { + if (!write_exact(io_fd, &pfntab, sizeof(pfntab[0]) * j)) { + ERROR("Error when writing to state file (6b)"); + goto out; + } + } } if (xc_vcpu_getcontext(xc_handle, dom, 0, &ctxt)) { @@ -494,13 +580,17 @@ xc_domain_save(int xc_handle, int io_fd, } } - free(page_array); unlock_pages(to_send, bitmap_size); free(to_send); unlock_pages(to_skip, bitmap_size); free(to_skip); if 
(live_shinfo) munmap(live_shinfo, PAGE_SIZE); + if (memmap_info_live) + munmap(memmap_info_live, memmap_size); + if (memmap_info) + free(memmap_info); + xc_ia64_p2m_unmap(&p2m_table); fprintf(stderr,"Save exit rc=%d\n",rc); diff -r 420fb0f102c2 -r ad2d25b05759 tools/libxc/ia64/xc_ia64_save_restore.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/libxc/ia64/xc_ia64_save_restore.h Tue Sep 04 10:23:44 2007 +0900 @@ -0,0 +1,44 @@ +/****************************************************************************** + * xc_ia64_save_restore.h + * + * Copyright (c) 2006 Isaku Yamahata + * VA Linux Systems Japan K.K. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef XC_IA64_SAVE_RESTORE_H +#define XC_IA64_SAVE_RESTORE_H + + /* introduced changeset 10692:306d7857928c of xen-ia64-unstable.hg */ +#define XC_IA64_SR_FORMAT_VER_ONE 1UL + /* using foreign p2m exposure version */ +#define XC_IA64_SR_FORMAT_VER_TWO 2UL +#define XC_IA64_SR_FORMAT_VER_MAX 2UL + +#define XC_IA64_SR_FORMAT_VER_CURRENT XC_IA64_SR_FORMAT_VER_TWO + +#endif /* XC_IA64_SAVE_RESTORE_H */ + +/* + * Local variables: + * mode: C + * c-set-style: "BSD" + * c-basic-offset: 4 + * tab-width: 4 + * indent-tabs-mode: nil + * End: + */