|
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index] Re: [Xen-devel] [PATCH v3 15/19] libxc: allocate memory with vNUMA information for HVM guest
On Tue, Jan 13, 2015 at 12:11:43PM +0000, Wei Liu wrote:
> The algorithm is more or less the same as the one used for PV guests.
> Libxc gets hold of the mapping of vnode to pnode and the size of each
> vnode, then allocates memory accordingly.
Could you split this patch in two? One part adding the new code,
and the other moving the existing code around?
>
> And then the function returns low memory end, high memory end and mmio
> start to caller. Libxl needs those values to construct vmemranges for
> that guest.
>
> Signed-off-by: Wei Liu <wei.liu2@xxxxxxxxxx>
> Cc: Ian Campbell <ian.campbell@xxxxxxxxxx>
> Cc: Ian Jackson <ian.jackson@xxxxxxxxxxxxx>
> Cc: Dario Faggioli <dario.faggioli@xxxxxxxxxx>
> Cc: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
> ---
> Changes in v3:
> 1. Rewrite commit log.
> 2. Add a few code comments.
> ---
> tools/libxc/include/xenguest.h | 7 ++
> tools/libxc/xc_hvm_build_x86.c | 224
> ++++++++++++++++++++++++++--------------
> 2 files changed, 151 insertions(+), 80 deletions(-)
>
> diff --git a/tools/libxc/include/xenguest.h b/tools/libxc/include/xenguest.h
> index 40bbac8..d1cbb4e 100644
> --- a/tools/libxc/include/xenguest.h
> +++ b/tools/libxc/include/xenguest.h
> @@ -230,6 +230,13 @@ struct xc_hvm_build_args {
> struct xc_hvm_firmware_module smbios_module;
> /* Whether to use claim hypercall (1 - enable, 0 - disable). */
> int claim_enabled;
> + unsigned int nr_vnodes; /* Number of vnodes */
> + unsigned int *vnode_to_pnode; /* Vnode to pnode mapping */
> + uint64_t *vnode_size; /* Size of vnodes */
> + /* Out parameters */
> + uint64_t lowmem_end;
> + uint64_t highmem_end;
> + uint64_t mmio_start;
> };
>
> /**
> diff --git a/tools/libxc/xc_hvm_build_x86.c b/tools/libxc/xc_hvm_build_x86.c
> index c81a25b..54d3dc8 100644
> --- a/tools/libxc/xc_hvm_build_x86.c
> +++ b/tools/libxc/xc_hvm_build_x86.c
> @@ -89,7 +89,8 @@ static int modules_init(struct xc_hvm_build_args *args,
> }
>
> static void build_hvm_info(void *hvm_info_page, uint64_t mem_size,
> - uint64_t mmio_start, uint64_t mmio_size)
> + uint64_t mmio_start, uint64_t mmio_size,
> + struct xc_hvm_build_args *args)
> {
> struct hvm_info_table *hvm_info = (struct hvm_info_table *)
> (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
> @@ -119,6 +120,10 @@ static void build_hvm_info(void *hvm_info_page, uint64_t
> mem_size,
> hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
> hvm_info->reserved_mem_pgstart = ioreq_server_pfn(0);
>
> + args->lowmem_end = lowmem_end;
> + args->highmem_end = highmem_end;
> + args->mmio_start = mmio_start;
> +
> /* Finish with the checksum. */
> for ( i = 0, sum = 0; i < hvm_info->length; i++ )
> sum += ((uint8_t *)hvm_info)[i];
> @@ -244,7 +249,7 @@ static int setup_guest(xc_interface *xch,
> char *image, unsigned long image_size)
> {
> xen_pfn_t *page_array = NULL;
> - unsigned long i, nr_pages = args->mem_size >> PAGE_SHIFT;
> + unsigned long i, j, nr_pages = args->mem_size >> PAGE_SHIFT;
> unsigned long target_pages = args->mem_target >> PAGE_SHIFT;
> uint64_t mmio_start = (1ull << 32) - args->mmio_size;
> uint64_t mmio_size = args->mmio_size;
> @@ -258,13 +263,13 @@ static int setup_guest(xc_interface *xch,
> xen_capabilities_info_t caps;
> unsigned long stat_normal_pages = 0, stat_2mb_pages = 0,
> stat_1gb_pages = 0;
> - int pod_mode = 0;
> + unsigned int memflags = 0;
> int claim_enabled = args->claim_enabled;
> xen_pfn_t special_array[NR_SPECIAL_PAGES];
> xen_pfn_t ioreq_server_array[NR_IOREQ_SERVER_PAGES];
> -
> - if ( nr_pages > target_pages )
> - pod_mode = XENMEMF_populate_on_demand;
> + uint64_t dummy_vnode_size;
> + unsigned int dummy_vnode_to_pnode;
> + uint64_t total;
>
> memset(&elf, 0, sizeof(elf));
> if ( elf_init(&elf, image, image_size) != 0 )
> @@ -276,6 +281,37 @@ static int setup_guest(xc_interface *xch,
> v_start = 0;
> v_end = args->mem_size;
>
> + if ( nr_pages > target_pages )
> + memflags |= XENMEMF_populate_on_demand;
> +
> + if ( args->nr_vnodes == 0 )
> + {
> + /* Build dummy vnode information */
> + args->nr_vnodes = 1;
> + dummy_vnode_to_pnode = XC_VNUMA_NO_NODE;
> + dummy_vnode_size = args->mem_size >> 20;
> + args->vnode_size = &dummy_vnode_size;
> + args->vnode_to_pnode = &dummy_vnode_to_pnode;
> + }
> + else
> + {
> + if ( nr_pages > target_pages )
> + {
> + PERROR("Cannot enable vNUMA and PoD at the same time");
> + goto error_out;
> + }
> + }
> +
> + total = 0;
> + for ( i = 0; i < args->nr_vnodes; i++ )
> + total += (args->vnode_size[i] << 20);
> + if ( total != args->mem_size )
> + {
> + PERROR("Memory size requested by vNUMA (0x%"PRIx64") mismatches
> memory size configured for domain (0x%"PRIx64")",
> + total, args->mem_size);
> + goto error_out;
> + }
> +
> if ( xc_version(xch, XENVER_capabilities, &caps) != 0 )
> {
> PERROR("Could not get Xen capabilities");
> @@ -320,7 +356,7 @@ static int setup_guest(xc_interface *xch,
> }
> }
>
> - if ( pod_mode )
> + if ( memflags & XENMEMF_populate_on_demand )
> {
> /*
> * Subtract VGA_HOLE_SIZE from target_pages for the VGA
> @@ -349,103 +385,128 @@ static int setup_guest(xc_interface *xch,
> * ensure that we can be preempted and hence dom0 remains responsive.
> */
> rc = xc_domain_populate_physmap_exact(
> - xch, dom, 0xa0, 0, pod_mode, &page_array[0x00]);
> + xch, dom, 0xa0, 0, memflags, &page_array[0x00]);
> cur_pages = 0xc0;
> stat_normal_pages = 0xc0;
>
> - while ( (rc == 0) && (nr_pages > cur_pages) )
> + for ( i = 0; i < args->nr_vnodes; i++ )
> {
> - /* Clip count to maximum 1GB extent. */
> - unsigned long count = nr_pages - cur_pages;
> - unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
> -
> - if ( count > max_pages )
> - count = max_pages;
> -
> - cur_pfn = page_array[cur_pages];
> + unsigned int new_memflags = memflags;
> + uint64_t pages, finished;
>
> - /* Take care the corner cases of super page tails */
> - if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
> - (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
> - count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
> - else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
> - (count > SUPERPAGE_1GB_NR_PFNS) )
> - count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);
> -
> - /* Attemp to allocate 1GB super page. Because in each pass we only
> - * allocate at most 1GB, we don't have to clip super page boundaries.
> - */
> - if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
> - /* Check if there exists MMIO hole in the 1GB memory range */
> - !check_mmio_hole(cur_pfn << PAGE_SHIFT,
> - SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT,
> - mmio_start, mmio_size) )
> + if ( args->vnode_to_pnode[i] != XC_VNUMA_NO_NODE )
> {
> - long done;
> - unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
> - xen_pfn_t sp_extents[nr_extents];
> -
> - for ( i = 0; i < nr_extents; i++ )
> - sp_extents[i] =
> page_array[cur_pages+(i<<SUPERPAGE_1GB_SHIFT)];
> -
> - done = xc_domain_populate_physmap(xch, dom, nr_extents,
> SUPERPAGE_1GB_SHIFT,
> - pod_mode, sp_extents);
> -
> - if ( done > 0 )
> - {
> - stat_1gb_pages += done;
> - done <<= SUPERPAGE_1GB_SHIFT;
> - cur_pages += done;
> - count -= done;
> - }
> + new_memflags |= XENMEMF_exact_node(args->vnode_to_pnode[i]);
> + new_memflags |= XENMEMF_exact_node_request;
> }
>
> - if ( count != 0 )
> + pages = (args->vnode_size[i] << 20) >> PAGE_SHIFT;
> + /* Consider vga hole belongs to node 0 */
> + if ( i == 0 )
> + finished = 0xc0;
> + else
> + finished = 0;
> +
> + while ( (rc == 0) && (pages > finished) )
> {
> - /* Clip count to maximum 8MB extent. */
> - max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
> + /* Clip count to maximum 1GB extent. */
> + unsigned long count = pages - finished;
> + unsigned long max_pages = SUPERPAGE_1GB_NR_PFNS;
> +
> if ( count > max_pages )
> count = max_pages;
> -
> - /* Clip partial superpage extents to superpage boundaries. */
> - if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
> - (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
> - count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
> - else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
> - (count > SUPERPAGE_2MB_NR_PFNS) )
> - count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p. tail
> */
> -
> - /* Attempt to allocate superpage extents. */
> - if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
> +
> + cur_pfn = page_array[cur_pages];
> +
> + /* Take care the corner cases of super page tails */
> + if ( ((cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
> + (count > (-cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1))) )
> + count = -cur_pfn & (SUPERPAGE_1GB_NR_PFNS-1);
> + else if ( ((count & (SUPERPAGE_1GB_NR_PFNS-1)) != 0) &&
> + (count > SUPERPAGE_1GB_NR_PFNS) )
> + count &= ~(SUPERPAGE_1GB_NR_PFNS - 1);
> +
> +        /* Attempt to allocate 1GB super page. Because in each pass we
> only
> +         * allocate at most 1GB, we don't have to clip super page
> boundaries.
> +         */
> + if ( ((count | cur_pfn) & (SUPERPAGE_1GB_NR_PFNS - 1)) == 0 &&
> + /* Check if there exists MMIO hole in the 1GB memory range
> */
> + !check_mmio_hole(cur_pfn << PAGE_SHIFT,
> + SUPERPAGE_1GB_NR_PFNS << PAGE_SHIFT,
> + mmio_start, mmio_size) )
> {
> long done;
> - unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
> + unsigned long nr_extents = count >> SUPERPAGE_1GB_SHIFT;
> xen_pfn_t sp_extents[nr_extents];
>
> - for ( i = 0; i < nr_extents; i++ )
> - sp_extents[i] =
> page_array[cur_pages+(i<<SUPERPAGE_2MB_SHIFT)];
> + for ( j = 0; j < nr_extents; j++ )
> + sp_extents[j] =
> page_array[cur_pages+(j<<SUPERPAGE_1GB_SHIFT)];
>
> - done = xc_domain_populate_physmap(xch, dom, nr_extents,
> SUPERPAGE_2MB_SHIFT,
> - pod_mode, sp_extents);
> + done = xc_domain_populate_physmap(xch, dom, nr_extents,
> SUPERPAGE_1GB_SHIFT,
> + new_memflags, sp_extents);
>
> if ( done > 0 )
> {
> - stat_2mb_pages += done;
> - done <<= SUPERPAGE_2MB_SHIFT;
> + stat_1gb_pages += done;
> + done <<= SUPERPAGE_1GB_SHIFT;
> cur_pages += done;
> + finished += done;
> count -= done;
> }
> }
> - }
>
> - /* Fall back to 4kB extents. */
> - if ( count != 0 )
> - {
> - rc = xc_domain_populate_physmap_exact(
> - xch, dom, count, 0, pod_mode, &page_array[cur_pages]);
> - cur_pages += count;
> - stat_normal_pages += count;
> + if ( count != 0 )
> + {
> + /* Clip count to maximum 8MB extent. */
> + max_pages = SUPERPAGE_2MB_NR_PFNS * 4;
> + if ( count > max_pages )
> + count = max_pages;
> +
> + /* Clip partial superpage extents to superpage boundaries. */
> + if ( ((cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
> + (count > (-cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1))) )
> + count = -cur_pfn & (SUPERPAGE_2MB_NR_PFNS-1);
> + else if ( ((count & (SUPERPAGE_2MB_NR_PFNS-1)) != 0) &&
> + (count > SUPERPAGE_2MB_NR_PFNS) )
> + count &= ~(SUPERPAGE_2MB_NR_PFNS - 1); /* clip non-s.p.
> tail */
> +
> + /* Attempt to allocate superpage extents. */
> + if ( ((count | cur_pfn) & (SUPERPAGE_2MB_NR_PFNS - 1)) == 0 )
> + {
> + long done;
> + unsigned long nr_extents = count >> SUPERPAGE_2MB_SHIFT;
> + xen_pfn_t sp_extents[nr_extents];
> +
> + for ( j = 0; j < nr_extents; j++ )
> + sp_extents[j] =
> page_array[cur_pages+(j<<SUPERPAGE_2MB_SHIFT)];
> +
> + done = xc_domain_populate_physmap(xch, dom, nr_extents,
> SUPERPAGE_2MB_SHIFT,
> + new_memflags,
> sp_extents);
> +
> + if ( done > 0 )
> + {
> + stat_2mb_pages += done;
> + done <<= SUPERPAGE_2MB_SHIFT;
> + cur_pages += done;
> + finished += done;
> + count -= done;
> + }
> + }
> + }
> +
> + /* Fall back to 4kB extents. */
> + if ( count != 0 )
> + {
> + rc = xc_domain_populate_physmap_exact(
> + xch, dom, count, 0, new_memflags,
> &page_array[cur_pages]);
> + cur_pages += count;
> + finished += count;
> + stat_normal_pages += count;
> + }
> }
> +
> + if ( rc != 0 )
> + break;
> }
>
> if ( rc != 0 )
> @@ -469,7 +530,7 @@ static int setup_guest(xc_interface *xch,
> xch, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
> HVM_INFO_PFN)) == NULL )
> goto error_out;
> - build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size);
> + build_hvm_info(hvm_info_page, v_end, mmio_start, mmio_size, args);
> munmap(hvm_info_page, PAGE_SIZE);
>
> /* Allocate and clear special pages. */
> @@ -608,6 +669,9 @@ int xc_hvm_build(xc_interface *xch, uint32_t domid,
> args.acpi_module.guest_addr_out;
> hvm_args->smbios_module.guest_addr_out =
> args.smbios_module.guest_addr_out;
> + hvm_args->lowmem_end = args.lowmem_end;
> + hvm_args->highmem_end = args.highmem_end;
> + hvm_args->mmio_start = args.mmio_start;
> }
>
> free(image);
> --
> 1.7.10.4
>
>
> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxx
> http://lists.xen.org/xen-devel
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel
|
Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.