diff -r 6eba62092cb4 -r 3004092c4792 tools/libxc/Makefile --- a/tools/libxc/Makefile Thu Apr 01 13:06:14 2010 -0400 +++ b/tools/libxc/Makefile Thu Apr 01 16:23:27 2010 -0400 @@ -25,7 +25,7 @@ CTRL_SRCS-y += xc_mem_event.c CTRL_SRCS-y += xc_mem_paging.c CTRL_SRCS-y += xc_memshr.c -CTRL_SRCS-y += xc_dom_numa.c +CTRL_SRCS-y += xc_cpumap.c CTRL_SRCS-$(CONFIG_X86) += xc_pagetab.c CTRL_SRCS-$(CONFIG_Linux) += xc_linux.c CTRL_SRCS-$(CONFIG_SunOS) += xc_solaris.c @@ -51,6 +51,7 @@ GUEST_SRCS-$(CONFIG_X86) += xc_dom_bzimageloader.c GUEST_SRCS-y += xc_dom_binloader.c GUEST_SRCS-y += xc_dom_compat_linux.c +GUEST_SRCS-y += xc_dom_numa.c GUEST_SRCS-$(CONFIG_X86) += xc_dom_x86.c GUEST_SRCS-$(CONFIG_X86) += xc_cpuid_x86.c diff -r 6eba62092cb4 -r 3004092c4792 tools/libxc/xc_dom_numa.c --- a/tools/libxc/xc_dom_numa.c Thu Apr 01 13:06:14 2010 -0400 +++ b/tools/libxc/xc_dom_numa.c Thu Apr 01 16:23:27 2010 -0400 @@ -5,12 +5,21 @@ #include "xg_private.h" #include "xc_dom.h" #include "xc_dom_numa.h" +#include "xc_cpumap.h" #define KB (1024) #define MB (KB * KB) #define pfn_to_paddr(pfn) ((xen_paddr_t)(pfn) << PAGE_SHIFT_X86) +/* Construct a xenctl_cpumap structure using buffer from the xenctl_cpumask + * structure */ +#define xc_cpumap_from_cpumask(map, mask) \ +do { \ + (map)->nr_cpus = XENCTL_NR_CPUS; \ + set_xen_guest_handle((map)->bitmap, (mask)->bits); \ +}while(0) + /* numa layout structures */ xc_machine_numa_layout_t phys_numa_layout; xc_domain_numa_layout_t pv_numa_layout; @@ -19,7 +28,7 @@ xc_domain_numa_layout_t *layout) { unsigned int i, j; - char vcpumask[128] = ""; + char mapstr[128] = ""; xc_dom_printf("%s called :\n NUMA-LAYOUT(Dom%d) : vcpus(%u), vnodes(%u)", __FUNCTION__, dom->guest_domid, layout->max_vcpus, layout->max_vnodes); switch (layout->type) @@ -38,14 +47,13 @@ } for (i = 0; i < layout->max_vnodes; i++) { + struct xenctl_cpumap cpumap; struct xen_vnode_data *vnode_data = &layout->vnode_data[i]; -#if 0 - xc_cpumask_scnprintf(vcpumask, sizeof(vcpumask), - 
*((cpumask_t *)&vnode_data->vcpu_mask)); -#endif + xc_cpumap_from_cpumask(&cpumap, &vnode_data->vcpu_mask); + xc_cpumap_snprintf(mapstr, sizeof(mapstr), cpumap); xc_dom_printf("vnode[%u]:mnode(%u), node_nr_pages(%lx), \ vcpu_mask(%s)\n", vnode_data->vnode_id, vnode_data->mnode_id, - vnode_data->nr_pages, vcpumask); + vnode_data->nr_pages, mapstr); } xc_dom_printf("vnode distances :\n"); @@ -293,10 +301,47 @@ return 0; } +/* Construct a xenctl_cpumap structure using buffer from the xenctl_cpumask + * structure */ +#define xc_cpumap_from_cpumask(map, mask) \ +do { \ + (map)->nr_cpus = XENCTL_NR_CPUS; \ + set_xen_guest_handle((map)->bitmap, (mask)->bits); \ +}while(0) + +static int xc_setup_vnode_vcpu_masks(xc_domain_numa_layout_t *pv_layout) +{ + int numa_strategy, vcpu; + + numa_strategy = pv_layout->type; + + /* In case of striped allocation, vnodes are not exposed. + * Only the mapping from vnode to mnode is of importance */ + if (numa_strategy == XEN_DOM_NUMA_STRIPED) + return 0; + + for (vcpu=0; vcpu < pv_layout->max_vcpus; vcpu++) + { + struct xenctl_cpumap vcpumap; + struct xenctl_cpumask *vcpumask; + int vnode = vcpu/(pv_layout->max_vcpus/pv_layout->max_vnodes); + + vcpumask = &pv_layout->vnode_data[vnode].vcpu_mask; + xc_cpumap_from_cpumask(&vcpumap, vcpumask); + xc_cpumap_set_cpu(vcpu, vcpumap); + } + + return 0; +} + /* Policies for node selection need more research/experience. * Also, live migration of the VMs (to other nodes) could provide * periodic load balancing across the nodes.
*/ + #define XC_VNODE_MIN_SIZE (128 * MB) +#define XC_VCPUS_PER_VNODE (1) +#define XC_POWER_OF_2(x) (((x) & ((x) - 1)) == 0) + static int xc_select_domain_vnodes(struct xc_dom_image *dom, xc_machine_numa_layout_t *phys_layout, xc_domain_numa_layout_t *pv_layout) { @@ -315,17 +360,37 @@ return -1; } - /* Attempt to confine the VM or split the guest (and make it numa aware) */ + if (!(pv_layout->max_vcpus = + xc_get_max_vcpus(dom->guest_xc, dom->guest_domid))) + { + xc_dom_printf("%s: xc_get_max_vcpus failed !\n", __FUNCTION__); + return -1; + } + + if (pv_layout->max_vcpus > XENCTL_NR_CPUS) + { + xc_dom_printf("%s: Failed - More than %d vcpus!\n", + __FUNCTION__, XENCTL_NR_CPUS); + return -1; + } + + /* Attempt to confine or split the VM */ for (i = 1; i <= phys_layout->max_nodes; i<<=1) { uint64_t vnode_size_pages; - memset(node_pages_selected, 0, sizeof(node_pages_selected)); - vnode_size_pages = pv_dom_pages/i; if ((vnode_size_pages << page_shift) < XC_VNODE_MIN_SIZE) break; + /* Not enough vcpus to distribute over */ + if (pv_layout->max_vcpus < (i*XC_VCPUS_PER_VNODE)) + break; + + if ((i > 1) && !XC_POWER_OF_2(pv_layout->max_vcpus)) + break; + + memset(node_pages_selected, 0, sizeof(node_pages_selected)); if (!xc_select_best_fit_nodes(phys_layout, i, vnode_size_pages, node_pages_selected, page_shift)) { @@ -357,13 +422,6 @@ pv_layout->version = XEN_DOM_NUMA_INTERFACE_VERSION; pv_layout->type = numa_strategy; - if (!(pv_layout->max_vcpus = - xc_get_max_vcpus(dom->guest_xc, dom->guest_domid))) - { - xc_dom_printf("%s: xc_get_max_vcpus failed !\n", __FUNCTION__); - return -1; - } - for (i = 0; i < phys_layout->max_nodes; i++) { struct xenmem_node_data *node_data; @@ -385,6 +443,11 @@ * vcpus over vnodes and assigning affinities */ } + if (xc_setup_vnode_vcpu_masks(pv_layout)) + { + xc_dom_printf("%s: xc_setup_vnode_vcpu_masks failed !\n", __FUNCTION__); + return -1; + } dom->numa_layout = pv_layout; return 0; } diff -r 6eba62092cb4 -r 3004092c4792 
tools/libxc/xc_domain.c --- a/tools/libxc/xc_domain.c Thu Apr 01 13:06:14 2010 -0400 +++ b/tools/libxc/xc_domain.c Thu Apr 01 16:23:27 2010 -0400 @@ -8,6 +8,7 @@ #include "xc_private.h" #include "xg_save_restore.h" +#include "xc_cpumap.h" #include <xen/memory.h> #include <xen/hvm/hvm_op.h> @@ -98,28 +99,17 @@ int xc_vcpu_setaffinity(int xc_handle, uint32_t domid, int vcpu, - uint64_t *cpumap, int cpusize) + struct xenctl_cpumap *cpumap) { DECLARE_DOMCTL; int ret = -1; - uint8_t *local = malloc(cpusize); - if(local == NULL) - { - PERROR("Could not alloc memory for Xen hypercall"); - goto out; - } domctl.cmd = XEN_DOMCTL_setvcpuaffinity; domctl.domain = (domid_t)domid; - domctl.u.vcpuaffinity.vcpu = vcpu; + domctl.u.vcpuaffinity.vcpu = vcpu; + domctl.u.vcpuaffinity.cpumap = *cpumap; - bitmap_64_to_byte(local, cpumap, cpusize * 8); - - set_xen_guest_handle(domctl.u.vcpuaffinity.cpumap.bitmap, local); - - domctl.u.vcpuaffinity.cpumap.nr_cpus = cpusize * 8; - - if ( lock_pages(local, cpusize) != 0 ) + if (xc_cpumap_lock_pages(cpumap)) { PERROR("Could not lock memory for Xen hypercall"); goto out; @@ -127,10 +117,8 @@ ret = do_domctl(xc_handle, &domctl); - unlock_pages(local, cpusize); - + xc_cpumap_unlock_pages(cpumap); out: - free(local); return ret; } @@ -138,27 +126,17 @@ int xc_vcpu_getaffinity(int xc_handle, uint32_t domid, int vcpu, - uint64_t *cpumap, int cpusize) + struct xenctl_cpumap *cpumap) { DECLARE_DOMCTL; int ret = -1; - uint8_t * local = malloc(cpusize); - - if(local == NULL) - { - PERROR("Could not alloc memory for Xen hypercall"); - goto out; - } domctl.cmd = XEN_DOMCTL_getvcpuaffinity; domctl.domain = (domid_t)domid; domctl.u.vcpuaffinity.vcpu = vcpu; + domctl.u.vcpuaffinity.cpumap = *cpumap; - - set_xen_guest_handle(domctl.u.vcpuaffinity.cpumap.bitmap, local); - domctl.u.vcpuaffinity.cpumap.nr_cpus = cpusize * 8; - - if ( lock_pages(local, sizeof(local)) != 0 ) + if (xc_cpumap_lock_pages(cpumap)) { PERROR("Could not lock memory for Xen hypercall"); goto out; @@ -166,10 +144,8 @@ ret 
= do_domctl(xc_handle, &domctl); - unlock_pages(local, sizeof (local)); - bitmap_byte_to_64(cpumap, local, cpusize * 8); + xc_cpumap_unlock_pages(cpumap); out: - free(local); return ret; }