[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 3/4] [HVM] allocate HVM guest memory with NUMA in mind



Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>
# HG changeset patch
# User andre.przywara@xxxxxxx
# Date 1186563732 -7200
# Node ID f5e9f20109d9dc3c82bfadcedd4af77a35e8c5fb
# Parent  e730c1207604414f6f2779cc6adb213e3c1362eb
allocate HVM guest memory according to NUMA setup

diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xc_hvm_build.c
--- a/tools/libxc/xc_hvm_build.c        Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xc_hvm_build.c        Wed Aug 08 11:02:12 2007 +0200
@@ -152,8 +152,101 @@ static int loadelfimage(
     return rc;
 }
 
+#define MAX_CPU_ID 255
+
+/* Pin each vcpu of dom to the CPUs of one NUMA node, spreading the vcpus over
+ * numanodes nodes.  Returns 0 on success, -1 on failure. */
+static int setup_numa_affinity (int xc_handle, uint32_t dom, int numanodes)
+{
+    xc_physinfo_t physinfo;
+    xc_cpu_to_node_t *cpumap;
+    uint64_t *nodemasks = NULL;
+    int nrcpus, i, node, rc = -1;
+    xc_dominfo_t dominfo;
+    if ( (cpumap = calloc(MAX_CPU_ID + 1, sizeof(*cpumap))) == NULL )
+        return -1;
+    set_xen_guest_handle(physinfo.cpu_to_node, cpumap);
+    if ( xc_physinfo(xc_handle, &physinfo) != 0 || physinfo.nr_nodes == 0 )
+        goto out;
+    nrcpus = physinfo.threads_per_core * physinfo.cores_per_socket *
+        physinfo.sockets_per_node * physinfo.nr_nodes;
+    if ( nrcpus > 64 ) nrcpus = 64;  /* a node mask holds 64 CPU bits */
+    nodemasks = calloc(physinfo.nr_nodes, sizeof(*nodemasks));
+    if ( nodemasks == NULL ) goto out;
+    for ( i = 0; i < nrcpus; i++ )
+        if ( cpumap[i] < physinfo.nr_nodes )   /* skip out-of-range node ids */
+            nodemasks[cpumap[i]] |= 1ULL << i; /* 1 << i would be an int shift */
+    if ( xc_domain_getinfo (xc_handle, dom, 1, &dominfo) != 1 )
+    {
+        ERROR("Unable to get domain info.");
+        goto out;
+    }
+    for ( i = 0; i <= dominfo.max_vcpu_id; i++ )
+    {   /* the modulo keeps the index valid when numanodes > nr_nodes */
+        node = (i * numanodes / (dominfo.max_vcpu_id + 1)) % physinfo.nr_nodes;
+        xc_vcpu_setaffinity (xc_handle, dom, i, nodemasks[node]);
+    }
+    rc = 0;
+ out:
+    free(nodemasks); free(cpumap);
+    return rc;
+}
+
+/* Populate the guest physmap from pfn 0xc0 upwards in numanodes roughly equal
+ * chunks, passing for each chunk the first CPU found on the matching node (or
+ * CPU 0 if none is found).  Returns 0 on success, non-zero on failure. */
+static int setup_numa_mem ( int xc_handle, uint32_t dom, int nr_pages,
+                       xen_pfn_t *page_array, int numanodes )
+{
+    xc_physinfo_t physinfo;
+    xc_cpu_to_node_t *cpumap;
+    int nrcpus, i, j, rc = -1;
+    uint32_t firstcpu;
+    unsigned long offset = 0xc0;    /* first pfn populated by this function */
+    unsigned long pages_per_node, end;
+
+    if ( numanodes <= 0 )           /* would divide by zero below */
+        return -1;
+    cpumap = calloc(MAX_CPU_ID + 1, sizeof(*cpumap));
+    if ( cpumap == NULL )
+        return -1;
+    set_xen_guest_handle(physinfo.cpu_to_node, cpumap);
+    if ( xc_physinfo (xc_handle, &physinfo) != 0 )
+        goto out;
+    nrcpus = physinfo.threads_per_core * physinfo.cores_per_socket *
+        physinfo.sockets_per_node * physinfo.nr_nodes;
+    if ( nrcpus > MAX_CPU_ID + 1 )
+        nrcpus = MAX_CPU_ID + 1;    /* never read past the end of cpumap[] */
+
+    pages_per_node = ((nr_pages + 0xFF) & (~0xFFUL)) / numanodes;
+    for ( i = 0; i < numanodes; i++ )
+    {
+        firstcpu = 0;               /* fallback if no CPU maps to node i */
+        for ( j = 0; j < nrcpus; j++ )
+        {
+            if ( cpumap[j] == i )
+            {
+                firstcpu = j;
+                break;
+            }
+        }
+        /* Chunk i ends at a multiple of pages_per_node; the last at nr_pages. */
+        end = ( i == numanodes - 1 ) ? (unsigned long)nr_pages
+                                     : (i + 1) * pages_per_node;
+        if ( end <= offset ) { rc = -1; break; } /* guest too small to split */
+        rc = xc_domain_memory_populate_physmap(
+            xc_handle, dom, end - offset, 0, 0, firstcpu,
+            &page_array[offset]);
+        if ( rc != 0 ) break;
+        offset = end;
+    }
+ out:
+    free(cpumap);
+    return rc;
+}
+
 static int setup_guest(int xc_handle,
-                       uint32_t dom, int memsize,
+                       uint32_t dom, int memsize, int numanodes,
                        char *image, unsigned long image_size,
                        vcpu_guest_context_either_t *ctxt)
 {
@@ -213,13 +306,24 @@ static int setup_guest(int xc_handle,
     rc = xc_domain_memory_populate_physmap(
         xc_handle, dom, 0xa0, 0, 0, XENMEM_DEFAULT_CPU, &page_array[0x00]);
     if ( rc == 0 )
-        rc = xc_domain_memory_populate_physmap(
-            xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
-            &page_array[0xc0]);
+    {
+        if ( numanodes > 0 )
+            rc = setup_numa_mem (xc_handle, dom, nr_pages, page_array,
+            numanodes);
+        else
+            rc = xc_domain_memory_populate_physmap (
+                xc_handle, dom, nr_pages - 0xc0, 0, 0, XENMEM_DEFAULT_CPU,
+                &page_array[0xc0] );
+    }
     if ( rc != 0 )
     {
         PERROR("Could not allocate memory for HVM guest.\n");
         goto error_out;
+    }
+
+    if ( numanodes > 0 )
+    {
+        setup_numa_affinity (xc_handle, dom, numanodes);
     }
 
     if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
@@ -288,6 +392,7 @@ static int xc_hvm_build_internal(int xc_
 static int xc_hvm_build_internal(int xc_handle,
                                  uint32_t domid,
                                  int memsize,
+                                 int numanodes,
                                  char *image,
                                  unsigned long image_size)
 {
@@ -303,7 +408,8 @@ static int xc_hvm_build_internal(int xc_
 
     memset(&ctxt, 0, sizeof(ctxt));
 
-    if ( setup_guest(xc_handle, domid, memsize, image, image_size, &ctxt) < 0 )
+    if ( setup_guest(xc_handle, domid, memsize, numanodes,
+        image, image_size, &ctxt) < 0 )
     {
         goto error_out;
     }
@@ -341,6 +447,7 @@ int xc_hvm_build(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 int numanodes,
                  const char *image_name)
 {
     char *image;
@@ -351,7 +458,8 @@ int xc_hvm_build(int xc_handle,
          ((image = xc_read_image(image_name, &image_size)) == NULL) )
         return -1;
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
+        image, image_size);
 
     free(image);
 
@@ -364,6 +472,7 @@ int xc_hvm_build_mem(int xc_handle,
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_buffer,
                      unsigned long image_size)
 {
@@ -386,7 +495,7 @@ int xc_hvm_build_mem(int xc_handle,
         return -1;
     }
 
-    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
+    sts = xc_hvm_build_internal(xc_handle, domid, memsize, numanodes,
                                 img, img_len);
 
     /* xc_inflate_buffer may return the original buffer pointer (for
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xenguest.h
--- a/tools/libxc/xenguest.h    Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xenguest.h    Wed Aug 08 11:02:12 2007 +0200
@@ -128,11 +128,13 @@ int xc_hvm_build(int xc_handle,
 int xc_hvm_build(int xc_handle,
                  uint32_t domid,
                  int memsize,
+                 int numanodes,
                  const char *image_name);
 
 int xc_hvm_build_mem(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_buffer,
                      unsigned long image_size);
 
diff -r e730c1207604 -r f5e9f20109d9 tools/libxc/xg_private.c
--- a/tools/libxc/xg_private.c  Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/libxc/xg_private.c  Wed Aug 08 11:02:12 2007 +0200
@@ -192,6 +192,7 @@ __attribute__((weak))
     int xc_hvm_build(int xc_handle,
                      uint32_t domid,
                      int memsize,
+                     int numanodes,
                      const char *image_name)
 {
     errno = ENOSYS;
diff -r e730c1207604 -r f5e9f20109d9 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue Aug 07 15:11:00 2007 +0200
+++ b/tools/python/xen/lowlevel/xc/xc.c Wed Aug 08 11:02:12 2007 +0200
@@ -549,7 +549,7 @@ static PyObject *pyxc_hvm_build(XcObject
                                       &numanodes) )
         return NULL;
 
-    if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
+    if ( xc_hvm_build(self->xc_handle, dom, memsize, numanodes, image) != 0 )
         return pyxc_error_to_exception();
 
 #if !defined(__ia64__)
diff -r e730c1207604 -r f5e9f20109d9 xen/common/page_alloc.c
--- a/xen/common/page_alloc.c   Tue Aug 07 15:11:00 2007 +0200
+++ b/xen/common/page_alloc.c   Wed Aug 08 11:02:12 2007 +0200
@@ -806,8 +806,12 @@ struct page_info *__alloc_domheap_pages(
 
     if ( (zone_hi + PAGE_SHIFT) >= dma_bitsize )
     {
-        pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, cpu, order);
-
+        if (avail_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+            cpu_to_node (cpu)) >= ( 1UL << order ))
+        {
+            pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi,
+                cpu, order);
+        }
         /* Failure? Then check if we can fall back to the DMA pool. */
         if ( unlikely(pg == NULL) &&
              ((order > MAX_ORDER) ||
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.