[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 4/6] xend: export NUMA topology in physinfo



This patch modifies the physinfo hypercall and the surrounding libs and
users to display the NUMA topology detected by Xen.  This includes the
cpu-to-node mappings, the memory-to-node mappings, and the correct number
of nodes.
The modified/new fields displayed in xm info are:

nr_nodes               : 2
mem_chunks             : node0:0x0000000000000000-0x0000000080000000
                         node1:0x0000000080000000-0x00000000dff60000
node_to_cpu            : node0:0
                         node1:1


-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@xxxxxxxxxx


diffstat output:
 b/xen/include/public/numa_structs.h                 |   26 ++++++
 tools/libxc/xc_misc.c                               |    3 
 tools/libxc/xenctrl.h                               |    3 
 tools/python/xen/lowlevel/xc/xc.c                   |   77 ++++++++++++++++----
 tools/python/xen/xend/XendNode.py                   |   67 +++++++++++++++++
 tools/xm-test/tests/info/02_info_compiledata_pos.py |    4 -
 xen/arch/x86/dom0_ops.c                             |   32 ++++++++
 xen/include/public/arch-x86_32.h                    |    1 
 xen/include/public/arch-x86_64.h                    |    1 
 xen/include/public/dom0_ops.h                       |    3 
 xen/include/xen/numa.h                              |    7 -
 11 files changed, 202 insertions(+), 22 deletions(-)

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
---
diff -r ee867f2bdc12 tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Mon Jul  3 17:36:59 2006
+++ b/tools/libxc/xc_misc.c     Mon Jul  3 12:50:32 2006
@@ -40,6 +40,9 @@
 
     op.cmd = DOM0_PHYSINFO;
     op.interface_version = DOM0_INTERFACE_VERSION;
+    /* set pointers to caller's so memcpy doesn't clobber them */
+    op.u.physinfo.memory_chunks = put_info->memory_chunks;
+    op.u.physinfo.node_to_cpu = put_info->node_to_cpu;
 
     if ( (ret = do_dom0_op(xc_handle, &op)) != 0 )
         return ret;
diff -r ee867f2bdc12 tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Mon Jul  3 17:36:59 2006
+++ b/tools/libxc/xenctrl.h     Mon Jul  3 12:50:32 2006
@@ -21,6 +21,7 @@
 #include <xen/memory.h>
 #include <xen/acm.h>
 #include <xen/acm_ops.h>
+#include <xen/numa_structs.h>
 
 #ifdef __ia64__
 #define XC_PAGE_SHIFT           14
@@ -400,6 +401,8 @@
                        int clear);
 
 typedef dom0_physinfo_t xc_physinfo_t;
+typedef node_data_t xc_memory_chunk_t;
+typedef uint64_t xc_node_to_cpu_t;
 int xc_physinfo(int xc_handle,
                 xc_physinfo_t *info);
 
diff -r ee867f2bdc12 tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Mon Jul  3 17:36:59 2006
+++ b/tools/python/xen/lowlevel/xc/xc.c Mon Jul  3 12:50:32 2006
@@ -603,8 +603,21 @@
 {
     xc_physinfo_t info;
     char cpu_cap[128], *p=cpu_cap, *q=cpu_cap;
-    int i;
-    
+    int i,j;
+    PyObject *ret_obj, *memchunk_obj, *node_to_cpu_obj;
+    xc_memory_chunk_t *chunks;
+    xc_node_to_cpu_t  *map;
+
+    /* make space for mem chunks */
+    chunks =  (xc_memory_chunk_t *)malloc( sizeof(xc_memory_chunk_t) * 
+                                     PUBLIC_MAXCHUNKS );
+    set_xen_guest_handle(info.memory_chunks, chunks);
+
+    /* make space for node_to_cpu mapping */
+    map = (xc_node_to_cpu_t *)malloc( sizeof(xc_node_to_cpu_t) *
+                                    PUBLIC_MAX_NUMNODES ); 
+    set_xen_guest_handle(info.node_to_cpu, map);
+
     if ( xc_physinfo(self->xc_handle, &info) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
@@ -618,16 +631,56 @@
     if(q>cpu_cap)
         *(q-1)=0;
 
-    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
-                         "threads_per_core", info.threads_per_core,
-                         "cores_per_socket", info.cores_per_socket,
-                         "sockets_per_node", info.sockets_per_node,
-                         "nr_nodes",         info.nr_nodes,
-                         "total_memory",     pages_to_kib(info.total_pages),
-                         "free_memory",      pages_to_kib(info.free_pages),
-                         "scrub_memory",     pages_to_kib(info.scrub_pages),
-                         "cpu_khz",          info.cpu_khz,
-                         "hw_caps",          cpu_cap);
+    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
+                            "threads_per_core", info.threads_per_core,
+                            "cores_per_socket", info.cores_per_socket,
+                            "sockets_per_node", info.sockets_per_node,
+                            "total_memory",     pages_to_kib(info.total_pages),
+                            "free_memory",      pages_to_kib(info.free_pages),
+                            "scrub_memory",     pages_to_kib(info.scrub_pages),
+                            "cpu_khz",          info.cpu_khz,
+                            "hw_caps",          cpu_cap);
+    /* memchunks */
+    memchunk_obj = PyList_New(0);
+ 
+    /* build list of each memchunk's attributes, converting pfn to paddr */
+    for ( i=0; i<info.nr_nodes; i++ ) 
+    {
+        PyList_Append(memchunk_obj, 
+                      Py_BuildValue("{s:i,s:K,s:K}",
+                      "node"       , chunks[i].node_id,
+                      "start_paddr", chunks[i].node_start_pfn << XC_PAGE_SHIFT,
+                      "end_paddr"  , (chunks[i].node_start_pfn + 
+                      chunks[i].node_spanned_pages) << XC_PAGE_SHIFT ));
+    }
+    PyDict_SetItemString(ret_obj, "mem_chunks", memchunk_obj);
+ 
+    /* node to cpu mappings */
+    node_to_cpu_obj = PyList_New(0);
+    /* build list of node to cpu mappings */
+    for ( i=0; i<info.nr_nodes; i++ )
+    {
+        cpumap_t cpumap = (cpumap_t)map[i];
+        PyObject *cpus = PyList_New(0);
+ 
+        for ( j=0; cpumap != 0; j++ ) 
+        {
+            if ( cpumap & 1 )
+                PyList_Append(cpus, PyInt_FromLong(j));
+            cpumap >>=1;
+        }
+        PyList_Append(node_to_cpu_obj, cpus); 
+    }
+    /* add list of node to cpu mappings and nr_nodes to physinfo dictionary */
+    PyDict_SetItemString(ret_obj, "node_to_cpu",  node_to_cpu_obj);
+    PyDict_SetItemString(ret_obj, "nr_nodes", 
+             Py_BuildValue("i", info.nr_nodes));
+
+    /* free malloc'd memory */
+    free(chunks);
+    free(map);
+ 
+    return ret_obj;
 }
 
 static PyObject *pyxc_xeninfo(XcObject *self)
diff -r ee867f2bdc12 tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Mon Jul  3 17:36:59 2006
+++ b/tools/python/xen/xend/XendNode.py Mon Jul  3 12:50:32 2006
@@ -56,6 +56,69 @@
                 ['version', ver],
                 ['machine', mch]]
 
+    def list_to_rangepairs(self,cmap):
+            cmap.sort()
+            pairs = []
+            x = y = 0
+            for i in range(0,len(cmap)):
+                try:
+                    if ((cmap[y+1] - cmap[i]) > 1):
+                        pairs.append((cmap[x],cmap[y]))
+                        x = y = i+1
+                    else:
+                        y = y + 1
+                # if we go off the end, then just add x to y
+                except IndexError:
+                    pairs.append((cmap[x],cmap[y]))
+
+            return pairs
+
+    def format_pairs(self,pairs):
+            if not pairs:
+                return "no cpus"
+            out = ""
+            for f,s in pairs:
+                if (f==s):
+                    out += '%d'%f
+                else:
+                    out += '%d-%d'%(f,s)
+                out += ','
+            # trim trailing ','
+            return out[:-1]
+
+    def list_to_strrange(self,list):
+        return self.format_pairs(self.list_to_rangepairs(list))
+
+    def format_memchunks(self, pinfo):
+        str=''
+        whitespace=''
+        try:
+            chunk=pinfo['mem_chunks']
+            for i in range(0, pinfo['nr_nodes']):
+                str+='%snode%d:0x%016x-0x%016x\n' % (whitespace,
+                                                    chunk[i]['node'],
+                                                    chunk[i]['start_paddr'], 
+                                                    chunk[i]['end_paddr']) 
+                whitespace='%25s' % ''
+        except:
+            str='none\n' 
+        return str[:-1]
+        
+    def format_node_to_cpu(self, pinfo):
+        str=''
+        whitespace=''
+        try:
+            node_to_cpu=pinfo['node_to_cpu']
+            for i in range(0, pinfo['nr_nodes']):
+                str+='%snode%d:%s\n' % (whitespace,
+                                        i, 
+                                      self.list_to_strrange(node_to_cpu[i]))
+                whitespace='%25s' % ''        
+        except:
+            str='none\n'
+        return str[:-1];
+
+
     def physinfo(self):
         info = self.xc.physinfo()
 
@@ -67,6 +130,8 @@
         # physinfo is in KiB
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
+        info['mem_chunks'] = self.format_memchunks(info)
+        info['node_to_cpu'] = self.format_node_to_cpu(info)
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -77,6 +142,8 @@
                       'hw_caps',
                       'total_memory',
                       'free_memory',
+                      'mem_chunks',
+                      'node_to_cpu'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]
diff -r ee867f2bdc12 tools/xm-test/tests/info/02_info_compiledata_pos.py
--- a/tools/xm-test/tests/info/02_info_compiledata_pos.py       Mon Jul  3 17:36:59 2006
+++ b/tools/xm-test/tests/info/02_info_compiledata_pos.py       Mon Jul  3 12:50:32 2006
@@ -18,9 +18,7 @@
 for line in lines:
     pieces = line.split(" : ", 1)
 
-    if len(pieces) < 2:
-        FAIL("Found invalid line: [%s]" % line)
-    else:
+    if len(pieces) > 1:
         map[pieces[0]] = pieces[1]
 
 for field in ["cores_per_socket", "threads_per_core", "cpu_mhz",
diff -r ee867f2bdc12 xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Mon Jul  3 17:36:59 2006
+++ b/xen/arch/x86/dom0_ops.c   Mon Jul  3 12:50:32 2006
@@ -25,6 +25,7 @@
 #include <asm/hvm/support.h>
 #include <asm/processor.h>
 #include <public/sched_ctl.h>
+#include <asm/numa.h>
 
 #include <asm/mtrr.h>
 #include "cpu/mtrr/mtrr.h"
@@ -183,6 +184,8 @@
     case DOM0_PHYSINFO:
     {
         dom0_physinfo_t *pi = &op->u.physinfo;
+        int i,j;
+        u64 node_to_cpu_64[MAX_NUMNODES];
 
         pi->threads_per_core =
             cpus_weight(cpu_sibling_map[0]);
@@ -191,7 +194,6 @@
         pi->sockets_per_node = 
             num_online_cpus() / cpus_weight(cpu_core_map[0]);
 
-        pi->nr_nodes         = 1;
         pi->total_pages      = total_pages;
         pi->free_pages       = avail_domheap_pages();
         pi->scrub_pages      = avail_scrub_pages();
@@ -199,6 +201,34 @@
         memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
         memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
         ret = 0;
+        for_each_online_node(i) 
+        {
+            /* copy memory chunk structs to guest */
+            if ( copy_to_guest_offset(op->u.physinfo.memory_chunks, i, 
+                                      &(node_data[i]), 1) ) {
+                ret = -EFAULT;
+                break;
+            }
+        }
+
+        /* set number of nodes */
+        pi->nr_nodes = num_online_nodes();
+
+        /* copy node to cpu mapping to guest */
+        /* converting cpumask to u64 b/c userspace doesn't 
+         * know about cpumask_t and is accepting a u64 */
+        memset(node_to_cpu_64, 0, sizeof(node_to_cpu_64));
+        for ( i=0; i<pi->nr_nodes; i++) {
+            for ( j=0; j<num_online_cpus(); j++)
+                if ( cpu_isset(j, node_to_cpumask[i]) )
+                    node_to_cpu_64[i] |= (u64)1 << j;
+
+            if ( copy_to_guest_offset(op->u.physinfo.node_to_cpu, 
+                                      i, &(node_to_cpu_64[i]), 1) ) {
+                ret = -EFAULT;
+                break;
+            }
+        }
         if ( copy_to_guest(u_dom0_op, op, 1) )
             ret = -EFAULT;
     }
diff -r ee867f2bdc12 xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Mon Jul  3 17:36:59 2006
+++ b/xen/include/public/arch-x86_32.h  Mon Jul  3 12:50:32 2006
@@ -24,6 +24,7 @@
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
 __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_XEN_GUEST_HANDLE(u64, uint64_t);
 DEFINE_XEN_GUEST_HANDLE(char);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
diff -r ee867f2bdc12 xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Mon Jul  3 17:36:59 2006
+++ b/xen/include/public/arch-x86_64.h  Mon Jul  3 12:50:32 2006
@@ -24,6 +24,7 @@
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
 __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_XEN_GUEST_HANDLE(u64, uint64_t);
 DEFINE_XEN_GUEST_HANDLE(char);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
diff -r ee867f2bdc12 xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Mon Jul  3 17:36:59 2006
+++ b/xen/include/public/dom0_ops.h     Mon Jul  3 12:50:32 2006
@@ -13,6 +13,7 @@
 
 #include "xen.h"
 #include "sched_ctl.h"
+#include "numa_structs.h"
 
 /*
  * Make sure you increment the interface version whenever you modify this file!
@@ -233,6 +234,8 @@
     uint64_t free_pages;
     uint64_t scrub_pages;
     uint32_t hw_cap[8];
+    XEN_GUEST_HANDLE(node_data_t) memory_chunks;
+    XEN_GUEST_HANDLE(u64) node_to_cpu;
 };
 typedef struct dom0_physinfo dom0_physinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_physinfo_t);
diff -r ee867f2bdc12 xen/include/xen/numa.h
--- a/xen/include/xen/numa.h    Mon Jul  3 17:36:59 2006
+++ b/xen/include/xen/numa.h    Mon Jul  3 12:50:32 2006
@@ -2,6 +2,7 @@
 #define _XEN_NUMA_H
 
 #include <xen/config.h>
+#include <public/numa_structs.h>
 
 #ifdef CONFIG_DISCONTIGMEM
 #include <asm/numnodes.h>
@@ -26,10 +27,4 @@
 #include <xen/cpumask.h>
 extern cpumask_t node_to_cpumask[];
 
-typedef struct node_data {
-    unsigned long node_start_pfn;
-    unsigned long node_spanned_pages;
-    unsigned int  node_id;
-} node_data_t;
-
 #endif /* _XEN_NUMA_H */
diff -r ee867f2bdc12 xen/include/public/numa_structs.h
--- /dev/null   Mon Jul  3 17:36:59 2006
+++ b/xen/include/public/numa_structs.h Mon Jul  3 12:50:32 2006
@@ -0,0 +1,26 @@
+/*
+ * Ryan Grimm  <grimm@xxxxxxxxxx>
+ * Ryan Harper <ryanh@xxxxxxxxxx>
+ * Copyright (c) 2006, International Business Machines Corporation.
+ *
+ */
+
+#ifndef __XEN_PUBLIC_NUMA_STRUCTS_H__
+
+#define __XEN_PUBLIC_NUMA_STRUCTS_H__
+
+#include "xen.h"
+
+/* define these for xc to use b/c MAX_NUMNODES and MAX_CHUNKS
+ * are not exposed in /public */
+#define PUBLIC_MAX_NUMNODES 16
+#define PUBLIC_MAXCHUNKS 32
+
+typedef struct node_data {
+    unsigned long node_start_pfn;
+    unsigned long node_spanned_pages;
+    unsigned int  node_id;
+} node_data_t;
+DEFINE_XEN_GUEST_HANDLE(node_data_t);
+
+#endif

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.