
Re: [Xen-devel] [PATCH 4/6] xend: export NUMA topology in physinfo



* Ryan Harper <ryanh@xxxxxxxxxx> [2006-07-31 15:08]:
> This patch modifies the physinfo hypercall and the surrounding libs and
> users to display the NUMA topology detected by Xen.  This includes cpu
> to node mappings, memory to node mappings, and correct number of nodes.
> The modified/new fields displayed in xm info are:
> 
> nr_nodes               : 2
> mem_chunks             : node0:0x0000000000000000-0x0000000080000000
>                          node1:0x0000000080000000-0x00000000dff60000
> node_to_cpu            : node0:0
>                          node1:1
> 

- Fix a few users of xc_physinfo(): previously xentop would fail on the
  xc_physinfo call because the new memory_chunks and node_to_cpu handles
  were left uninitialized.
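
For reference, here is a minimal sketch (not part of the patch; the helper
name and the final CPU-count arithmetic are illustrative) of the caller-side
fix as applied to xenbaked, xentrace and libxenstat: tools that don't need
the NUMA arrays simply zero the physinfo struct, so memory_chunks and
node_to_cpu stay NULL handles and the hypervisor skips copying them.

    #include <string.h>
    #include <xenctrl.h>

    /* Tools that don't need the NUMA arrays just zero the physinfo struct,
     * leaving memory_chunks and node_to_cpu as NULL handles; the hypervisor
     * then skips the copy_to_guest_offset() calls for them. */
    unsigned int get_num_cpus(void)
    {
        xc_physinfo_t physinfo;
        int xc_handle = xc_interface_open();
        unsigned int nr_cpus = 0;

        /* ensure memory_chunks and node_to_cpu are NULL */
        memset(&physinfo, 0, sizeof(physinfo));

        if ( xc_physinfo(xc_handle, &physinfo) == 0 )
            nr_cpus = physinfo.threads_per_core * physinfo.cores_per_socket *
                      physinfo.sockets_per_node * physinfo.nr_nodes;

        xc_interface_close(xc_handle);
        return nr_cpus;
    }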

-- 
Ryan Harper
Software Engineer; Linux Technology Center
IBM Corp., Austin, Tx
(512) 838-9253   T/L: 678-9253
ryanh@xxxxxxxxxx


diffstat output:
 b/xen/include/public/numa_structs.h                 |   26 ++++++
 tools/libxc/xc_misc.c                               |    3 
 tools/libxc/xenctrl.h                               |    3 
 tools/python/xen/lowlevel/xc/xc.c                   |   77 ++++++++++++++++----
 tools/python/xen/xend/XendNode.py                   |   67 +++++++++++++++++
 tools/xenmon/xenbaked.c                             |    3 
 tools/xenstat/libxenstat/src/xenstat.c              |    3 
 tools/xentrace/xentrace.c                           |    3 
 tools/xm-test/tests/info/02_info_compiledata_pos.py |    4 -
 xen/arch/x86/dom0_ops.c                             |   42 ++++++++++
 xen/include/public/arch-x86_32.h                    |    1 
 xen/include/public/arch-x86_64.h                    |    1 
 xen/include/public/dom0_ops.h                       |    3 
 xen/include/xen/numa.h                              |    7 -
 14 files changed, 221 insertions(+), 22 deletions(-)

Signed-off-by: Ryan Harper <ryanh@xxxxxxxxxx>
---
Export NUMA topology in physinfo hcall
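
(For anyone wiring this up from C rather than Python, the sketch below is
illustrative only and not part of the patch; it mirrors what pyxc_physinfo()
does: allocate the caller-side arrays, point the guest handles at them before
the call, and decode the per-node cpu bitmaps afterwards. The function name
print_numa_topology is made up for the example.)

    #include <stdio.h>
    #include <stdlib.h>
    #include <xenctrl.h>

    /* Illustrative only: fetch and print the NUMA topology the same way
     * pyxc_physinfo() does. */
    int print_numa_topology(int xc_handle)
    {
        xc_physinfo_t info;
        xc_memory_chunk_t *chunks;
        xc_node_to_cpu_t *map;
        int i, j, ret = -1;

        /* make space for the mem chunks and the node_to_cpu mapping */
        chunks = malloc(sizeof(xc_memory_chunk_t) * PUBLIC_MAXCHUNKS);
        map    = malloc(sizeof(xc_node_to_cpu_t) * PUBLIC_MAX_NUMNODES);
        if ( chunks == NULL || map == NULL )
            goto out;

        set_xen_guest_handle(info.memory_chunks, chunks);
        set_xen_guest_handle(info.node_to_cpu, map);

        if ( (ret = xc_physinfo(xc_handle, &info)) != 0 )
            goto out;

        for ( i = 0; i < info.nr_nodes; i++ )
        {
            printf("node%u: pfn 0x%lx-0x%lx cpus:", chunks[i].node_id,
                   chunks[i].node_start_pfn,
                   chunks[i].node_start_pfn + chunks[i].node_spanned_pages);
            for ( j = 0; j < 64; j++ )
                if ( map[i] & ((xc_node_to_cpu_t)1 << j) )
                    printf(" %d", j);
            printf("\n");
        }
     out:
        free(chunks);
        free(map);
        return ret;
    }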

diff -r 4053cb1daebe tools/libxc/xc_misc.c
--- a/tools/libxc/xc_misc.c     Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/libxc/xc_misc.c     Tue Aug 15 11:42:09 2006 -0500
@@ -40,6 +40,9 @@ int xc_physinfo(int xc_handle,
 
     op.cmd = DOM0_PHYSINFO;
     op.interface_version = DOM0_INTERFACE_VERSION;
+    /* use the caller's buffer pointers so the memcpy back doesn't clobber them */
+    op.u.physinfo.memory_chunks = put_info->memory_chunks;
+    op.u.physinfo.node_to_cpu = put_info->node_to_cpu;
 
     if ( (ret = do_dom0_op(xc_handle, &op)) != 0 )
         return ret;
diff -r 4053cb1daebe tools/libxc/xenctrl.h
--- a/tools/libxc/xenctrl.h     Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/libxc/xenctrl.h     Tue Aug 15 11:42:09 2006 -0500
@@ -21,6 +21,7 @@
 #include <xen/memory.h>
 #include <xen/acm.h>
 #include <xen/acm_ops.h>
+#include <xen/numa_structs.h>
 
 #ifdef __ia64__
 #define XC_PAGE_SHIFT           14
@@ -398,6 +399,8 @@ int xc_readconsolering(int xc_handle,
                        int clear);
 
 typedef dom0_physinfo_t xc_physinfo_t;
+typedef node_data_t xc_memory_chunk_t;
+typedef uint64_t xc_node_to_cpu_t;
 int xc_physinfo(int xc_handle,
                 xc_physinfo_t *info);
 
diff -r 4053cb1daebe tools/python/xen/lowlevel/xc/xc.c
--- a/tools/python/xen/lowlevel/xc/xc.c Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/python/xen/lowlevel/xc/xc.c Tue Aug 15 11:42:09 2006 -0500
@@ -549,8 +549,21 @@ static PyObject *pyxc_physinfo(XcObject 
 {
     xc_physinfo_t info;
     char cpu_cap[128], *p=cpu_cap, *q=cpu_cap;
-    int i;
-    
+    int i,j;
+    PyObject *ret_obj, *memchunk_obj, *node_to_cpu_obj;
+    xc_memory_chunk_t *chunks;
+    xc_node_to_cpu_t  *map;
+
+    /* make space for mem chunks */
+    chunks =  (xc_memory_chunk_t *)malloc( sizeof(xc_memory_chunk_t) * 
+                                     PUBLIC_MAXCHUNKS );
+    set_xen_guest_handle(info.memory_chunks, chunks);
+
+    /* make space for node_to_cpu mapping */
+    map = (xc_node_to_cpu_t *)malloc( sizeof(xc_node_to_cpu_t) *
+                                    PUBLIC_MAX_NUMNODES ); 
+    set_xen_guest_handle(info.node_to_cpu, map);
+
     if ( xc_physinfo(self->xc_handle, &info) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
@@ -564,16 +577,56 @@ static PyObject *pyxc_physinfo(XcObject 
     if(q>cpu_cap)
         *(q-1)=0;
 
-    return Py_BuildValue("{s:i,s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
-                         "threads_per_core", info.threads_per_core,
-                         "cores_per_socket", info.cores_per_socket,
-                         "sockets_per_node", info.sockets_per_node,
-                         "nr_nodes",         info.nr_nodes,
-                         "total_memory",     pages_to_kib(info.total_pages),
-                         "free_memory",      pages_to_kib(info.free_pages),
-                         "scrub_memory",     pages_to_kib(info.scrub_pages),
-                         "cpu_khz",          info.cpu_khz,
-                         "hw_caps",          cpu_cap);
+    ret_obj = Py_BuildValue("{s:i,s:i,s:i,s:l,s:l,s:l,s:i,s:s}",
+                            "threads_per_core", info.threads_per_core,
+                            "cores_per_socket", info.cores_per_socket,
+                            "sockets_per_node", info.sockets_per_node,
+                            "total_memory",     pages_to_kib(info.total_pages),
+                            "free_memory",      pages_to_kib(info.free_pages),
+                            "scrub_memory",     pages_to_kib(info.scrub_pages),
+                            "cpu_khz",          info.cpu_khz,
+                            "hw_caps",          cpu_cap);
+    /* memchunks */
+    memchunk_obj = PyList_New(0);
+ 
+    /* build list of each memchunk's attributes, converting pfn to paddr */
+    for ( i=0; i<info.nr_nodes; i++ ) 
+    {
+        PyList_Append(memchunk_obj, 
+                      Py_BuildValue("{s:i,s:K,s:K}",
+                      "node"       , chunks[i].node_id,
+                      "start_paddr", chunks[i].node_start_pfn << XC_PAGE_SHIFT,
+                      "end_paddr"  , (chunks[i].node_start_pfn + 
+                      chunks[i].node_spanned_pages) << XC_PAGE_SHIFT ));
+    }
+    PyDict_SetItemString(ret_obj, "mem_chunks", memchunk_obj);
+ 
+    /* node to cpu mappings */
+    node_to_cpu_obj = PyList_New(0);
+    /* build list of node to cpu mappings */
+    for ( i=0; i<info.nr_nodes; i++ )
+    {
+        cpumap_t cpumap = (cpumap_t)map[i];
+        PyObject *cpus = PyList_New(0);
+ 
+        for ( j=0; cpumap != 0; j++ ) 
+        {
+            if ( cpumap & 1 )
+                PyList_Append(cpus, PyInt_FromLong(j));
+            cpumap >>=1;
+        }
+        PyList_Append(node_to_cpu_obj, cpus); 
+    }
+    /* add list of node to cpu mappings and nr_nodes to physinfo dictionary */
+    PyDict_SetItemString(ret_obj, "node_to_cpu",  node_to_cpu_obj);
+    PyDict_SetItemString(ret_obj, "nr_nodes", 
+             Py_BuildValue("i", info.nr_nodes));
+
+    /* free malloc'd memory */
+    free(chunks);
+    free(map);
+ 
+    return ret_obj;
 }
 
 static PyObject *pyxc_xeninfo(XcObject *self)
diff -r 4053cb1daebe tools/python/xen/xend/XendNode.py
--- a/tools/python/xen/xend/XendNode.py Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/python/xen/xend/XendNode.py Tue Aug 15 11:48:39 2006 -0500
@@ -56,6 +56,69 @@ class XendNode:
                 ['version', ver],
                 ['machine', mch]]
 
+    def list_to_rangepairs(self,cmap):
+            cmap.sort()
+            pairs = []
+            x = y = 0
+            for i in range(0,len(cmap)):
+                try:
+                    if ((cmap[y+1] - cmap[i]) > 1):
+                        pairs.append((cmap[x],cmap[y]))
+                        x = y = i+1
+                    else:
+                        y = y + 1
+                # if we go off the end, then just add x to y
+                except IndexError:
+                    pairs.append((cmap[x],cmap[y]))
+
+            return pairs
+
+    def format_pairs(self,pairs):
+            if not pairs:
+                return "no cpus"
+            out = ""
+            for f,s in pairs:
+                if (f==s):
+                    out += '%d'%f
+                else:
+                    out += '%d-%d'%(f,s)
+                out += ','
+            # trim trailing ','
+            return out[:-1]
+
+    def list_to_strrange(self,list):
+        return self.format_pairs(self.list_to_rangepairs(list))
+
+    def format_memchunks(self, pinfo):
+        str=''
+        whitespace=''
+        try:
+            chunk=pinfo['mem_chunks']
+            for i in range(0, pinfo['nr_nodes']):
+                str+='%snode%d:0x%016x-0x%016x\n' % (whitespace,
+                                                    chunk[i]['node'],
+                                                    chunk[i]['start_paddr'], 
+                                                    chunk[i]['end_paddr']) 
+                whitespace='%25s' % ''
+        except:
+            str='none\n' 
+        return str[:-1]
+        
+    def format_node_to_cpu(self, pinfo):
+        str=''
+        whitespace=''
+        try:
+            node_to_cpu=pinfo['node_to_cpu']
+            for i in range(0, pinfo['nr_nodes']):
+                str+='%snode%d:%s\n' % (whitespace,
+                                        i, 
+                                      self.list_to_strrange(node_to_cpu[i]))
+                whitespace='%25s' % ''        
+        except:
+            str='none\n'
+        return str[:-1]
+
+
     def physinfo(self):
         info = self.xc.physinfo()
 
@@ -67,6 +130,8 @@ class XendNode:
         # physinfo is in KiB
         info['total_memory'] = info['total_memory'] / 1024
         info['free_memory']  = info['free_memory'] / 1024
+        info['mem_chunks']   = self.format_memchunks(info)
+        info['node_to_cpu']  = self.format_node_to_cpu(info)
 
         ITEM_ORDER = ['nr_cpus',
                       'nr_nodes',
@@ -77,6 +142,8 @@ class XendNode:
                       'hw_caps',
                       'total_memory',
                       'free_memory',
+                      'mem_chunks',
+                      'node_to_cpu'
                       ]
 
         return [[k, info[k]] for k in ITEM_ORDER]
diff -r 4053cb1daebe tools/xenmon/xenbaked.c
--- a/tools/xenmon/xenbaked.c   Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/xenmon/xenbaked.c   Tue Aug 15 11:45:07 2006 -0500
@@ -448,6 +448,9 @@ unsigned int get_num_cpus(void)
     int xc_handle = xc_interface_open();
     int ret;
 
+    /* ensure memory_chunks and node_to_cpu are NULL */
+    memset(&physinfo, 0, sizeof(physinfo));
+
     ret = xc_physinfo(xc_handle, &physinfo);
 
     if ( ret != 0 )
diff -r 4053cb1daebe tools/xenstat/libxenstat/src/xenstat.c
--- a/tools/xenstat/libxenstat/src/xenstat.c    Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/xenstat/libxenstat/src/xenstat.c    Tue Aug 15 11:46:34 2006 -0500
@@ -222,6 +222,9 @@ xenstat_node *xenstat_get_node(xenstat_h
 
        /* Store the handle in the node for later access */
        node->handle = handle;
+
+        /* ensure memory_chunks and node_to_cpu are NULL */
+        memset(&physinfo, 0, sizeof(physinfo));
 
        /* Get information about the physical system */
        if (xc_physinfo(handle->xc_handle, &physinfo) < 0) {
diff -r 4053cb1daebe tools/xentrace/xentrace.c
--- a/tools/xentrace/xentrace.c Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/xentrace/xentrace.c Tue Aug 15 11:47:07 2006 -0500
@@ -255,6 +255,9 @@ unsigned int get_num_cpus(void)
     int xc_handle = xc_interface_open();
     int ret;
     
+    /* ensure memory_chunks and node_to_cpu are NULL */
+    memset(&physinfo, 0, sizeof(physinfo));
+
     ret = xc_physinfo(xc_handle, &physinfo);
     
     if ( ret != 0 )
diff -r 4053cb1daebe tools/xm-test/tests/info/02_info_compiledata_pos.py
--- a/tools/xm-test/tests/info/02_info_compiledata_pos.py       Tue Aug 15 11:40:45 2006 -0500
+++ b/tools/xm-test/tests/info/02_info_compiledata_pos.py       Tue Aug 15 11:42:09 2006 -0500
@@ -18,9 +18,7 @@ for line in lines:
 for line in lines:
     pieces = line.split(" : ", 1)
 
-    if len(pieces) < 2:
-        FAIL("Found invalid line: [%s]" % line)
-    else:
+    if len(pieces) > 1:
         map[pieces[0]] = pieces[1]
 
 for field in ["cores_per_socket", "threads_per_core", "cpu_mhz",
diff -r 4053cb1daebe xen/arch/x86/dom0_ops.c
--- a/xen/arch/x86/dom0_ops.c   Tue Aug 15 11:40:45 2006 -0500
+++ b/xen/arch/x86/dom0_ops.c   Tue Aug 15 11:43:41 2006 -0500
@@ -25,6 +25,8 @@
 #include <asm/hvm/support.h>
 #include <asm/processor.h>
 #include <public/sched_ctl.h>
+#include <asm/numa.h>
+#include <asm/topology.h>
 
 #include <asm/mtrr.h>
 #include "cpu/mtrr/mtrr.h"
@@ -182,7 +184,11 @@ long arch_do_dom0_op(struct dom0_op *op,
 
     case DOM0_PHYSINFO:
     {
+        #define get_xen_guest_handle(val, hnd)  do { val = (hnd).p; } while (0)
         dom0_physinfo_t *pi = &op->u.physinfo;
+        int i,j;
+        node_data_t *chunks;
+        u64 *map, node_to_cpu_64[MAX_NUMNODES];
 
         pi->threads_per_core =
             cpus_weight(cpu_sibling_map[0]);
@@ -191,7 +197,6 @@ long arch_do_dom0_op(struct dom0_op *op,
         pi->sockets_per_node = 
             num_online_cpus() / cpus_weight(cpu_core_map[0]);
 
-        pi->nr_nodes         = 1;
         pi->total_pages      = total_pages;
         pi->free_pages       = avail_domheap_pages();
         pi->scrub_pages      = avail_scrub_pages();
@@ -199,6 +204,41 @@ long arch_do_dom0_op(struct dom0_op *op,
         memset(pi->hw_cap, 0, sizeof(pi->hw_cap));
         memcpy(pi->hw_cap, boot_cpu_data.x86_capability, NCAPINTS*4);
         ret = 0;
+
+        get_xen_guest_handle(chunks, op->u.physinfo.memory_chunks);
+        if ( chunks != NULL )
+            for_each_online_node(i) 
+            {
+                /* copy memory chunk structs to guest */
+                if ( copy_to_guest_offset(op->u.physinfo.memory_chunks, i, 
+                                          &(node_data[i]), 1) ) {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+
+        /* set number of nodes */
+        pi->nr_nodes = num_online_nodes();
+
+        get_xen_guest_handle(map, op->u.physinfo.node_to_cpu);
+        if ( map != NULL )
+        {
+            /* copy node to cpu mapping back to the dom0 caller;
+             * convert cpumask_t to u64 because userspace doesn't
+             * know about cpumask_t and expects a u64 bitmap */
+            memset(node_to_cpu_64, 0, sizeof(node_to_cpu_64));
+            for ( i = 0; i < pi->nr_nodes; i++ ) {
+                for ( j = 0; j < num_online_cpus(); j++ )
+                    if ( cpu_isset(j, node_to_cpumask(i)) )
+                        node_to_cpu_64[i] |= (u64)1 << j;
+
+                if ( copy_to_guest_offset(op->u.physinfo.node_to_cpu, 
+                                          i, &(node_to_cpu_64[i]), 1) ) {
+                    ret = -EFAULT;
+                    break;
+                }
+            }
+        }
         if ( copy_to_guest(u_dom0_op, op, 1) )
             ret = -EFAULT;
     }
diff -r 4053cb1daebe xen/include/public/arch-x86_32.h
--- a/xen/include/public/arch-x86_32.h  Tue Aug 15 11:40:45 2006 -0500
+++ b/xen/include/public/arch-x86_32.h  Tue Aug 15 11:42:09 2006 -0500
@@ -30,6 +30,7 @@ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigne
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
 __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_XEN_GUEST_HANDLE(u64, uint64_t);
 DEFINE_XEN_GUEST_HANDLE(char);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
diff -r 4053cb1daebe xen/include/public/arch-x86_64.h
--- a/xen/include/public/arch-x86_64.h  Tue Aug 15 11:40:45 2006 -0500
+++ b/xen/include/public/arch-x86_64.h  Tue Aug 15 11:42:09 2006 -0500
@@ -30,6 +30,7 @@ __DEFINE_XEN_GUEST_HANDLE(uchar, unsigne
 __DEFINE_XEN_GUEST_HANDLE(uchar, unsigned char);
 __DEFINE_XEN_GUEST_HANDLE(uint,  unsigned int);
 __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+__DEFINE_XEN_GUEST_HANDLE(u64, uint64_t);
 DEFINE_XEN_GUEST_HANDLE(char);
 DEFINE_XEN_GUEST_HANDLE(int);
 DEFINE_XEN_GUEST_HANDLE(long);
diff -r 4053cb1daebe xen/include/public/dom0_ops.h
--- a/xen/include/public/dom0_ops.h     Tue Aug 15 11:40:45 2006 -0500
+++ b/xen/include/public/dom0_ops.h     Tue Aug 15 11:42:09 2006 -0500
@@ -13,6 +13,7 @@
 
 #include "xen.h"
 #include "sched_ctl.h"
+#include "numa_structs.h"
 
 /*
  * Make sure you increment the interface version whenever you modify this file!
@@ -233,6 +234,8 @@ struct dom0_physinfo {
     uint64_t free_pages;
     uint64_t scrub_pages;
     uint32_t hw_cap[8];
+    XEN_GUEST_HANDLE(node_data_t) memory_chunks;
+    XEN_GUEST_HANDLE(u64) node_to_cpu;
 };
 typedef struct dom0_physinfo dom0_physinfo_t;
 DEFINE_XEN_GUEST_HANDLE(dom0_physinfo_t);
diff -r 4053cb1daebe xen/include/xen/numa.h
--- a/xen/include/xen/numa.h    Tue Aug 15 11:40:45 2006 -0500
+++ b/xen/include/xen/numa.h    Tue Aug 15 11:42:09 2006 -0500
@@ -2,6 +2,7 @@
 #define _XEN_NUMA_H
 
 #include <xen/config.h>
+#include <public/numa_structs.h>
 
 #ifdef CONFIG_DISCONTIGMEM
 #include <asm/numnodes.h>
@@ -26,10 +27,4 @@ extern unsigned int cpu_to_node[];
 #include <xen/cpumask.h>
 extern cpumask_t node_to_cpumask[];
 
-typedef struct node_data {
-    unsigned long node_start_pfn;
-    unsigned long node_spanned_pages;
-    unsigned int  node_id;
-} node_data_t;
-
 #endif /* _XEN_NUMA_H */
diff -r 4053cb1daebe xen/include/public/numa_structs.h
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/xen/include/public/numa_structs.h Tue Aug 15 11:42:09 2006 -0500
@@ -0,0 +1,26 @@
+/*
+ * Ryan Grimm  <grimm@xxxxxxxxxx>
+ * Ryan Harper <ryanh@xxxxxxxxxx>
+ * Copyright (c) 2006, International Business Machines Corporation.
+ *
+ */
+
+#ifndef __XEN_PUBLIC_NUMA_STRUCTS_H__
+
+#define __XEN_PUBLIC_NUMA_STRUCTS_H__
+
+#include "xen.h"
+
+/* define these for xc to use because MAX_NUMNODES and MAX_CHUNKS
+ * are not exposed under include/public */
+#define PUBLIC_MAX_NUMNODES 16
+#define PUBLIC_MAXCHUNKS 32
+
+typedef struct node_data {
+    unsigned long node_start_pfn;
+    unsigned long node_spanned_pages;
+    unsigned int  node_id;
+} node_data_t;
+DEFINE_XEN_GUEST_HANDLE(node_data_t);
+
+#endif



 

