[Xen-devel] [PATCH 5/5] [POST-4.0]: HVM NUMA guest: add xc_nodeload() function

To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>, "Kamble, Nitin A" <nitin.a.kamble@xxxxxxxxx>
Subject: [Xen-devel] [PATCH 5/5] [POST-4.0]: HVM NUMA guest: add xc_nodeload() function
From: Andre Przywara <andre.przywara@xxxxxxx>
Date: Thu, 4 Feb 2010 22:56:21 +0100
Cc: "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Delivery-date: Thu, 04 Feb 2010 13:56:00 -0800
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
User-agent: Thunderbird 2.0.0.18 (X11/20081105)
This is still work in progress, as it is just the base function, which should be used by libxc's setup_guest(). It mimics the current XendDomainInfo.py:find_relaxed_node() code, which can eventually go away. The function iterates over all VCPUs in all domains to determine each node's load. A VCPU pinned to a single node counts fully against that node, while an unpinned VCPU contributes a smaller share to every node it can run on (the total contribution is the same in both cases). Each entry also reports the node's free memory, which can be used to sequentially determine the best host node for each guest node.
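With this weighting, on a four-node host a VCPU pinned to a single node adds 4 to that node's load, while a completely unpinned VCPU adds 1 to each of the four nodes, so the total load added is the same either way (integer rounding aside).

Just to illustrate the intended use (this is only a sketch, not part of the patch; the selection policy and the pick_host_node() helper are made up for the example), a caller like setup_guest() could pick the least loaded node that still has enough free memory:

#include <stdlib.h>
#include "xenctrl.h"

/* Illustration only: pick a host node with at least need_kb KB free,
 * preferring the least loaded one. Returns a node number or -1. */
static int pick_host_node(int xc_handle, uint64_t need_kb)
{
    int i, nrnodes, best = -1;
    struct xc_nodeload *nl = xc_getnodeload(xc_handle, &nrnodes);

    if (nl == NULL)
        return -1;
    for (i = 0; i < nrnodes; i++) {
        if (nl[i].freemem < need_kb)
            continue;                 /* not enough free memory here */
        if (best < 0 || nl[i].load < nl[best].load)
            best = i;                 /* remember the least loaded node */
    }
    if (best >= 0)
        best = nl[best].node;
    free(nl);                         /* the array is malloc()ed by libxc */
    return best;
}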

Signed-off-by: Andre Przywara <andre.przywara@xxxxxxx>

Regards,
Andre.

--
Andre Przywara
AMD-Operating System Research Center (OSRC), Dresden, Germany
Tel: +49 351 488-3567-12
----to satisfy European Law for business letters:
Advanced Micro Devices GmbH
Karl-Hammerschmidt-Str. 34, 85609 Dornach b. Muenchen
Geschaeftsfuehrer: Andrew Bowd; Thomas M. McCoy; Giuliano Meroni
Sitz: Dornach, Gemeinde Aschheim, Landkreis Muenchen
Registergericht Muenchen, HRB Nr. 43632
commit 3199a0f02fdd8379bd8694ac57f307234075d0c2
Author: Andre Przywara <andre.przywara@xxxxxxx>
Date:   Thu Feb 4 13:51:53 2010 +0100

    added xc_nodeload() function

diff --git a/tools/libxc/xc_misc.c b/tools/libxc/xc_misc.c
index 04da3d2..9ae61b6 100644
--- a/tools/libxc/xc_misc.c
+++ b/tools/libxc/xc_misc.c
@@ -424,6 +424,84 @@ xc_map_foreign_bulk(int xc_handle, uint32_t dom, int prot,
     return ret;
 }
 
+#define MAX_DOMS 8
+#define MAX_CPU_ID 255
+
+struct xc_nodeload* xc_getnodeload(int xc_handle,
+                                   int *ret_nrnodes)
+{
+    xc_dominfo_t dominfo[MAX_DOMS];
+    xc_vcpuinfo_t vcpuinfo;
+    int nextdom = 0;
+    int nrdoms, dom, vcpu, i;
+    xc_physinfo_t physinfo;
+    uint32_t nodemap[MAX_CPU_ID + 1];
+    int nrcpus, nrnodes, sum;
+    uint64_t cpumap;
+    int *curload;
+    struct xc_nodeload *nodeload;
+
+    /* ask Xen for the CPU-to-node mapping along with the physinfo */
+    set_xen_guest_handle(physinfo.cpu_to_node, nodemap);
+    physinfo.max_cpu_id = MAX_CPU_ID;
+
+    if (xc_physinfo(xc_handle, &physinfo) != 0)
+        return NULL;
+    nrnodes = physinfo.max_node_id + 1;
+    nrcpus = physinfo.max_cpu_id + 1;
+    curload = malloc(nrnodes * sizeof(int));
+    nodeload = malloc(nrnodes * sizeof(struct xc_nodeload));
+    if (curload == NULL || nodeload == NULL) {
+        free(curload);
+        free(nodeload);
+        return NULL;
+    }
+    /* record each node's free memory (in KB) up front */
+    for (i = 0; i < nrnodes; i++) {
+        nodeload[i].node = i;
+        nodeload[i].load = 0;
+        xc_availheap(xc_handle, 0, 0, i, &nodeload[i].freemem);
+        nodeload[i].freemem /= 1024;
+    }
+
+    /* walk all domains in batches of MAX_DOMS */
+    while ((nrdoms = xc_domain_getinfo(xc_handle,
+                                       nextdom, MAX_DOMS, dominfo)) != -1) {
+        for (dom = 0; dom < nrdoms; dom++) {
+            for (vcpu = 0; vcpu <= dominfo[dom].max_vcpu_id; vcpu++) {
+                if (xc_vcpu_getinfo(xc_handle, dominfo[dom].domid, vcpu,
+                                    &vcpuinfo) != 0)
+                    continue;
+                if (!vcpuinfo.online)
+                    continue;
+                xc_vcpu_getaffinity(xc_handle, dominfo[dom].domid, vcpu,
+                                    &cpumap);
+                /* mark the nodes this VCPU may run on (cpumap covers 64 CPUs max) */
+                memset(curload, 0, sizeof(int) * nrnodes);
+                for (i = sum = 0; i < nrcpus && i < 64; i++) {
+                    if ((1ULL << i) & cpumap) {
+                        if (curload[nodemap[i]] == 0)
+                            sum++;
+                        curload[nodemap[i]] = 1;
+                    }
+                }
+                if (sum == 0)
+                    continue;
+                /* a VCPU confined to fewer nodes weighs more per node */
+                for (i = 0; i < nrnodes; i++)
+                    nodeload[i].load += curload[i] * nrnodes / sum;
+            }
+        }
+        if (nrdoms < 2)
+            break;
+        nextdom = dominfo[nrdoms - 1].domid + 1;
+    }
+    free(curload);
+    if (ret_nrnodes != NULL)
+        *ret_nrnodes = nrnodes;
+    return nodeload;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
index 79c1fd4..0b6ef9a 100644
--- a/tools/libxc/xenctrl.h
+++ b/tools/libxc/xenctrl.h
@@ -613,6 +613,22 @@ typedef uint32_t xc_cpu_to_node_t;
 int xc_physinfo(int xc_handle,
                 xc_physinfo_t *info);
 
+struct xc_nodeload {
+    int node;
+    int load;
+    uint64_t freemem;
+};
+
+/* Iterate over all domains and VCPUs to get an estimate of the load
+ * distribution across the NUMA nodes. Returns a malloc()ed array with
+ * one entry per node, or NULL on failure. If ret_nrnodes is non-NULL,
+ * it is set to the number of entries in the array.
+ * The node member holds the node number (useful if you sort the array),
+ * load is a relative metric, and freemem is given in KB.
+ */
+struct xc_nodeload* xc_getnodeload(int xc_handle,
+                                   int *ret_nrnodes);
+
 int xc_sched_id(int xc_handle,
                 int *sched_id);
 
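As the header comment says, the node field lets you sort the returned array and still know which node an entry refers to. Purely as an illustration (not part of the patch), a qsort() comparator that orders nodes by ascending load and, on ties, by descending free memory could look like this:

#include <stdlib.h>
#include "xenctrl.h"

/* Illustration only: sort criterion for the xc_getnodeload() array:
 * least loaded node first, ties broken by more free memory. */
static int nodeload_cmp(const void *a, const void *b)
{
    const struct xc_nodeload *na = a, *nb = b;

    if (na->load != nb->load)
        return na->load - nb->load;
    /* descending by freemem: larger values sort earlier */
    return (na->freemem < nb->freemem) - (na->freemem > nb->freemem);
}

/* usage: qsort(nodeload, nrnodes, sizeof(*nodeload), nodeload_cmp); */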
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel