[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 1/2] xen: vnuma support for PV guests running as domU.



Issues the Xen hypercall subop XENMEM_get_vnuma_info and sets up the
NUMA topology; otherwise sets a dummy NUMA node and prevents
numa_init from calling other NUMA initializers, as they may
break other guests.

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
 arch/x86/include/asm/xen/vnuma.h |   12 ++++
 arch/x86/mm/numa.c               |    5 ++
 arch/x86/xen/Makefile            |    2 +-
 arch/x86/xen/vnuma.c             |  119 ++++++++++++++++++++++++++++++++++++++
 include/xen/interface/memory.h   |   28 +++++++++
 5 files changed, 165 insertions(+), 1 deletion(-)
 create mode 100644 arch/x86/include/asm/xen/vnuma.h
 create mode 100644 arch/x86/xen/vnuma.c

diff --git a/arch/x86/include/asm/xen/vnuma.h b/arch/x86/include/asm/xen/vnuma.h
new file mode 100644
index 0000000..1ba1e06
--- /dev/null
+++ b/arch/x86/include/asm/xen/vnuma.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_X86_VNUMA_H
+#define _ASM_X86_VNUMA_H
+
+#ifdef CONFIG_XEN
+int xen_vnuma_supported(void);
+int xen_numa_init(void);
+#else /* !CONFIG_XEN: static inline stubs so several includers can link */
+static inline int xen_vnuma_supported(void) { return 0; }
+static inline int xen_numa_init(void) { return -1; }
+#endif /* CONFIG_XEN */
+
+#endif /* _ASM_X86_VNUMA_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 8bf93ba..c8a61dc 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -19,6 +19,7 @@
 #include <asm/amd_nb.h>
 
 #include "numa_internal.h"
+#include "asm/xen/vnuma.h"
 
 int __initdata numa_off;
 nodemask_t numa_nodes_parsed __initdata;
@@ -621,6 +622,10 @@ static int __init dummy_numa_init(void)
 void __init x86_numa_init(void)
 {
        if (!numa_off) {
+#ifdef CONFIG_XEN
+               if (xen_vnuma_supported() && !numa_init(xen_numa_init))
+                       return;
+#endif
 #ifdef CONFIG_X86_NUMAQ
                if (!numa_init(numaq_numa_init))
                        return;
diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile
index 96ab2c0..de9deab 100644
--- a/arch/x86/xen/Makefile
+++ b/arch/x86/xen/Makefile
@@ -13,7 +13,7 @@ CFLAGS_mmu.o                  := $(nostackp)
 obj-y          := enlighten.o setup.o multicalls.o mmu.o irq.o \
                        time.o xen-asm.o xen-asm_$(BITS).o \
                        grant-table.o suspend.o platform-pci-unplug.o \
-                       p2m.o
+                       p2m.o vnuma.o
 
 obj-$(CONFIG_EVENT_TRACING) += trace.o
 
diff --git a/arch/x86/xen/vnuma.c b/arch/x86/xen/vnuma.c
new file mode 100644
index 0000000..b4fc667
--- /dev/null
+++ b/arch/x86/xen/vnuma.c
@@ -0,0 +1,119 @@
+#include <linux/err.h>
+#include <linux/memblock.h>
+#include <xen/interface/xen.h>
+#include <xen/interface/memory.h>
+#include <asm/xen/interface.h>
+#include <asm/xen/hypercall.h>
+#include <asm/xen/vnuma.h>
+
+#ifdef CONFIG_NUMA
+
+/* Returns 1 unless the XENMEM_get_vnuma_info hypercall reports -ENOSYS */
+int xen_vnuma_supported(void)
+{
+       return HYPERVISOR_memory_op(XENMEM_get_vnuma_info, NULL) != -ENOSYS;
+}
+
+/*
+ * Ask Xen for this domain's vNUMA topology and register it with the
+ * x86 NUMA core.  Returns -EINVAL when not running as a PV domain;
+ * otherwise always returns 0, installing a single dummy node on any
+ * failure so that numa_init() does not try other NUMA init methods.
+ */
+int __init xen_numa_init(void)
+{
+       int rc = -EINVAL;
+       unsigned int i, j, nr_nodes = 0, cpu, idx, pcpus;
+       u64 physm, physd, physc;
+       unsigned int *vdistance, *cpu_to_node;
+       unsigned long mem_size, dist_size, cpu_to_node_size;
+       struct vmemrange *vblock;
+       struct vnuma_topology_info numa_topo = {
+               .domid = DOMID_SELF,
+               .__pad = 0
+       };
+
+       /* For now only PV guests are supported */
+       if (!xen_pv_domain())
+               return rc;
+
+       pcpus = num_possible_cpus();
+       mem_size = pcpus * sizeof(struct vmemrange);
+       dist_size = pcpus * pcpus * sizeof(*numa_topo.vdistance);
+       cpu_to_node_size = pcpus * sizeof(*numa_topo.cpu_to_node);
+
+       physm = memblock_alloc(mem_size, PAGE_SIZE);
+       physd = memblock_alloc(dist_size, PAGE_SIZE);
+       physc = memblock_alloc(cpu_to_node_size, PAGE_SIZE);
+       if (!physm || !physc || !physd)
+               goto vnumaout;
+       /* Only translate once the allocations are known to be valid */
+       vblock = __va(physm);
+       vdistance = __va(physd);
+       cpu_to_node = __va(physc);
+
+       set_xen_guest_handle(numa_topo.nr_nodes, &nr_nodes);
+       set_xen_guest_handle(numa_topo.vmemblks, vblock);
+       set_xen_guest_handle(numa_topo.vdistance, vdistance);
+       set_xen_guest_handle(numa_topo.cpu_to_node, cpu_to_node);
+
+       rc = HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo);
+       if (rc < 0)
+               goto vnumaout;
+       /* Every bail-out below must end up in the dummy-node fallback */
+       rc = -EINVAL;
+       if (nr_nodes == 0)
+               goto vnumaout;
+       if (nr_nodes > pcpus) {
+               pr_debug("vNUMA: Node without cpu is not supported in this version.\n");
+               goto vnumaout;
+       }
+       /*
+        * NUMA node memory ranges come from Xen, built from the domain's e820
+        * map.  NOTE(review): said to be in pfns - confirm numa_add_memblk() units.
+        */
+       for (i = 0; i < nr_nodes; i++) {
+               if (numa_add_memblk(i, vblock[i].start, vblock[i].end))
+                       goto vnumaout;
+               node_set(i, numa_nodes_parsed);
+       }
+       setup_nr_node_ids();
+       /* Setting the cpu, apicid to node */
+       for_each_cpu(cpu, cpu_possible_mask) {
+               set_apicid_to_node(cpu, cpu_to_node[cpu]);
+               numa_set_node(cpu, cpu_to_node[cpu]);
+               cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]);
+       }
+       for (i = 0; i < nr_nodes; i++) {
+               for (j = 0; j < nr_nodes; j++) {
+                       idx = (j * nr_nodes) + i;
+                       numa_set_distance(i, j, vdistance[idx]);
+               }
+       }
+       rc = 0;
+vnumaout:
+       /* physm/physd/physc are already physical addresses: no __pa() */
+       if (physm)
+               memblock_free(physm, mem_size);
+       if (physd)
+               memblock_free(physd, dist_size);
+       if (physc)
+               memblock_free(physc, cpu_to_node_size);
+       /*
+        * Set the "dummy" node and exit without error so Linux
+        * will not try any NUMA init functions which might break
+        * guests in the future. This will discard all previous
+        * settings.
+        */
+       if (rc != 0) {
+               for (i = 0; i < MAX_LOCAL_APIC; i++)
+                       set_apicid_to_node(i, NUMA_NO_NODE);
+               nodes_clear(numa_nodes_parsed);
+               nodes_clear(node_possible_map);
+               nodes_clear(node_online_map);
+               node_set(0, numa_nodes_parsed);
+               numa_add_memblk(0, 0, PFN_PHYS(max_pfn));
+       }
+       return 0;
+}
+#endif
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 2ecfe4f..3974e9a 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -263,4 +263,32 @@ struct xen_remove_from_physmap {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
 
+/* vNUMA structures: one memory area of a virtual NUMA node */
+struct vmemrange {
+       uint64_t start, end; /* bounds of the area */
+       struct vmemrange *next; /* NOTE(review): raw pointer in a hypercall struct - confirm ABI */
+};
+DEFINE_GUEST_HANDLE_STRUCT(vmemrange);
+
+struct vnuma_topology_info {
+       /* IN: domain whose vNUMA topology is requested */
+       domid_t domid;
+       uint32_t __pad;
+       /* OUT: caller-supplied buffers, read back after the hypercall */
+       GUEST_HANDLE(uint) nr_nodes; /* number of virtual numa nodes */
+       /* distance table */
+       GUEST_HANDLE(uint) vdistance;
+       /* cpu mapping to vnodes */
+       GUEST_HANDLE(uint) cpu_to_node;
+       /*
+        * Array of NUMA memory areas constructed by Xen, where
+        * start and end are pfn numbers of the area.
+        * Xen takes the domain's e820 map into account.
+        */
+       GUEST_HANDLE(vmemrange) vmemblks;
+};
+DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info);
+
+#define XENMEM_get_vnuma_info  25 /* memory_op subop: get vNUMA topology */
+
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
-- 
1.7.10.4


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.