
[Xen-devel] [PATCH RFC 1/2] linux/vnuma: vnuma support for pv guest



Uses a memory subop hypercall to request the vNUMA topology from Xen,
and sets up the memory blocks, the vcpu-to-node mapping and the
distance table on boot. NUMA support must be compiled into the kernel.

Memory blocks are aligned by Xen, which is aware of the guest's
initial RAM layout. If the memory blocks are incorrect, fall back to
the default Linux dummy NUMA init.

Requires XEN with vNUMA support.
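
In short, the guest-side sequence reduces to the following rough
sketch, using only the structures and the XENMEM subop introduced by
this patch (memblock allocation, buffer sizing and error handling
omitted; see the full xen_numa_init() below):

        struct vnuma_topology_info numa_topo = { .domid = DOMID_SELF };
        struct vnuma_memblk *vblk;      /* one entry per possible node */
        int *vdistance, *cpu_to_node;   /* distance matrix, vcpu-to-node map */
        int i;

        /* Tell Xen where to write the topology data. */
        set_xen_guest_handle(numa_topo.vmemblk, vblk);
        set_xen_guest_handle(numa_topo.vdistance, vdistance);
        set_xen_guest_handle(numa_topo.cpu_to_node, cpu_to_node);

        if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0)
                return -EINVAL;         /* caller falls back to dummy NUMA init */

        for (i = 0; i < numa_topo.nr_nodes; i++)
                numa_add_memblk(i, vblk[i].start, vblk[i].end);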

Applies to 3.10 git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen.git
latest commit: 8bb495e3f02401ee6f76d1b1d77f3ac9f079e376

TODO:
Change subop from XENMEM to SYSCTL.

Signed-off-by: Elena Ufimtseva <ufimtseva@xxxxxxxxx>
---
 arch/x86/include/asm/xen/vnuma-xen.h |   36 +++++++++++
 arch/x86/mm/numa.c                   |    8 +++
 arch/x86/xen/enlighten.c             |  115 ++++++++++++++++++++++++++++++++++
 include/xen/interface/memory.h       |    1 +
 4 files changed, 160 insertions(+)
 create mode 100644 arch/x86/include/asm/xen/vnuma-xen.h

diff --git a/arch/x86/include/asm/xen/vnuma-xen.h b/arch/x86/include/asm/xen/vnuma-xen.h
new file mode 100644
index 0000000..73c1bde
--- /dev/null
+++ b/arch/x86/include/asm/xen/vnuma-xen.h
@@ -0,0 +1,36 @@
+#ifndef _ASM_X86_VNUMA_XEN_H
+#define _ASM_X86_VNUMA_XEN_H
+
+#ifdef CONFIG_XEN
+int __init xen_numa_init(void);
+
+struct vnuma_memblk {
+       uint64_t start, end;
+};
+DEFINE_GUEST_HANDLE_STRUCT(vnuma_memblk);
+
+struct vnuma_topology_info {
+       domid_t domid;
+       uint16_t nr_nodes;
+       GUEST_HANDLE(vnuma_memblk) vmemblk;
+       GUEST_HANDLE(int) vdistance;
+       GUEST_HANDLE(int) cpu_to_node;
+       GUEST_HANDLE(int) vnode_to_pnode;
+};
+DEFINE_GUEST_HANDLE_STRUCT(vnuma_topology_info);
+
+struct xen_domctl {
+       uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
+       domid_t  domain;
+       union {
+               struct vnuma_topology_info vnuma;
+       } u;
+};
+typedef struct xen_domctl xen_domctl_t;
+DEFINE_GUEST_HANDLE_STRUCT(xen_domctl_t);
+
+#else
+static inline int xen_numa_init(void) { return 0; }
+#endif
+
+#endif /* _ASM_X86_VNUMA_XEN_H */
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 8bf93ba..3ec7855 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -20,6 +20,10 @@
 
 #include "numa_internal.h"
 
+#ifdef CONFIG_XEN
+#include <asm/xen/vnuma-xen.h>
+#endif
+
 int __initdata numa_off;
 nodemask_t numa_nodes_parsed __initdata;
 
@@ -621,6 +625,10 @@ static int __init dummy_numa_init(void)
 void __init x86_numa_init(void)
 {
        if (!numa_off) {
+#ifdef CONFIG_XEN
+               if (xen_pv_domain() && !numa_init(xen_numa_init))
+                       return;
+#endif
 #ifdef CONFIG_X86_NUMAQ
                if (!numa_init(numaq_numa_init))
                        return;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 193097e..4658e9d 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -33,6 +33,7 @@
 #include <linux/memblock.h>
 #include <linux/edd.h>
 
+#include <asm/numa.h>
 #include <xen/xen.h>
 #include <xen/events.h>
 #include <xen/interface/xen.h>
@@ -69,6 +70,7 @@
 #include <asm/mwait.h>
 #include <asm/pci_x86.h>
 #include <asm/pat.h>
+#include <asm/xen/vnuma-xen.h>
 
 #ifdef CONFIG_ACPI
 #include <linux/acpi.h>
@@ -1750,4 +1752,117 @@ const struct hypervisor_x86 x86_hyper_xen_hvm __refconst = {
        .x2apic_available       = xen_x2apic_para_available,
 };
 EXPORT_SYMBOL(x86_hyper_xen_hvm);
+
+/* Xen PV NUMA topology initialization */
+int __init xen_numa_init(void)
+{
+       struct vnuma_memblk *vblk;
+       struct vnuma_topology_info numa_topo;
+       uint64_t size, start, end;
+       int i, j, cpu, rc;
+       u64 phys = 0, physd = 0, physc = 0;
+       u64 new_end_pfn;
+       size_t mem_size;
+       int *vdistance, *cpu_to_node;
+       unsigned long dist_size, cpu_to_node_size;
+
+       numa_topo.domid = DOMID_SELF;
+       rc = -EINVAL;
+       /* block mem reservation for memblks */
+       mem_size = num_possible_cpus() * sizeof(struct vnuma_memblk);
+       phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), mem_size, PAGE_SIZE);
+       if (!phys) {
+               pr_info("vNUMA: can't allocate memory for memblocks, size %zu\n", mem_size);
+               rc = -ENOMEM;
+               goto errout;
+       }
+       if (memblock_reserve(phys, mem_size) < 0) {
+               pr_info("vNUMA: Cannot reserve mem for blocks of size %zu\n", mem_size);
+               rc = -ENOMEM;
+               goto errout;
+       }
+       vblk = __va(phys);
+       set_xen_guest_handle(numa_topo.vmemblk, vblk);
+
+       dist_size = num_possible_cpus() * num_possible_cpus() * sizeof(int);
+       physd = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), dist_size, PAGE_SIZE);
+       if (!physd) {
+               pr_info("vNUMA: can't allocate memory for distance size %zu\n", dist_size);
+               rc = -ENOMEM;
+               goto errout;
+       }
+       if (memblock_reserve(physd, dist_size) < 0) {
+               pr_info("vNUMA: Cannot reserve mem for distance table of size %zu\n", dist_size);
+               rc = -ENOMEM;
+               goto errout;
+       }
+       vdistance  = __va(physd);
+       set_xen_guest_handle(numa_topo.vdistance, (int *)vdistance);
+
+       /* allocate memblock for vcpu to node mask of max size */
+       cpu_to_node_size = num_possible_cpus() * sizeof(int);
+       physc = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), cpu_to_node_size, PAGE_SIZE);
+       if (!physc) {
+               pr_info("vNUMA: can't allocate memory for cpu_to_node map, size %zu\n", cpu_to_node_size);
+               rc = -ENOMEM;
+               goto errout;
+       }
+       if (memblock_reserve(physc, cpu_to_node_size) < 0) {
+               pr_info("vNUMA: Cannot reserve mem for cpu_to_node map of size %zu\n", cpu_to_node_size);
+               goto errout;
+       }
+       cpu_to_node  = __va(physc);
+       set_xen_guest_handle(numa_topo.cpu_to_node, (int *)cpu_to_node);
+       if (HYPERVISOR_memory_op(XENMEM_get_vnuma_info, &numa_topo) < 0)
+               goto errout;
+       if (numa_topo.nr_nodes == 0)
+               /* Pass to DUMMY numa */
+               goto errout;
+       if (numa_topo.nr_nodes > num_possible_cpus()) {
+               pr_info("vNUMA: Nodes without cpus are not supported, nodes = %d\n", numa_topo.nr_nodes);
+               goto errout;
+       }
+       new_end_pfn = 0;
+       /* We have sizes in pages received from hypervisor, no holes and ordered */
+       for (i = 0; i < numa_topo.nr_nodes; i++) {
+               start = vblk[i].start;
+               end = vblk[i].end;
+               size = end - start;
+               pr_info("vNUMA: blk[%d] size is %#010Lx start = %#010Lx end = %#010Lx\n",
+                       i, size, start, end);
+               if (numa_add_memblk(i, start, end)) {
+                       pr_info("vNUMA: Could not add memblk[%d]\n", i);
+                       rc = -EAGAIN;
+                       goto errout;
+               }
+               node_set(i, numa_nodes_parsed);
+       }
+       setup_nr_node_ids();
+       /* Setting the cpu, apicid to node */
+       for_each_cpu(cpu, cpu_possible_mask) {
+               pr_info("Setting up vcpu[%d] to node[%d]\n", cpu, cpu_to_node[cpu]);
+               set_apicid_to_node(cpu, cpu_to_node[cpu]);
+               numa_set_node(cpu, cpu_to_node[cpu]);
+               __apicid_to_node[cpu] = cpu_to_node[cpu];
+               cpumask_set_cpu(cpu, node_to_cpumask_map[cpu_to_node[cpu]]);
+       }
+       setup_nr_node_ids();
+       rc = 0;
+       for (i = 0; i < numa_topo.nr_nodes; i++)
+               for (j = 0; j < numa_topo.nr_nodes; j++) {
+                       numa_set_distance(i, j, *(vdistance + j*numa_topo.nr_nodes + i));
+                       pr_info("distance %d <-> %d = %d\n", i, j, *(vdistance + j*numa_topo.nr_nodes + i));
+               }
+errout:
+       if (phys)
+               memblock_free(phys, mem_size);
+       if (physd)
+               memblock_free(physd, dist_size);
+       if (physc)
+               memblock_free(physc, cpu_to_node_size);
+       if (rc)
+               pr_debug("XEN: hypercall failed to get vNUMA topology.\n");
+       return rc;
+}
+
 #endif
diff --git a/include/xen/interface/memory.h b/include/xen/interface/memory.h
index 2ecfe4f..16b8d87 100644
--- a/include/xen/interface/memory.h
+++ b/include/xen/interface/memory.h
@@ -263,4 +263,5 @@ struct xen_remove_from_physmap {
 };
 DEFINE_GUEST_HANDLE_STRUCT(xen_remove_from_physmap);
 
+#define XENMEM_get_vnuma_info  25
 #endif /* __XEN_PUBLIC_MEMORY_H__ */
-- 
1.7.10.4

