[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [XEN RFC PATCH 12/40] xen/x86: Move numa_initmem_init to common



Hi Wei,

On 11/08/2021 11:23, Wei Chen wrote:
This function can be reused by Arm device tree based
NUMA support. So we move it from x86 to common, as well
as its related variables and functions:
setup_node_bootmem, numa_init_array and numa_emulation.

As numa_initmem_init has been moved to common, _memnodemap
is not used cross files. We can restore _memnodemap to
static.

As we discussed on a previous patch, we should try to avoid this kind of dance. I can help to find a split that would achieve that.


Signed-off-by: Wei Chen <wei.chen@xxxxxxx>
---
  xen/arch/x86/numa.c         | 118 ----------------------------------
  xen/common/numa.c           | 122 +++++++++++++++++++++++++++++++++++-
  xen/include/asm-x86/numa.h  |   5 --
  xen/include/asm-x86/setup.h |   1 -
  xen/include/xen/numa.h      |   8 ++-
  5 files changed, 128 insertions(+), 126 deletions(-)

diff --git a/xen/arch/x86/numa.c b/xen/arch/x86/numa.c
index f2626b3968..6908738305 100644
--- a/xen/arch/x86/numa.c
+++ b/xen/arch/x86/numa.c
@@ -38,7 +38,6 @@ nodeid_t apicid_to_node[MAX_LOCAL_APIC] = {
nodemask_t __read_mostly node_online_map = { { [0] = 1UL } }; -bool numa_off;
  s8 acpi_numa = 0;
int srat_disabled(void)
@@ -46,123 +45,6 @@ int srat_disabled(void)
      return numa_off || acpi_numa < 0;
  }
-/* initialize NODE_DATA given nodeid and start/end */
-void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)
-{
-    unsigned long start_pfn, end_pfn;
-
-    start_pfn = start >> PAGE_SHIFT;
-    end_pfn = end >> PAGE_SHIFT;
-
-    NODE_DATA(nodeid)->node_start_pfn = start_pfn;
-    NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
-
-    node_set_online(nodeid);
-}
-
-void __init numa_init_array(void)
-{
-    int rr, i;
-
-    /* There are unfortunately some poorly designed mainboards around
-       that only connect memory to a single CPU. This breaks the 1:1 cpu->node
-       mapping. To avoid this fill in the mapping for all possible
-       CPUs, as the number of CPUs is not known yet.
-       We round robin the existing nodes. */
-    rr = first_node(node_online_map);
-    for ( i = 0; i < nr_cpu_ids; i++ )
-    {
-        if ( cpu_to_node[i] != NUMA_NO_NODE )
-            continue;
-        numa_set_node(i, rr);
-        rr = cycle_node(rr, node_online_map);
-    }
-}
-
-#ifdef CONFIG_NUMA_EMU
-static int numa_fake __initdata = 0;
-
-/* Numa emulation */
-static int __init numa_emulation(u64 start_pfn, u64 end_pfn)
-{
-    int i;
-    struct node nodes[MAX_NUMNODES];
-    u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;
-
-    /* Kludge needed for the hash function */
-    if ( hweight64(sz) > 1 )
-    {
-        u64 x = 1;
-        while ( (x << 1) < sz )
-            x <<= 1;
-        if ( x < sz/2 )
-            printk(KERN_ERR "Numa emulation unbalanced. Complain to 
maintainer\n");
-        sz = x;
-    }
-
-    memset(&nodes,0,sizeof(nodes));
-    for ( i = 0; i < numa_fake; i++ )
-    {
-        nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
-        if ( i == numa_fake - 1 )
-            sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
-        nodes[i].end = nodes[i].start + sz;
-        printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" 
(%"PRIu64"MB)\n",
-               i,
-               nodes[i].start, nodes[i].end,
-               (nodes[i].end - nodes[i].start) >> 20);
-        node_set_online(i);
-    }
-    memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
-    if ( memnode_shift < 0 )
-    {
-        memnode_shift = 0;
-        printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
-        return -1;
-    }
-    for_each_online_node ( i )
-        setup_node_bootmem(i, nodes[i].start, nodes[i].end);
-    numa_init_array();
-
-    return 0;
-}
-#endif
-
-void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
-{
-    int i;
-
-#ifdef CONFIG_NUMA_EMU
-    if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
-        return;
-#endif
-
-#ifdef CONFIG_ACPI_NUMA
-    if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
-         (u64)end_pfn << PAGE_SHIFT) )
-        return;
-#endif
-
-    printk(KERN_INFO "%s\n",
-           numa_off ? "NUMA turned off" : "No NUMA configuration found");
-
-    printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
-           (u64)start_pfn << PAGE_SHIFT,
-           (u64)end_pfn << PAGE_SHIFT);
-    /* setup dummy node covering all memory */
-    memnode_shift = BITS_PER_LONG - 1;
-    memnodemap = _memnodemap;
-    memnodemapsize = ARRAY_SIZE(_memnodemap);
-
-    nodes_clear(node_online_map);
-    node_set_online(0);
-    for ( i = 0; i < nr_cpu_ids; i++ )
-        numa_set_node(i, 0);
-    cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
-    setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
-                    (u64)end_pfn << PAGE_SHIFT);
-}
-
  void numa_set_node(int cpu, nodeid_t node)
  {
      cpu_to_node[cpu] = node;
diff --git a/xen/common/numa.c b/xen/common/numa.c
index 1facc8fe2b..26c0006d04 100644
--- a/xen/common/numa.c
+++ b/xen/common/numa.c
@@ -14,12 +14,13 @@
  #include <xen/smp.h>
  #include <xen/pfn.h>
  #include <xen/sched.h>

NIT: We tend to add a newline betwen <xen/...> headers and <asm/...> headers.

+#include <asm/acpi.h>
struct node_data node_data[MAX_NUMNODES]; /* Mapping from pdx to node id */
  int memnode_shift;
-typeof(*memnodemap) _memnodemap[64];
+static typeof(*memnodemap) _memnodemap[64];
  unsigned long memnodemapsize;
  u8 *memnodemap;
@@ -34,6 +35,8 @@ int num_node_memblks;
  struct node node_memblk_range[NR_NODE_MEMBLKS];
  nodeid_t memblk_nodeid[NR_NODE_MEMBLKS];
+bool numa_off;
+
  /*
   * Given a shift value, try to populate memnodemap[]
   * Returns :
@@ -191,3 +194,120 @@ void numa_add_cpu(int cpu)
  {
      cpumask_set_cpu(cpu, &node_to_cpumask[cpu_to_node(cpu)]);
  }
+
+/* initialize NODE_DATA given nodeid and start/end */
+void __init setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end)

From an abstract PoV, start and end should be paddr_t. This should be done on a separate patch though.

+{
+    unsigned long start_pfn, end_pfn;
+
+    start_pfn = start >> PAGE_SHIFT;
+    end_pfn = end >> PAGE_SHIFT;
+
+    NODE_DATA(nodeid)->node_start_pfn = start_pfn;
+    NODE_DATA(nodeid)->node_spanned_pages = end_pfn - start_pfn;
+
+    node_set_online(nodeid);
+}
+
+void __init numa_init_array(void)
+{
+    int rr, i;
+
+    /* There are unfortunately some poorly designed mainboards around
+       that only connect memory to a single CPU. This breaks the 1:1 cpu->node
+       mapping. To avoid this fill in the mapping for all possible
+       CPUs, as the number of CPUs is not known yet.
+       We round robin the existing nodes. */
+    rr = first_node(node_online_map);
+    for ( i = 0; i < nr_cpu_ids; i++ )
+    {
+        if ( cpu_to_node[i] != NUMA_NO_NODE )
+            continue;
+        numa_set_node(i, rr);
+        rr = cycle_node(rr, node_online_map);
+    }
+}
+
+#ifdef CONFIG_NUMA_EMU
+int numa_fake __initdata = 0;
+
+/* Numa emulation */
+static int __init numa_emulation(u64 start_pfn, u64 end_pfn)

Here, this should be either "unsigned long" or ideally "mfn_t". Although, if you use "unsigned long", you will need to...

+{
+    int i;
+    struct node nodes[MAX_NUMNODES];
+    u64 sz = ((end_pfn - start_pfn)<<PAGE_SHIFT) / numa_fake;

... cast "(end_pfn - start_pfn)" to uin64_t or use pfn_to_paddr().

+
+    /* Kludge needed for the hash function */
+    if ( hweight64(sz) > 1 )
+    {
+        u64 x = 1;
+        while ( (x << 1) < sz )
+            x <<= 1;
+        if ( x < sz/2 )
+            printk(KERN_ERR "Numa emulation unbalanced. Complain to 
maintainer\n");
+        sz = x;
+    }
+
+    memset(&nodes,0,sizeof(nodes));
+    for ( i = 0; i < numa_fake; i++ )
+    {
+        nodes[i].start = (start_pfn<<PAGE_SHIFT) + i*sz;
+        if ( i == numa_fake - 1 )
+            sz = (end_pfn<<PAGE_SHIFT) - nodes[i].start;
+        nodes[i].end = nodes[i].start + sz;
+        printk(KERN_INFO "Faking node %d at %"PRIx64"-%"PRIx64" 
(%"PRIu64"MB)\n",
+               i,
+               nodes[i].start, nodes[i].end,
+               (nodes[i].end - nodes[i].start) >> 20);
+        node_set_online(i);
+    }
+    memnode_shift = compute_hash_shift(nodes, numa_fake, NULL);
+    if ( memnode_shift < 0 )
+    {
+        memnode_shift = 0;
+        printk(KERN_ERR "No NUMA hash function found. Emulation disabled.\n");
+        return -1;
+    }
+    for_each_online_node ( i )
+        setup_node_bootmem(i, nodes[i].start, nodes[i].end);
+    numa_init_array();
+
+    return 0;
+}
+#endif
+
+void __init numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn)
+{
+    int i;
+
+#ifdef CONFIG_NUMA_EMU
+    if ( numa_fake && !numa_emulation(start_pfn, end_pfn) )
+        return;
+#endif
+
+#ifdef CONFIG_ACPI_NUMA
+    if ( !numa_off && !acpi_scan_nodes((u64)start_pfn << PAGE_SHIFT,
+         (u64)end_pfn << PAGE_SHIFT) )

(u64)v << PAGE_SHIFT should be switched to use pfn_to_paddr() or mfn_to_paddr() if you decide to make start_pfn and end_pfn typesafe.

+        return;
+#endif
+
+    printk(KERN_INFO "%s\n",
+           numa_off ? "NUMA turned off" : "No NUMA configuration found");
+
+    printk(KERN_INFO "Faking a node at %016"PRIx64"-%016"PRIx64"\n",
+           (u64)start_pfn << PAGE_SHIFT,
+           (u64)end_pfn << PAGE_SHIFT);

Same remark here. PRIx64 would also have to be switched to PRIpaddr.

+    /* setup dummy node covering all memory */
+    memnode_shift = BITS_PER_LONG - 1;
+    memnodemap = _memnodemap;
+    memnodemapsize = ARRAY_SIZE(_memnodemap);
+
+    nodes_clear(node_online_map);
+    node_set_online(0);
+    for ( i = 0; i < nr_cpu_ids; i++ )
+        numa_set_node(i, 0);
+    cpumask_copy(&node_to_cpumask[0], cpumask_of(0));
+    setup_node_bootmem(0, (u64)start_pfn << PAGE_SHIFT,
+                    (u64)end_pfn << PAGE_SHIFT);
+}
diff --git a/xen/include/asm-x86/numa.h b/xen/include/asm-x86/numa.h
index e8a92ad9df..f8e4e15586 100644
--- a/xen/include/asm-x86/numa.h
+++ b/xen/include/asm-x86/numa.h
@@ -21,16 +21,11 @@ extern nodeid_t pxm_to_node(unsigned int pxm);
#define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) -extern void numa_init_array(void);
-extern bool numa_off;
-
-
  extern int srat_disabled(void);
  extern void numa_set_node(int cpu, nodeid_t node);
  extern nodeid_t setup_node(unsigned int pxm);
  extern void srat_detect_node(int cpu);
-extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
  extern nodeid_t apicid_to_node[];
  extern void init_cpu_to_node(void);
diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
index 24be46115d..63838ba2d1 100644
--- a/xen/include/asm-x86/setup.h
+++ b/xen/include/asm-x86/setup.h
@@ -17,7 +17,6 @@ void early_time_init(void);
void set_nr_cpu_ids(unsigned int max_cpus); -void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
  void arch_init_memory(void);
  void subarch_init_memory(void);
diff --git a/xen/include/xen/numa.h b/xen/include/xen/numa.h
index 67b79a73a3..258a5cb3db 100644
--- a/xen/include/xen/numa.h
+++ b/xen/include/xen/numa.h
@@ -26,7 +26,6 @@
  extern int memnode_shift;
  extern unsigned long memnodemapsize;
  extern u8 *memnodemap;
-extern typeof(*memnodemap) _memnodemap[64];
struct node_data {
      unsigned long node_start_pfn;
@@ -69,6 +68,13 @@ extern int conflicting_memblks(u64 start, u64 end);
  extern void cutoff_node(int i, u64 start, u64 end);
  extern int valid_numa_range(u64 start, u64 end, nodeid_t node);
+extern void numa_init_array(void);
+extern void numa_initmem_init(unsigned long start_pfn, unsigned long end_pfn);
+extern bool numa_off;
+extern int numa_fake;
+
+extern void setup_node_bootmem(nodeid_t nodeid, u64 start, u64 end);
+
  #endif /* CONFIG_NUMA */
#endif /* _XEN_NUMA_H */


Cheers,

--
Julien Grall



 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.