
Re: [Xen-devel] [PATCH v10 01/13] x86: add socket_cpumask



On 06/26/2015 04:43 AM, Chao Peng wrote:
Maintain socket_cpumask which contains all the HT and core siblings
in the same socket.

Signed-off-by: Chao Peng <chao.p.peng@xxxxxxxxxxxxxxx>
Acked-by: Jan Beulich <jbeulich@xxxxxxxx>
---
Changes in v9:
* Add comments for set_nr_sockets.
* Move set_nr_sockets() invocation from __start_xen() to smp_prepare_cpus().
Changes in v8:
* Remove total_cpus and retrofit the algorithm for calculating nr_sockets.
* Change per-socket cpumask allocation to be on demand.
* socket_to_cpumask => socket_cpumask.
Changes in v7:
* Introduce total_cpus to calculate nr_sockets.
* Minor code sequence improvement in set_cpu_sibling_map.
* Improve comments for nr_sockets.
---
  xen/arch/x86/mpparse.c    | 17 +++++++++++++++++
  xen/arch/x86/smpboot.c    | 26 +++++++++++++++++++++++++-
  xen/include/asm-x86/smp.h | 11 +++++++++++
  3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
index 003c56e..8609f4a 100644
--- a/xen/arch/x86/mpparse.c
+++ b/xen/arch/x86/mpparse.c
@@ -87,6 +87,23 @@ void __init set_nr_cpu_ids(unsigned int max_cpus)
  #endif
  }
+void __init set_nr_sockets(void)
+{
+    /*
+     * Count the actual CPUs present in socket 0 and use that to calculate
+     * nr_sockets, so that the latter is always >= the actual number of
+     * sockets in the system, even when APIC IDs from the MP table are too sparse.
+     */
+    unsigned int cpus = bitmap_weight(phys_cpu_present_map.mask,
+                                      boot_cpu_data.x86_max_cores *
+                                      boot_cpu_data.x86_num_siblings);
+
+    if ( cpus == 0 )
+        cpus = 1;
+
+    nr_sockets = DIV_ROUND_UP(num_processors + disabled_cpus, cpus);
+}
+
  /*
   * Intel MP BIOS table parsing routines:
   */
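
(For illustration only: plugging in the 2-socket, 32-CPU box I describe further down, and assuming disabled_cpus is 0 there and that the socket-0 weight comes out to its 16 CPUs, the calculation above gives

    nr_sockets = DIV_ROUND_UP(num_processors + disabled_cpus, cpus)
               = DIV_ROUND_UP(32 + 0, 16)
               = 2

which matches the two physical sockets.)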
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 2289284..e75bbd3 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -60,6 +60,9 @@ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_core_mask);
  cpumask_t cpu_online_map __read_mostly;
  EXPORT_SYMBOL(cpu_online_map);
+unsigned int __read_mostly nr_sockets;
+cpumask_var_t *__read_mostly socket_cpumask;
+
  struct cpuinfo_x86 cpu_data[NR_CPUS];
  u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
@@ -245,6 +248,8 @@ static void set_cpu_sibling_map(int cpu)
      cpumask_set_cpu(cpu, &cpu_sibling_setup_map);

+    cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);

This patch crashes Xen on my 32-CPU Intel box here (at the added line above) for cpu 16, which is the first CPU on the second socket (i.e. on socket 1).

The reason appears to be that cpu_to_socket(16) is (correctly) 1 here, but ...

+
      if ( c[cpu].x86_num_siblings > 1 )
      {
          for_each_cpu ( i, &cpu_sibling_setup_map )
@@ -649,7 +654,13 @@ void cpu_exit_clear(unsigned int cpu)
static void cpu_smpboot_free(unsigned int cpu)
  {
-    unsigned int order;
+    unsigned int order, socket = cpu_to_socket(cpu);
+
+    if ( cpumask_empty(socket_cpumask[socket]) )
+    {
+        free_cpumask_var(socket_cpumask[socket]);
+        socket_cpumask[socket] = NULL;
+    }

      free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
      free_cpumask_var(per_cpu(cpu_core_mask, cpu));
@@ -694,6 +705,7 @@ static int cpu_smpboot_alloc(unsigned int cpu)
      nodeid_t node = cpu_to_node(cpu);
      struct desc_struct *gdt;
      unsigned long stub_page;
+    unsigned int socket = cpu_to_socket(cpu);

... is zero here, meaning that socket_cpumask[1] is never allocated and is still NULL when set_cpu_sibling_map() dereferences it. I suspect that phys_proc_id is not yet set at this point but is set by the time we get to set_cpu_sibling_map(). I haven't looked any further yet. I might do this tomorrow unless Chao does it before me.
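
For what it's worth, a quick (untested, purely illustrative) way to confirm the ordering would be a pair of debug printk()s, one in each function:

    /* hypothetical debug aid, not part of the patch: in cpu_smpboot_alloc() */
    printk("cpu_smpboot_alloc: cpu %u socket %u\n", cpu, cpu_to_socket(cpu));

    /* ... and in set_cpu_sibling_map(), where cpu is an int ... */
    printk("set_cpu_sibling_map: cpu %d socket %u\n", cpu, cpu_to_socket(cpu));

If the first prints socket 0 for cpu 16 while the second prints socket 1, that would confirm phys_proc_id gets filled in somewhere in between.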

Here is what I have

[root@ovs104 ~]# numactl --hardware
available: 2 nodes (0-1)
node 0 cpus: 0 1 2 3 4 5 6 7 16 17 18 19 20 21 22 23
node 0 size: 15748 MB
node 0 free: 15466 MB
node 1 cpus: 8 9 10 11 12 13 14 15 24 25 26 27 28 29 30 31
node 1 size: 16118 MB
node 1 free: 15896 MB
node distances:
node   0   1
  0:  10  21
  1:  21  10
[root@ovs104 ~]# grep processor /proc/cpuinfo |wc -l
32
[root@ovs104 ~]# head -20 /proc/cpuinfo
processor    : 0
vendor_id    : GenuineIntel
cpu family    : 6
model        : 45
model name    : Genuine Intel(R) CPU  @ 2.30GHz
stepping    : 2
microcode    : 0x8000020c
cpu MHz        : 1231.937
cache size    : 20480 KB
physical id    : 0
siblings    : 16
core id        : 0
cpu cores    : 8
apicid        : 0
initial apicid    : 0
fpu        : yes
fpu_exception    : yes
cpuid level    : 13
wp        : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq dtes64 monitor ds_cpl vmx smx est tm2 ssse3 cx16 xtpr pdcm pcid dca sse4_1 sse4_2 x2apic popcnt tsc_deadline_timer aes xsave avx lahf_lm ida arat epb pln pts dtherm tpr_shadow vnmi flexpriority ept vpid xsaveopt

-boris


      if ( node != NUMA_NO_NODE )
          memflags = MEMF_node(node);
@@ -736,6 +748,10 @@ static int cpu_smpboot_alloc(unsigned int cpu)
          goto oom;
      per_cpu(stubs.addr, cpu) = stub_page + STUB_BUF_CPU_OFFS(cpu);

+    if ( !socket_cpumask[socket] &&
+         !zalloc_cpumask_var(socket_cpumask + socket) )
+        goto oom;
+
      if ( zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) &&
           zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) )
          return 0;
@@ -786,6 +802,12 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
      stack_base[0] = stack_start;

+    set_nr_sockets();
+
+    socket_cpumask = xzalloc_array(cpumask_var_t, nr_sockets);
+    if ( !socket_cpumask || !zalloc_cpumask_var(socket_cpumask) )
+        panic("No memory for socket CPU siblings map");
+
      if ( !zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, 0)) ||
           !zalloc_cpumask_var(&per_cpu(cpu_core_mask, 0)) )
          panic("No memory for boot CPU sibling/core maps");
@@ -851,6 +873,8 @@ remove_siblinginfo(int cpu)
      int sibling;
      struct cpuinfo_x86 *c = cpu_data;

+    cpumask_clear_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
+
      for_each_cpu ( sibling, per_cpu(cpu_core_mask, cpu) )
      {
          cpumask_clear_cpu(cpu, per_cpu(cpu_core_mask, sibling));
diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
index 67518cf..e594062 100644
--- a/xen/include/asm-x86/smp.h
+++ b/xen/include/asm-x86/smp.h
@@ -58,6 +58,17 @@ int hard_smp_processor_id(void);
  void __stop_this_cpu(void);

+/*
+ * The value may be greater than the actual number of sockets in the system
+ * and is required not to change after initial startup.
+ */
+extern unsigned int nr_sockets;
+
+void set_nr_sockets(void);
+
+/* Representing HT and core siblings in each socket. */
+extern cpumask_var_t *socket_cpumask;
+
  #endif /* !__ASSEMBLY__ */
#endif




 

