[Xen-devel] [PATCH 1/4] x86/mcheck: allow varying bank counts per CPU



Up to now we've been assuming that all CPUs would have the same number
of reporting banks. However, on upcoming AMD CPUs this isn't the case,
and one can observe

(XEN) mce.c:666: Different bank number on cpu <N>

indicating that Machine Check support would not be enabled on the
affected CPUs. Convert the count variable to a per-CPU one, and adjust
code where needed to cope with the values not being the same. In
particular, the mcabanks_alloc() invocations during AP bringup now need
to allocate maximum-size bitmaps, because the truly needed size can't
be known until we actually execute on that CPU, yet mcheck_init() gets
called too early to do any allocations itself.
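
To illustrate the sizing rule this introduces (a minimal, compilable
userspace sketch; struct mca_banks, MCG_CAP_COUNT, and the allocator
below are simplified stand-ins for the Xen definitions, not the
hypervisor code itself):

    #include <stdlib.h>

    #define MCG_CAP_COUNT 0xff /* architectural maximum bank count */
    #define BITS_TO_LONGS(n) \
        (((n) + 8 * sizeof(long) - 1) / (8 * sizeof(long)))

    struct mca_banks {
        unsigned int num;
        unsigned long *bank_map;
    };

    struct mca_banks *mcabanks_alloc(unsigned int nr)
    {
        struct mca_banks *mb = calloc(1, sizeof(*mb));

        if ( !mb )
            return NULL;

        /*
         * The BSP allocates the APs' bitmaps before their MCG_CAP can
         * be read; a zero count signals that case, so size for the
         * architectural maximum instead.
         */
        if ( !nr )
            nr = MCG_CAP_COUNT;

        mb->bank_map = calloc(BITS_TO_LONGS(nr), sizeof(unsigned long));
        if ( !mb->bank_map )
        {
            free(mb);
            return NULL;
        }
        mb->num = nr;
        return mb;
    }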

Take the liberty and also
- make mca_cap_init() static,
- replace several __get_cpu_var() uses where a local variable suitable
  for use with per_cpu() is available (see the sketch after this list),
- correct which CPU's cpu_data[] entry x86_mc_msrinject_verify() uses,
- replace a BUG() by panic().
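
As a sketch of that __get_cpu_var() -> per_cpu() pattern (the per-CPU
machinery below is a mock, just to keep the example self-contained and
runnable; Xen's real macros resolve differently):

    #include <stdio.h>

    #define NR_CPUS 8

    /* Mock per-CPU storage; a stand-in for Xen's per-CPU sections. */
    static unsigned int per_cpu_nr_mce_banks[NR_CPUS];

    #define per_cpu(var, cpu) (per_cpu_##var[(cpu)])

    static unsigned int smp_processor_id(void) { return 0; /* stub */ }

    static void clear_banks(void)
    {
        /* Fetch the CPU number once into a local ... */
        unsigned int i, cpu = smp_processor_id();

        /* ... then index explicitly, instead of one implicit
           this-CPU lookup per access. */
        for ( i = 0; i < per_cpu(nr_mce_banks, cpu); i++ )
            printf("clearing bank %u on CPU%u\n", i, cpu);
    }

    int main(void)
    {
        per_cpu(nr_mce_banks, 0) = 2;
        clear_banks();
        return 0;
    }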

Signed-off-by: Jan Beulich <jbeulich@xxxxxxxx>

--- a/xen/arch/x86/cpu/mcheck/mce.c
+++ b/xen/arch/x86/cpu/mcheck/mce.c
@@ -34,7 +34,7 @@ bool __read_mostly opt_mce = true;
 boolean_param("mce", opt_mce);
 bool __read_mostly mce_broadcast;
 bool is_mc_panic;
-unsigned int __read_mostly nr_mce_banks;
+DEFINE_PER_CPU_READ_MOSTLY(unsigned int, nr_mce_banks);
 unsigned int __read_mostly firstbank;
 uint8_t __read_mostly cmci_apic_vector;
 
@@ -120,7 +120,7 @@ void mce_recoverable_register(mce_recove
     mc_recoverable_scan = cbfunc;
 }
 
-struct mca_banks *mcabanks_alloc(void)
+struct mca_banks *mcabanks_alloc(unsigned int nr_mce_banks)
 {
     struct mca_banks *mb;
 
@@ -128,6 +128,13 @@ struct mca_banks *mcabanks_alloc(void)
     if ( !mb )
         return NULL;
 
+    /*
+     * For APs, allocations get done by the BSP, i.e. when the bank count
+     * may not be known yet. A zero bank count is a clear indication of this.
+     */
+    if ( !nr_mce_banks )
+        nr_mce_banks = MCG_CAP_COUNT;
+
     mb->bank_map = xzalloc_array(unsigned long,
                                  BITS_TO_LONGS(nr_mce_banks));
     if ( !mb->bank_map )
@@ -319,7 +326,7 @@ mcheck_mca_logout(enum mca_source who, s
      */
     recover = mc_recoverable_scan ? 1 : 0;
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         /* Skip bank if corresponding bit in bankmask is clear */
         if ( !mcabanks_test(i, bankmask) )
@@ -565,7 +572,7 @@ void mcheck_mca_clearbanks(struct mca_ba
 {
     int i;
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         if ( !mcabanks_test(i, bankmask) )
             continue;
@@ -638,54 +645,56 @@ static void set_poll_bankmask(struct cpu
 
     if ( cmci_support && opt_mce )
     {
-        mb->num = per_cpu(no_cmci_banks, cpu)->num;
-        bitmap_copy(mb->bank_map, per_cpu(no_cmci_banks, cpu)->bank_map,
-                    nr_mce_banks);
+        const struct mca_banks *cmci = per_cpu(no_cmci_banks, cpu);
+
+        if ( unlikely(cmci->num < mb->num) )
+            bitmap_fill(mb->bank_map, mb->num);
+        bitmap_copy(mb->bank_map, cmci->bank_map, min(mb->num, cmci->num));
     }
     else
     {
-        bitmap_copy(mb->bank_map, mca_allbanks->bank_map, nr_mce_banks);
+        bitmap_copy(mb->bank_map, mca_allbanks->bank_map,
+                    per_cpu(nr_mce_banks, cpu));
         if ( mce_firstbank(c) )
             mcabanks_clear(0, mb);
     }
 }
 
 /* The perbank ctl/status init is platform specific because of AMD's quirk */
-int mca_cap_init(void)
+static int mca_cap_init(void)
 {
     uint64_t msr_content;
+    unsigned int nr, cpu = smp_processor_id();
 
     rdmsrl(MSR_IA32_MCG_CAP, msr_content);
 
     if ( msr_content & MCG_CTL_P ) /* Control register present ? */
         wrmsrl(MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
 
-    if ( nr_mce_banks && (msr_content & MCG_CAP_COUNT) != nr_mce_banks )
-    {
-        dprintk(XENLOG_WARNING, "Different bank number on cpu %x\n",
-                smp_processor_id());
-        return -ENODEV;
-    }
-    nr_mce_banks = msr_content & MCG_CAP_COUNT;
+    per_cpu(nr_mce_banks, cpu) = nr = MASK_EXTR(msr_content, MCG_CAP_COUNT);
 
-    if ( !nr_mce_banks )
+    if ( !nr )
     {
-        printk(XENLOG_INFO "CPU%u: No MCE banks present. "
-               "Machine check support disabled\n", smp_processor_id());
+        printk(XENLOG_INFO
+               "CPU%u: No MCE banks present. Machine check support disabled\n",
+               cpu);
         return -ENODEV;
     }
 
     /* mcabanks_alloc depends on nr_mce_banks */
-    if ( !mca_allbanks )
+    if ( !mca_allbanks || nr > mca_allbanks->num )
     {
-        int i;
+        unsigned int i;
+        struct mca_banks *all = mcabanks_alloc(nr);
 
-        mca_allbanks = mcabanks_alloc();
-        for ( i = 0; i < nr_mce_banks; i++ )
+        if ( !all )
+            return -ENOMEM;
+        for ( i = 0; i < nr; i++ )
-            mcabanks_set(i, mca_allbanks);
+            mcabanks_set(i, all);
+        mcabanks_free(xchg(&mca_allbanks, all));
     }
 
-    return mca_allbanks ? 0 : -ENOMEM;
+    return 0;
 }
 
 static void cpu_bank_free(unsigned int cpu)
@@ -702,8 +711,9 @@ static void cpu_bank_free(unsigned int c
 
 static int cpu_bank_alloc(unsigned int cpu)
 {
-    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc();
-    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc();
+    unsigned int nr = per_cpu(nr_mce_banks, cpu);
+    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc(nr);
+    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc(nr);
 
     if ( !poll || !clr )
     {
@@ -752,6 +762,7 @@ static struct notifier_block cpu_nfb = {
 void mcheck_init(struct cpuinfo_x86 *c, bool bsp)
 {
     enum mcheck_type inited = mcheck_none;
+    unsigned int cpu = smp_processor_id();
 
     if ( !opt_mce )
     {
@@ -762,8 +773,7 @@ void mcheck_init(struct cpuinfo_x86 *c,
 
     if ( !mce_available(c) )
     {
-        printk(XENLOG_INFO "CPU%i: No machine check support available\n",
-               smp_processor_id());
+        printk(XENLOG_INFO "CPU%i: No machine check support available\n", cpu);
         return;
     }
 
@@ -771,9 +781,13 @@ void mcheck_init(struct cpuinfo_x86 *c,
     if ( mca_cap_init() )
         return;
 
-    /* Early MCE initialisation for BSP. */
-    if ( bsp && cpu_bank_alloc(smp_processor_id()) )
-        BUG();
+    if ( !bsp )
+    {
+        per_cpu(poll_bankmask, cpu)->num = per_cpu(nr_mce_banks, cpu);
+        per_cpu(mce_clear_banks, cpu)->num = per_cpu(nr_mce_banks, cpu);
+    }
+    else if ( cpu_bank_alloc(cpu) )
+        panic("Insufficient memory for MCE bank allocations\n");
 
     switch ( c->x86_vendor )
     {
@@ -1111,24 +1125,22 @@ bool intpose_inval(unsigned int cpu_nr,
     return true;
 }
 
-#define IS_MCA_BANKREG(r) \
+#define IS_MCA_BANKREG(r, cpu) \
     ((r) >= MSR_IA32_MC0_CTL && \
-    (r) <= MSR_IA32_MCx_MISC(nr_mce_banks - 1) && \
-    ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
+     (r) <= MSR_IA32_MCx_MISC(per_cpu(nr_mce_banks, cpu) - 1) && \
+     ((r) - MSR_IA32_MC0_CTL) % 4) /* excludes MCi_CTL */
 
 static bool x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
 {
-    struct cpuinfo_x86 *c;
+    const struct cpuinfo_x86 *c = &cpu_data[mci->mcinj_cpunr];
     int i, errs = 0;
 
-    c = &cpu_data[smp_processor_id()];
-
     for ( i = 0; i < mci->mcinj_count; i++ )
     {
         uint64_t reg = mci->mcinj_msr[i].reg;
         const char *reason = NULL;
 
-        if ( IS_MCA_BANKREG(reg) )
+        if ( IS_MCA_BANKREG(reg, mci->mcinj_cpunr) )
         {
             if ( c->x86_vendor == X86_VENDOR_AMD )
             {
@@ -1448,7 +1460,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         break;
 
     case XEN_MC_msrinject:
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca inject", -ENODEV);
 
         mc_msrinject = &op->u.mc_msrinject;
@@ -1461,6 +1473,9 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
             return x86_mcerr("do_mca inject: target offline",
                              -EINVAL);
 
+        if ( !per_cpu(nr_mce_banks, target) )
+            return x86_mcerr("do_mca inject: no banks", -ENOENT);
+
         if ( mc_msrinject->mcinj_count == 0 )
             return 0;
 
@@ -1521,7 +1536,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         break;
 
     case XEN_MC_mceinject:
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca #MC", -ENODEV);
 
         mc_mceinject = &op->u.mc_mceinject;
@@ -1533,6 +1548,9 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         if ( !cpu_online(target) )
             return x86_mcerr("do_mca #MC: target offline", -EINVAL);
 
+        if ( !per_cpu(nr_mce_banks, target) )
+            return x86_mcerr("do_mca #MC: no banks", -ENOENT);
+
         add_taint(TAINT_ERROR_INJECT);
 
         if ( mce_broadcast )
@@ -1548,7 +1566,7 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
         cpumask_var_t cmv;
         bool broadcast = op->u.mc_inject_v2.flags & XEN_MC_INJECT_CPU_BROADCAST;
 
-        if ( nr_mce_banks == 0 )
+        if ( !mca_allbanks || !mca_allbanks->num )
             return x86_mcerr("do_mca #MC", -ENODEV);
 
         if ( broadcast )
@@ -1570,6 +1588,16 @@ long do_mca(XEN_GUEST_HANDLE_PARAM(xen_m
                         "Not all required CPUs are online\n");
         }
 
+        for_each_cpu(target, cpumap)
+            if ( cpu_online(target) && !per_cpu(nr_mce_banks, target) )
+            {
+                ret = x86_mcerr("do_mca #MC: CPU%u has no banks",
+                                -ENOENT, target);
+                break;
+            }
+        if ( ret )
+            break;
+
         switch ( op->u.mc_inject_v2.flags & XEN_MC_INJECT_TYPE_MASK )
         {
         case XEN_MC_INJECT_TYPE_MCE:
--- a/xen/arch/x86/cpu/mcheck/mce_amd.c
+++ b/xen/arch/x86/cpu/mcheck/mce_amd.c
@@ -297,7 +297,7 @@ amd_mcheck_init(struct cpuinfo_x86 *ci)
     x86_mce_vector_register(mcheck_cmn_handler);
     mce_need_clearbank_register(amd_need_clearbank_scan);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < this_cpu(nr_mce_banks); i++ )
     {
         if ( quirkflag == MCEQUIRK_K8_GART && i == 4 )
             mcequirk_amd_apply(quirkflag);
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
@@ -535,16 +535,16 @@ out:
 static void cmci_discover(void)
 {
     unsigned long flags;
-    int i;
+    unsigned int i, cpu = smp_processor_id();
     mctelem_cookie_t mctc;
     struct mca_summary bs;
 
-    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%d\n", smp_processor_id());
+    mce_printk(MCE_VERBOSE, "CMCI: find owner on CPU%u\n", cpu);
 
     spin_lock_irqsave(&cmci_discover_lock, flags);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
-        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
+    for ( i = 0; i < per_cpu(nr_mce_banks, cpu); i++ )
+        if ( !mcabanks_test(i, per_cpu(mce_banks_owned, cpu)) )
             do_cmci_discover(i);
 
     spin_unlock_irqrestore(&cmci_discover_lock, flags);
@@ -557,7 +557,7 @@ static void cmci_discover(void)
      */
 
     mctc = mcheck_mca_logout(
-        MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs, NULL);
+        MCA_CMCI_HANDLER, per_cpu(mce_banks_owned, cpu), &bs, NULL);
 
     if ( bs.errcnt && mctc != NULL )
     {
@@ -576,9 +576,9 @@ static void cmci_discover(void)
         mctelem_dismiss(mctc);
 
     mce_printk(MCE_VERBOSE, "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
-               smp_processor_id(),
-               *((unsigned long *)__get_cpu_var(mce_banks_owned)->bank_map),
-               *((unsigned long *)__get_cpu_var(no_cmci_banks)->bank_map));
+               cpu,
+               per_cpu(mce_banks_owned, cpu)->bank_map[0],
+               per_cpu(no_cmci_banks, cpu)->bank_map[0]);
 }
 
 /*
@@ -613,24 +613,24 @@ static void cpu_mcheck_distribute_cmci(v
 
 static void clear_cmci(void)
 {
-    int i;
+    unsigned int i, cpu = smp_processor_id();
 
     if ( !cmci_support || !opt_mce )
         return;
 
-    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%d\n",
-               smp_processor_id());
+    mce_printk(MCE_VERBOSE, "CMCI: clear_cmci support on CPU%u\n", cpu);
 
-    for ( i = 0; i < nr_mce_banks; i++ )
+    for ( i = 0; i < per_cpu(nr_mce_banks, cpu); i++ )
     {
         unsigned msr = MSR_IA32_MCx_CTL2(i);
         u64 val;
-        if ( !mcabanks_test(i, __get_cpu_var(mce_banks_owned)) )
+
+        if ( !mcabanks_test(i, per_cpu(mce_banks_owned, cpu)) )
             continue;
         rdmsrl(msr, val);
         if ( val & (CMCI_EN|CMCI_THRESHOLD_MASK) )
             wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
-        mcabanks_clear(i, __get_cpu_var(mce_banks_owned));
+        mcabanks_clear(i, per_cpu(mce_banks_owned, cpu));
     }
 }
 
@@ -826,7 +826,7 @@ static void intel_init_mce(void)
     intel_mce_post_reset();
 
     /* clear all banks */
-    for ( i = firstbank; i < nr_mce_banks; i++ )
+    for ( i = firstbank; i < this_cpu(nr_mce_banks); i++ )
     {
         /*
          * Some banks are shared across cores, use MCi_CTRL to judge whether
@@ -866,8 +866,9 @@ static void cpu_mcabank_free(unsigned in
 
 static int cpu_mcabank_alloc(unsigned int cpu)
 {
-    struct mca_banks *cmci = mcabanks_alloc();
-    struct mca_banks *owned = mcabanks_alloc();
+    unsigned int nr = per_cpu(nr_mce_banks, cpu);
+    struct mca_banks *cmci = mcabanks_alloc(nr);
+    struct mca_banks *owned = mcabanks_alloc(nr);
 
     if ( !cmci || !owned )
         goto out;
@@ -924,6 +925,13 @@ enum mcheck_type intel_mcheck_init(struc
         register_cpu_notifier(&cpu_nfb);
         mcheck_intel_therm_init();
     }
+    else
+    {
+        unsigned int cpu = smp_processor_id();
+
+        per_cpu(no_cmci_banks, cpu)->num = per_cpu(nr_mce_banks, cpu);
+        per_cpu(mce_banks_owned, cpu)->num = per_cpu(nr_mce_banks, cpu);
+    }
 
     intel_init_mca(c);
 
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h
@@ -125,7 +125,7 @@ static inline int mcabanks_test(int bit,
     return test_bit(bit, banks->bank_map);
 }
 
-struct mca_banks *mcabanks_alloc(void);
+struct mca_banks *mcabanks_alloc(unsigned int nr);
 void mcabanks_free(struct mca_banks *banks);
 extern struct mca_banks *mca_allbanks;
 
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -2374,7 +2374,7 @@ static int svm_is_erratum_383(struct cpu
         return 0;
     
     /* Clear MCi_STATUS registers */
-    for (i = 0; i < nr_mce_banks; i++)
+    for (i = 0; i < this_cpu(nr_mce_banks); i++)
         wrmsrl(MSR_IA32_MCx_STATUS(i), 0ULL);
     
     rdmsrl(MSR_IA32_MCG_STATUS, msr_content);
--- a/xen/include/asm-x86/mce.h
+++ b/xen/include/asm-x86/mce.h
@@ -40,6 +40,6 @@ extern int vmce_rdmsr(uint32_t msr, uint
 extern bool vmce_has_lmce(const struct vcpu *v);
 extern int vmce_enable_mca_cap(struct domain *d, uint64_t cap);
 
-extern unsigned int nr_mce_banks;
+DECLARE_PER_CPU(unsigned int, nr_mce_banks);
 
 #endif



