[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [RFC] [PATCH 1/2] Change the method to get the extended MCA information



arch/x86/cpu/mcheck/amd_f10.c     |   43 ++++++++++--------
 arch/x86/cpu/mcheck/mce.c         |   14 ++++--
 arch/x86/cpu/mcheck/mce.h         |    8 ---
 arch/x86/cpu/mcheck/mce_intel.c   |   88 ++++++++++++++++----------------------
 include/public/arch-x86/xen-mca.h |   15 ++----
 5 files changed, 79 insertions(+), 89 deletions(-)

Change the method to get the extended MCA information.

Several changes to get the extended MCA information:
a) Change definition of strucutre mcinfo_extended to be various
   length. A 0 size index will make it can cover both AMD and Intel
   requirement without waste.
b) Use the x86_mcinfo_reserve in mcinfo_extended to reserve buffer from
   mc_info, instead of using the stack
c) For intel's extended MSR, we don't need write them one
   by one as the MSR are continous
d) We don't need enum mca_extinfo, since we can consider
   the extended MSR as either per bank, or global. Currently
   we add a hook in global data collection, and didn't call
   register intel_get_extended_msrs as callback. Later that
   hook can be replaced by cleaner way

    Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxx>

diff -r e859f6c9d656 -r da1165144ac4 xen/arch/x86/cpu/mcheck/amd_f10.c
--- a/xen/arch/x86/cpu/mcheck/amd_f10.c Fri Apr 16 19:08:22 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/amd_f10.c Mon Apr 19 15:31:29 2010 +0800
@@ -49,37 +49,44 @@
 #include "x86_mca.h"
 
 
-static enum mca_extinfo
+static struct mcinfo_extended *
 amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
 {
-       struct mcinfo_extended mc_ext;
+       struct mcinfo_extended *mc_ext;
 
        /* Family 0x10 introduced additional MSR that belong to the
         * northbridge bank (4). */
        if (mi == NULL || bank != 4)
-               return MCA_EXTINFO_IGNORED;
+               return NULL;
 
        if (!(status & MCi_STATUS_VAL))
-               return MCA_EXTINFO_IGNORED;
+               return NULL;
 
        if (!(status & MCi_STATUS_MISCV))
-               return MCA_EXTINFO_IGNORED;
+               return NULL;
 
-       memset(&mc_ext, 0, sizeof(mc_ext));
-       mc_ext.common.type = MC_TYPE_EXTENDED;
-       mc_ext.common.size = sizeof(mc_ext);
-       mc_ext.mc_msrs = 3;
+    mc_ext = x86_mcinfo_reserve(mi, sizeof(struct mcinfo_extended) +
+            3 * sizeof(struct mcinfo_msr));
+    if (!mc_ext)
+    {
+        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
+        return NULL;
+    }
 
-       mc_ext.mc_msr[0].reg = MSR_F10_MC4_MISC1;
-       mc_ext.mc_msr[1].reg = MSR_F10_MC4_MISC2;
-       mc_ext.mc_msr[2].reg = MSR_F10_MC4_MISC3;
+       memset(mc_ext, 0, sizeof(mc_ext));
+       mc_ext->common.type = MC_TYPE_EXTENDED;
+       mc_ext->common.size = sizeof(mc_ext);
+       mc_ext->mc_msrs = 3;
 
-       mca_rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value);
-       mca_rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value);
-       mca_rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value);
-       
-       x86_mcinfo_add(mi, &mc_ext);
-       return MCA_EXTINFO_LOCAL;
+       mc_ext->mc_msr[0].reg = MSR_F10_MC4_MISC1;
+       mc_ext->mc_msr[1].reg = MSR_F10_MC4_MISC2;
+       mc_ext->mc_msr[2].reg = MSR_F10_MC4_MISC3;
+
+       mca_rdmsrl(MSR_F10_MC4_MISC1, mc_ext->mc_msr[0].value);
+       mca_rdmsrl(MSR_F10_MC4_MISC2, mc_ext->mc_msr[1].value);
+       mca_rdmsrl(MSR_F10_MC4_MISC3, mc_ext->mc_msr[2].value);
+
+       return mc_ext;
 }
 
 /* AMD Family10 machine check */
diff -r e859f6c9d656 -r da1165144ac4 xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Fri Apr 16 19:08:22 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Mon Apr 19 15:31:29 2010 +0800
@@ -229,7 +229,6 @@ mctelem_cookie_t mcheck_mca_logout(enum 
        mctelem_class_t which = MC_URGENT;      /* XXXgcc */
        int errcnt = 0;
        int i;
-       enum mca_extinfo cbret = MCA_EXTINFO_IGNORED;
 
        mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
        switch (who) {
@@ -291,6 +290,14 @@ mctelem_cookie_t mcheck_mca_logout(enum 
                                /* mc_info should at least hold up the global 
information */
                                ASSERT(mig);
                                mca_init_global(mc_flags, mig);
+                /* A hook here to get global extended msrs */
+                {
+                    struct mcinfo_extended *intel_get_extended_msrs(
+                              struct mcinfo_global *mig, struct mc_info *mi);
+                    struct cpuinfo_x86 *c;
+                    c = &cpu_data[smp_processor_id()];
+                    intel_get_extended_msrs(mig, mci);
+                }
                        }
                }
 
@@ -309,9 +316,8 @@ mctelem_cookie_t mcheck_mca_logout(enum 
 
                mib = mca_init_bank(who, mci, i);
 
-               if (mc_callback_bank_extended && cbret != MCA_EXTINFO_GLOBAL) {
-                       cbret = mc_callback_bank_extended(mci, i, status);
-               }
+               if (mc_callback_bank_extended)
+                       mc_callback_bank_extended(mci, i, status);
 
                /* By default, need_clear = 1 */
                if (who != MCA_MCE_SCAN && need_clear)
diff -r e859f6c9d656 -r da1165144ac4 xen/arch/x86/cpu/mcheck/mce.h
--- a/xen/arch/x86/cpu/mcheck/mce.h     Fri Apr 16 19:08:22 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.h     Mon Apr 19 15:31:29 2010 +0800
@@ -110,12 +110,6 @@ enum mca_source {
        MCA_MCE_SCAN
 };
 
-enum mca_extinfo {
-       MCA_EXTINFO_LOCAL,
-       MCA_EXTINFO_GLOBAL,
-       MCA_EXTINFO_IGNORED
-};
-
 struct mca_summary {
        uint32_t        errcnt; /* number of banks with valid errors */
        int             ripv;   /* meaningful on #MC */
@@ -157,7 +151,7 @@ typedef int (*mce_need_clearbank_t)(enum
 typedef int (*mce_need_clearbank_t)(enum mca_source who, u64 status);
 extern void mce_need_clearbank_register(mce_need_clearbank_t);
 
-typedef enum mca_extinfo (*x86_mce_callback_t)
+typedef struct mcinfo_extended *(*x86_mce_callback_t)
     (struct mc_info *, uint16_t, uint64_t);
 extern void x86_mce_callback_register(x86_mce_callback_t);
 
diff -r e859f6c9d656 -r da1165144ac4 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Fri Apr 16 19:08:22 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Mon Apr 19 15:31:29 2010 +0800
@@ -149,57 +149,46 @@ static void intel_init_thermal(struct cp
 }
 #endif /* CONFIG_X86_MCE_THERMAL */
 
-static inline void intel_get_extended_msr(struct mcinfo_extended *ext, u32 msr)
-{
-    if ( ext->mc_msrs < ARRAY_SIZE(ext->mc_msr)
-         && msr < MSR_IA32_MCG_EAX + nr_intel_ext_msrs ) {
-        ext->mc_msr[ext->mc_msrs].reg = msr;
-        mca_rdmsrl(msr, ext->mc_msr[ext->mc_msrs].value);
-        ++ext->mc_msrs;
-    }
-}
-
-static enum mca_extinfo
-intel_get_extended_msrs(struct mc_info *mci, uint16_t bank, uint64_t status)
-{
-    struct mcinfo_extended mc_ext;
-
-    if (mci == NULL || nr_intel_ext_msrs == 0 || !(status & MCG_STATUS_EIPV))
-        return MCA_EXTINFO_IGNORED;
+#define INTEL_EXTENDED_MCA_MSRs (MSR_IA32_MCG_R15 - MSR_IA32_MCG_EAX + 1)
+struct mcinfo_extended *
+intel_get_extended_msrs(struct mcinfo_global *mig, struct mc_info *mi)
+{
+    struct mcinfo_extended *mc_ext;
+    struct mcinfo_msr *msr;
+    int num = nr_intel_ext_msrs, i, length;
+
+    /*
+     * According to spec, processor _support_ 64 bit will always
+     * have MSR beyond IA32_MCG_MISC
+     */
+    if (!mi || !mig || !num || num != INTEL_EXTENDED_MCA_MSRs||
+            !(mig->mc_gstatus & MCG_STATUS_EIPV))
+        return NULL;
+
+    length = sizeof(struct mcinfo_extended) + num * sizeof(struct mcinfo_msr);
+    mc_ext = x86_mcinfo_reserve(mi, length);
+    if (!mc_ext)
+    {
+        mi->flags |= MCINFO_FLAGS_UNCOMPLETE;
+        return NULL;
+    }
 
     /* this function will called when CAP(9).MCG_EXT_P = 1 */
-    memset(&mc_ext, 0, sizeof(struct mcinfo_extended));
-    mc_ext.common.type = MC_TYPE_EXTENDED;
-    mc_ext.common.size = sizeof(mc_ext);
-
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EAX);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EBX);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ECX);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EDX);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ESI);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EDI);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EBP);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_ESP);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EFLAGS);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_EIP);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_MISC);
-
-#ifdef __x86_64__
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R8);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R9);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R10);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R11);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R12);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R13);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R14);
-    intel_get_extended_msr(&mc_ext, MSR_IA32_MCG_R15);
-#endif
-
-    x86_mcinfo_add(mci, &mc_ext);
-
-    return MCA_EXTINFO_GLOBAL;
-}
-
+    memset(&mc_ext, 0, length);
+    mc_ext->common.type = MC_TYPE_EXTENDED;
+    mc_ext->common.size = length;
+
+    msr = mc_ext->mc_msr;
+    for (i = 0; i < num; i++)
+    {
+        msr->reg = i + MSR_IA32_MCG_EAX;
+        mca_rdmsrl(msr->reg, msr->value);
+        mc_ext->mc_msrs ++;
+        msr ++;
+    }
+
+    return mc_ext;
+}
 
 static void intel_UCR_handler(struct mcinfo_bank *bank,
              struct mcinfo_global *global,
@@ -959,7 +948,6 @@ enum mcheck_type intel_mcheck_init(struc
 
     /* machine check is available */
     x86_mce_vector_register(intel_machine_check);
-    x86_mce_callback_register(intel_get_extended_msrs);
     mce_recoverable_register(intel_recoverable_scan);
     mce_need_clearbank_register(intel_need_clearbank_scan);
 
diff -r e859f6c9d656 -r da1165144ac4 xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h     Fri Apr 16 19:08:22 2010 +0800
+++ b/xen/include/public/arch-x86/xen-mca.h     Mon Apr 19 15:31:29 2010 +0800
@@ -150,6 +150,7 @@ struct mcinfo_bank {
 };
 
 
+/* Should be aligned */
 struct mcinfo_msr {
     uint64_t reg;   /* MSR */
     uint64_t value; /* MSR value */
@@ -160,17 +161,11 @@ struct mcinfo_extended {
 struct mcinfo_extended {
     struct mcinfo_common common;
 
-    /* You can fill up to five registers.
-     * If you need more, then use this structure
-     * multiple times. */
-
     uint32_t mc_msrs; /* Number of msr with valid values. */
-    /*
-     * Currently Intel extended MSR (32/64) include all gp registers
-     * and E(R)FLAGS, E(R)IP, E(R)MISC, up to 11/19 of them might be
-     * useful at present. So expand this array to 16/32 to leave room.
-     */
-    struct mcinfo_msr mc_msr[sizeof(void *) * 4];
+
+    /* The number of MSR saved is determined by mc_msrs, maxium maybe 32 */
+    /* XXX Will this have compatibility issue? */
+    struct mcinfo_msr mc_msr[0];
 };
 
 /* Recovery Action flags. Giving recovery result information to DOM0 */


Attachment: mce_extended.patch
Description: mce_extended.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel

 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.