[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 6/6] Dump the MCE information in mc_panic and softirq


  • To: Keir Fraser <keir.fraser@xxxxxxxxxxxxx>
  • From: "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx>
  • Date: Wed, 9 Jun 2010 22:32:19 +0800
  • Accept-language: en-US
  • Acceptlanguage: en-US
  • Cc: Christoph Egger <Christoph.Egger@xxxxxxx>, xen-devel <xen-devel@xxxxxxxxxxxxxxxxxxx>
  • Delivery-date: Wed, 09 Jun 2010 07:39:16 -0700
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>
  • Thread-index: AcsH4JDmorFDFvjeQ5azhWDNL6cb2w==
  • Thread-topic: [PATCH 6/6] Dump the MCE information in mc_panic and softirq

Dump the MCE information in mc_panic and softirq

We should not dump the mcinfo in the MCE handler; instead, we should do that in
mc_panic for fatal errors, or in the softirq for other errors.

Signed-off-by: Jiang, Yunhong <yunhong.jiang@xxxxxxxxx>

 mce.c       |   24 ++++++++++++++++++++++++
 mce_intel.c |    7 +++----

diff -r 898279853894 xen/arch/x86/cpu/mcheck/mce.c
--- a/xen/arch/x86/cpu/mcheck/mce.c     Wed Jun 09 21:09:24 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce.c     Wed Jun 09 21:32:32 2010 +0800
@@ -1509,15 +1509,39 @@ long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u
     return ret;
 }
 
+int mcinfo_dumpped;
+static int x86_mcinfo_dump_panic(mctelem_cookie_t mctc)
+{
+    struct mc_info *mcip = mctelem_dataptr(mctc);
+
+    x86_mcinfo_dump(mcip);
+    mcinfo_dumpped++;
+
+    return 0;
+}
+
+/* XXX shall we dump commited mc_info?? */
+static void mc_panic_dump(void)
+{
+    int cpu;
+
+    dprintk(XENLOG_ERR, "Begin dump mc_info\n");
+    for_each_online_cpu(cpu)
+        mctelem_process_deferred(cpu, x86_mcinfo_dump_panic);
+    dprintk(XENLOG_ERR, "End dump mc_info, %x mcinfo dumped\n", mcinfo_dumpped);
+}
+
 void mc_panic(char *s)
 {
     is_mc_panic = 1;
     console_force_unlock();
+
     printk("Fatal machine check: %s\n", s);
     printk("\n"
            "****************************************\n"
            "\n"
            "   The processor has reported a hardware error which cannot\n"
            "   be recovered from.  Xen will now reboot the machine.\n");
+    mc_panic_dump();
     panic("HARDWARE ERROR");
 }
diff -r 898279853894 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Jun 09 21:09:24 2010 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Wed Jun 09 21:23:08 2010 +0800
@@ -257,6 +257,8 @@ static int mce_delayed_action(mctelem_co
     switch (result)
     {
     case MCER_RESET:
+        dprintk(XENLOG_ERR, "MCE delayed action failed\n");
+        x86_mcinfo_dump(mctelem_dataptr(mctc));
         panic("MCE: Software recovery failed for the UCR\n");
         break;
     case MCER_RECOVERED:
@@ -266,6 +268,7 @@ static int mce_delayed_action(mctelem_co
     case MCER_CONTINUE:
         dprintk(XENLOG_INFO, "MCE: Error can't be recovered, "
             "system is tainted\n");
+        x86_mcinfo_dump(mctelem_dataptr(mctc));
         ret = 1;
         break;
     default:
@@ -755,10 +758,6 @@ static void intel_machine_check(struct c
     mctc = mcheck_mca_logout(MCA_MCE_SCAN, mca_allbanks, &bs, clear_bank);
 
     if (bs.errcnt) {
-        /* dump MCE error */
-        if (mctc != NULL)
-            x86_mcinfo_dump(mctelem_dataptr(mctc));
-
         /*
          * Uncorrected errors must be dealth with in softirq context.
          */



_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.