WARNING - OLD ARCHIVES

This is an archived copy of the Xen.org mailing list, which we have preserved to ensure that existing links to archives are not broken. The live archive, which contains the latest emails, can be found at http://lists.xen.org/
   
 
 
Xen 
 
Home Products Support Community News
 
   
 

xen-devel

[Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem e

To: Keir Fraser <keir.xen@xxxxxxxxx>, "xen-devel@xxxxxxxxxxxxxxxxxxx" <xen-devel@xxxxxxxxxxxxxxxxxxx>
Subject: [Xen-devel] [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
From: "Liu, Jinsong" <jinsong.liu@xxxxxxxxx>
Date: Sun, 8 May 2011 19:23:12 +0800
Accept-language: en-US
Acceptlanguage: en-US
Cc: "Jiang, Yunhong" <yunhong.jiang@xxxxxxxxx>, "Li, Xin" <xin.li@xxxxxxxxx>
Delivery-date: Sun, 08 May 2011 04:24:34 -0700
Envelope-to: www-data@xxxxxxxxxxxxxxxxxxx
List-help: <mailto:xen-devel-request@lists.xensource.com?subject=help>
List-id: Xen developer discussion <xen-devel.lists.xensource.com>
List-post: <mailto:xen-devel@lists.xensource.com>
List-subscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=subscribe>
List-unsubscribe: <http://lists.xensource.com/mailman/listinfo/xen-devel>, <mailto:xen-devel-request@lists.xensource.com?subject=unsubscribe>
Sender: xen-devel-bounces@xxxxxxxxxxxxxxxxxxx
Thread-index: AcwNclEwEvLQfpHVSmSzkx8mJ5h67g==
Thread-topic: [PATCH 7] MCA: simplify mce actoin, and some update of mem err handler for future SRAR extension
MCA: simplify mce actoin, and some update of mem err handler for future SRAR 
extension

This patch simplify mce_action(). Basically the 2 set of mce status flags 
MCA_... and MCER_... are highly functional similar. This patch remove the 
redundant middle-level flag MCA_..., and its related data structure and 
function, so that mce_action() logic is more clean and simpler.
This patch also update mem err handler. intel_memerr_dhandler() will be 
commonly used by SRAO and SRAR, so for the extension of future SRAR case, this 
patch remove the default fail flag from intel_memerr_dhandler() outside to 
SRAO/SRAR level, since only SRAO/SRAR handler itself has the knowledge of how 
to handle the failure when mem err handler fail.

Signed-off-by: Liu, Jinsong <jinsong.liu@xxxxxxxxx>

diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/mce_intel.c
--- a/xen/arch/x86/cpu/mcheck/mce_intel.c       Sun May 08 13:39:40 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/mce_intel.c       Sun May 08 18:15:13 2011 +0800
@@ -172,23 +172,14 @@ static unsigned int __read_mostly mce_dh
 static unsigned int __read_mostly mce_dhandler_num;
 static unsigned int __read_mostly mce_uhandler_num;
 
-enum mce_result
-{
-    MCER_NOERROR,
-    MCER_RECOVERED,
-    /* Not recoverd, but can continue */
-    MCER_CONTINUE,
-    MCER_RESET,
-};
-
 /* Maybe called in MCE context, no lock, no printk */
 static enum mce_result mce_action(struct cpu_user_regs *regs,
                       mctelem_cookie_t mctc)
 {
     struct mc_info *local_mi;
-    enum mce_result ret = MCER_NOERROR;
+    enum mce_result bank_result = MCER_NOERROR;
+    enum mce_result worst_result = MCER_NOERROR;
     struct mcinfo_common *mic = NULL;
-    struct mca_handle_result mca_res;
     struct mca_binfo binfo;
     const struct mca_error_handler *handlers = mce_dhandlers;
     unsigned int i, handler_num = mce_dhandler_num;
@@ -217,7 +208,7 @@ static enum mce_result mce_action(struct
     /* Processing bank information */
     x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
 
-    for ( ; ret != MCER_RESET && mic && mic->size;
+    for ( ; bank_result != MCER_RESET && mic && mic->size;
           mic = x86_mcinfo_next(mic) )
     {
         if (mic->type != MC_TYPE_BANK) {
@@ -225,34 +216,20 @@ static enum mce_result mce_action(struct
         }
         binfo.mib = (struct mcinfo_bank*)mic;
         binfo.bank = binfo.mib->mc_bank;
-        memset(&mca_res, 0x0f, sizeof(mca_res));
+        bank_result = MCER_NOERROR;
         for ( i = 0; i < handler_num; i++ ) {
             if (handlers[i].owned_error(binfo.mib->mc_status))
             {
-                handlers[i].recovery_handler(&binfo, &mca_res);
-
-                if (mca_res.result & MCA_OWNER)
-                    binfo.mib->mc_domid = mca_res.owner;
-
-                if (mca_res.result == MCA_NEED_RESET)
-                    ret = MCER_RESET;
-                else if (mca_res.result == MCA_RECOVERED)
-                {
-                    if (ret < MCER_RECOVERED)
-                        ret = MCER_RECOVERED;
-                }
-                else if (mca_res.result == MCA_NO_ACTION)
-                {
-                    if (ret < MCER_CONTINUE)
-                        ret = MCER_CONTINUE;
-                }
+                handlers[i].recovery_handler(&binfo, &bank_result);
+                if (worst_result < bank_result)
+                    worst_result = bank_result;
                 break;
             }
         }
         ASSERT(i != handler_num);
     }
 
-    return ret;
+    return worst_result;
 }
 
 /*
@@ -608,7 +585,7 @@ struct mcinfo_recovery *mci_add_pageoff_
 
 static void intel_memerr_dhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     struct mcinfo_bank *bank = binfo->mib;
     struct mcinfo_global *global = binfo->mig;
@@ -618,7 +595,6 @@ static void intel_memerr_dhandler(
     uint64_t mc_status, mc_misc;
 
     mce_printk(MCE_VERBOSE, "MCE: Enter UCR recovery action\n");
-    result->result = MCA_NEED_RESET;
 
     mc_status = bank->mc_status;
     mc_misc = bank->mc_misc;
@@ -626,7 +602,6 @@ static void intel_memerr_dhandler(
         !(mc_status & MCi_STATUS_MISCV) ||
         ((mc_misc & MCi_MISC_ADDRMOD_MASK) != MCi_MISC_PHYSMOD) )
     {
-        result->result |= MCA_NO_ACTION;
         dprintk(XENLOG_WARNING,
             "No physical address provided for memory error\n");
         return;
@@ -644,23 +619,19 @@ static void intel_memerr_dhandler(
 
     /* This is free page */
     if (status & PG_OFFLINE_OFFLINED)
-        result->result = MCA_RECOVERED;
+        *result = MCER_RECOVERED;
     else if (status & PG_OFFLINE_PENDING) {
         /* This page has owner */
         if (status & PG_OFFLINE_OWNED) {
-            result->result |= MCA_OWNER;
-            result->owner = status >> PG_OFFLINE_OWNER_SHIFT;
+            bank->mc_domid = status >> PG_OFFLINE_OWNER_SHIFT;
             mce_printk(MCE_QUIET, "MCE: This error page is ownded"
-              " by DOM %d\n", result->owner);
-            /* Fill vMCE# injection and vMCE# MSR virtualization "
-             * "related data */
-            bank->mc_domid = result->owner;
+              " by DOM %d\n", bank->mc_domid);
             /* XXX: Cannot handle shared pages yet 
              * (this should identify all domains and gfn mapping to
              *  the mfn in question) */
-            BUG_ON( result->owner == DOMID_COW );
-            if ( result->owner != DOMID_XEN ) {
-                d = get_domain_by_id(result->owner);
+            BUG_ON( bank->mc_domid == DOMID_COW );
+            if ( bank->mc_domid != DOMID_XEN ) {
+                d = get_domain_by_id(bank->mc_domid);
                 ASSERT(d);
                 gfn = get_gpfn_from_mfn((bank->mc_addr) >> PAGE_SHIFT);
 
@@ -683,7 +654,7 @@ static void intel_memerr_dhandler(
                       global->mc_gstatus) == -1 )
                 {
                     mce_printk(MCE_QUIET, "Fill vMCE# data for DOM%d "
-                      "failed\n", result->owner);
+                      "failed\n", bank->mc_domid);
                     goto vmce_failed;
                 }
 
@@ -699,7 +670,7 @@ static void intel_memerr_dhandler(
                  * For xen, it has contained the error and finished
                  * its own recovery job.
                  */
-                result->result = MCA_RECOVERED;
+                *result = MCER_RECOVERED;
                 put_domain(d);
 
                 return;
@@ -718,11 +689,13 @@ static int intel_srao_check(uint64_t sta
 
 static void intel_srao_dhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     uint64_t status = binfo->mib->mc_status;
 
     /* For unknown srao error code, no action required */
+    *result = MCER_CONTINUE;
+
     if ( status & MCi_STATUS_VAL )
     {
         switch ( status & INTEL_MCCOD_MASK )
@@ -744,7 +717,7 @@ static int intel_default_check(uint64_t 
 
 static void intel_default_mce_dhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     uint64_t status = binfo->mib->mc_status;
     enum intel_mce_type type;
@@ -752,9 +725,9 @@ static void intel_default_mce_dhandler(
     type = intel_check_mce_type(status);
 
     if (type == intel_mce_fatal || type == intel_mce_ucr_srar)
-        result->result = MCA_NEED_RESET;
-    else if (type == intel_mce_ucr_srao)
-        result->result = MCA_NO_ACTION;
+        *result = MCER_RESET;
+    else
+        *result = MCER_CONTINUE;
 }
 
 static const struct mca_error_handler intel_mce_dhandlers[] = {
@@ -764,7 +737,7 @@ static const struct mca_error_handler in
 
 static void intel_default_mce_uhandler(
              struct mca_binfo *binfo,
-             struct mca_handle_result *result)
+             enum mce_result *result)
 {
     uint64_t status = binfo->mib->mc_status;
     enum intel_mce_type type;
@@ -776,10 +749,10 @@ static void intel_default_mce_uhandler(
     /* Panic if no handler for SRAR error */
     case intel_mce_ucr_srar:
     case intel_mce_fatal:
-        result->result = MCA_NEED_RESET;
+        *result = MCER_RESET;
         break;
     default:
-        result->result = MCA_NO_ACTION;
+        *result = MCER_CONTINUE;
         break;
     }
 }
diff -r e7cd79cd6626 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 13:39:40 2011 +0800
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Sun May 08 18:15:13 2011 +0800
@@ -124,36 +124,6 @@ void mcabanks_free(struct mca_banks *ban
 void mcabanks_free(struct mca_banks *banks);
 extern struct mca_banks *mca_allbanks;
 
-/* Below interfaces are defined for MCA internal processing:
- * a. pre_handler will be called early in MCA ISR context, mainly for early
- *    need_reset detection for avoiding log missing. Also, it is used to judge
- *    impacted DOMAIN if possible.
- * b. mca_error_handler is actually a (error_action_index,
- *    recovery_hanlder pointer) pair. The defined recovery_handler
- *    performs the actual recovery operations such as page_offline, cpu_offline
- *    in softIRQ context when the per_bank MCA error matching the corresponding
- *    mca_code index. If pre_handler can't judge the impacted domain,
- *    recovery_handler must figure it out.
-*/
-
-/* MCA error has been recovered successfully by the recovery action*/
-#define MCA_RECOVERED (0x1 << 0)
-/* MCA error impact the specified DOMAIN in owner field below */
-#define MCA_OWNER (0x1 << 1)
-/* MCA error can't be recovered and need reset */
-#define MCA_NEED_RESET (0x1 << 2)
-/* MCA error did not have any action yet */
-#define MCA_NO_ACTION (0x1 << 3)
-
-struct mca_handle_result
-{
-    uint32_t result;
-    /* Used one result & MCA_OWNER */
-    domid_t owner;
-    /* Used by mca_error_handler, result & MCA_RECOVRED */
-    struct recovery_action *action;
-};
-
 /*Keep bank so that we can get staus even if mib is NULL */
 struct mca_binfo {
     int bank;
@@ -163,8 +133,14 @@ struct mca_binfo {
     struct cpu_user_regs *regs;
 };
 
-extern void (*mca_prehandler)( struct cpu_user_regs *regs,
-                        struct mca_handle_result *result);
+enum mce_result
+{
+    MCER_NOERROR,
+    MCER_RECOVERED,
+    /* Not recoverd, but can continue */
+    MCER_CONTINUE,
+    MCER_RESET,
+};
 
 struct mca_error_handler
 {
@@ -175,7 +151,7 @@ struct mca_error_handler
     */
     int (*owned_error)(uint64_t status);
     void (*recovery_handler)(struct mca_binfo *binfo,
-                    struct mca_handle_result *result);
+                    enum mce_result *result);
 };
 
 /* Global variables */

Attachment: mca-cleanup-7.patch
Description: mca-cleanup-7.patch

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel