# HG changeset patch
# User Keir Fraser <keir.fraser@xxxxxxxxxx>
# Date 1237569929 0
# Node ID 891af2c54155afc4ca47a8e8eb8f6865b2f76f0f
# Parent cc60defe5b9697ab0e068caa4fd1f8798bfe5104
MCA interfaces between XEN/DOM0, let DOM0 know the MCA recovery action
Signed-off-by: Jiang, yunhong <yunhong.jiang@xxxxxxxxx>
Signed-off-by: Ke, liping <liping.ke@xxxxxxxxx>
---
xen/arch/x86/cpu/mcheck/x86_mca.h | 47 +++++++++++++++++++++++++
xen/include/public/arch-x86/xen-mca.h | 63 ++++++++++++++++++++++++++++++++++
2 files changed, 110 insertions(+)
diff -r cc60defe5b96 -r 891af2c54155 xen/arch/x86/cpu/mcheck/x86_mca.h
--- a/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Mar 20 17:24:53 2009 +0000
+++ b/xen/arch/x86/cpu/mcheck/x86_mca.h Fri Mar 20 17:25:29 2009 +0000
@@ -87,6 +87,53 @@ typedef DECLARE_BITMAP(cpu_banks_t, MAX_
typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
+/* Below interfaces are defined for MCA internal processing:
+ * a. pre_handler will be called early in MCA ISR context, mainly for early
+ * need_reset detection so that logs are not lost. Also, it is used to
+ * determine the impacted DOMAIN if possible.
+ * b. mca_error_handler is actually an (error_action_index,
+ * recovery_handler pointer) pair. The defined recovery_handler
+ * performs the actual recovery operations such as page_offline, cpu_offline
+ * in softIRQ context when the per_bank MCA error matches the corresponding
+ * mca_code index. If pre_handler can't determine the impacted domain,
+ * recovery_handler must figure it out.
+*/
+
+/* MCA error has been recovered successfully by the recovery action */
+#define MCA_RECOVERED (0x1 << 0)
+/* MCA error impacts the specified DOMAIN in owner field below */
+#define MCA_OWNER (0x1 << 1)
+/* MCA error can't be recovered and needs reset */
+#define MCA_NEED_RESET (0x1 << 2)
+/* MCA error needs further actions in softIRQ context for recovery */
+#define MCA_MORE_ACTION (0x1 << 3)
+
+struct mca_handle_result
+{
+ uint32_t result; /* tested against the MCA_* flags above */
+ /* Used when result & MCA_OWNER */
+ domid_t owner;
+ /* Used by mca_error_handler, when result & MCA_RECOVERED */
+ struct recovery_action *action;
+};
+
+extern void (*mca_prehandler)( struct cpu_user_regs *regs,
+ struct mca_handle_result *result); /* presumably "pre_handler" above */
+
+struct mca_error_handler
+{
+ /* Assume corresponding recovery action could be uniquely
+ * identified by mca_code. Otherwise, we might need to have
+ * a separate function to decode the corresponding actions
+ * for the particular mca error later.
+ */
+ uint16_t mca_code;
+ void (*recovery_handler)( struct mcinfo_bank *bank,
+ struct mcinfo_global *global,
+ struct mcinfo_extended *extension,
+ struct mca_handle_result *result); /* presumably the out-param */
+};
+
/* Global variables */
extern int mce_disabled;
extern unsigned int nr_mce_banks;
diff -r cc60defe5b96 -r 891af2c54155 xen/include/public/arch-x86/xen-mca.h
--- a/xen/include/public/arch-x86/xen-mca.h Fri Mar 20 17:24:53 2009 +0000
+++ b/xen/include/public/arch-x86/xen-mca.h Fri Mar 20 17:25:29 2009 +0000
@@ -104,6 +104,7 @@
#define MC_TYPE_GLOBAL 0
#define MC_TYPE_BANK 1
#define MC_TYPE_EXTENDED 2
+#define MC_TYPE_RECOVERY 3 /* presumably tags struct mcinfo_recovery */
struct mcinfo_common {
uint16_t type; /* structure type */
@@ -171,6 +172,68 @@ struct mcinfo_extended {
*/
struct mcinfo_msr mc_msr[10];
};
+
+/* Recovery action flags: give recovery result information to DOM0 */
+
+/* Xen took a successful recovery action; the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some cases */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different recovery action types. If the action is performed successfully,
+ * the REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU Offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* The interface below is used between XEN and DOM0 for passing XEN's recovery
+ * action information to DOM0.
+ * Usage scenario: after offlining a broken page, XEN might pass its page
+ * offline recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easily broken page earlier on the next reboot.
+*/
+struct page_offline_action
+{
+ /* Params for passing the offlined page number to DOM0 */
+ uint64_t mfn; /* frame number of the offlined page */
+ uint64_t status; /* NOTE(review): encoding not defined here */
+};
+
+struct cpu_offline_action
+{
+ /* Params for passing the identity of the offlined CPU to DOM0 */
+ uint32_t mc_socketid; /* socket id */
+ uint16_t mc_coreid; /* core id */
+ uint16_t mc_core_threadid; /* presumably thread id within the core */
+};
+
+#define MAX_UNION_SIZE 16
+struct mc_recovery
+{
+ uint16_t mc_bank; /* bank nr */
+ uint8_t action_flags; /* presumably REC_ACTION_* bits -- confirm */
+ uint8_t action_types; /* presumably MC_ACTION_* bits -- confirm */
+ union {
+ struct page_offline_action page_retire;
+ struct cpu_offline_action cpu_offline;
+ uint8_t pad[MAX_UNION_SIZE]; /* union is >= MAX_UNION_SIZE bytes */
+ } action_info;
+};
+
+struct mcinfo_recovery
+{
+ struct mcinfo_common common; /* presumably type MC_TYPE_RECOVERY */
+ struct mc_recovery mc_action; /* bank, flags and per-action info */
+};
+
#define MCINFO_HYPERCALLSIZE 1024
#define MCINFO_MAXSIZE 768
_______________________________________________
Xen-changelog mailing list
Xen-changelog@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-changelog
|