At least the MSR handling for guests can easily be shared between the two
vendors; a lot of the other code in mce_intel.c could likely also be made
common. The goal here, however, is to eliminate the annoying
guest-tried-to-modify-msr messages that result from enabling the MCE code
on the Linux side. Additionally (to avoid having to make the same change
twice to basically identical code) the patch also merges
amd_{fam10,k8}_mcheck_init(), enables the former to also be used for Fam11
(I'd suppose that Fam12 would also need to go here, but I have no data to
confirm that), and makes some minor adjustments (mostly coding style for
the code being moved around).

Signed-off-by: Jan Beulich

--- 2009-07-10.orig/xen/arch/x86/cpu/mcheck/amd_f10.c	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/cpu/mcheck/amd_f10.c	2009-07-15 10:58:44.000000000 +0200
@@ -82,45 +82,16 @@ amd_f10_handler(struct mc_info *mi, uint
 	return MCA_EXTINFO_LOCAL;
 }
 
-
-extern void k8_machine_check(struct cpu_user_regs *regs, long error_code);
-
 /* AMD Family10 machine check */
 int amd_f10_mcheck_init(struct cpuinfo_x86 *c)
 {
-	uint64_t value;
-	uint32_t i;
-	int cpu_nr;
-
-	if (!cpu_has(c, X86_FEATURE_MCA))
+	if (!amd_k8_mcheck_init(c))
 		return 0;
 
-	x86_mce_vector_register(k8_machine_check);
 	x86_mce_callback_register(amd_f10_handler);
 
-	cpu_nr = smp_processor_id();
-
-	rdmsrl(MSR_IA32_MCG_CAP, value);
-	if (value & MCG_CTL_P)	/* Control register present ? */
-		wrmsrl (MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
-	nr_mce_banks = value & MCG_CAP_COUNT;
-
-	for (i = 0; i < nr_mce_banks; i++) {
-		switch (i) {
-		case 4: /* Northbridge */
-			/* Enable error reporting of all errors */
-			wrmsrl(MSR_IA32_MC4_CTL, 0xffffffffffffffffULL);
-			wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
-			break;
-		default:
-			/* Enable error reporting of all errors */
-			wrmsrl(MSR_IA32_MC0_CTL + 4 * i, 0xffffffffffffffffULL);
-			wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
-			break;
-		}
-	}
+	printk("CPU%i: AMD Family%xh machine check reporting enabled\n",
+	       smp_processor_id(), c->x86);
 
-	set_in_cr4(X86_CR4_MCE);
-	printk("CPU%i: AMD Family10h machine check reporting enabled.\n", cpu_nr);
 	return 1;
 }
--- 2009-07-10.orig/xen/arch/x86/cpu/mcheck/amd_k8.c	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/cpu/mcheck/amd_k8.c	2009-07-15 12:28:09.000000000 +0200
@@ -70,7 +70,7 @@
 
 /* Machine Check Handler for AMD K8 family series */
-void k8_machine_check(struct cpu_user_regs *regs, long error_code)
+static void k8_machine_check(struct cpu_user_regs *regs, long error_code)
 {
 	mcheck_cmn_handler(regs, error_code, mca_allbanks);
 }
@@ -78,29 +78,30 @@ void k8_machine_check(struct cpu_user_re
 /* AMD K8 machine check */
 int amd_k8_mcheck_init(struct cpuinfo_x86 *c)
 {
-	uint64_t value;
 	uint32_t i;
-	int cpu_nr;
 
 	/* Check for PPro style MCA; our caller has confirmed MCE support. */
 	if (!cpu_has(c, X86_FEATURE_MCA))
 		return 0;
 
+	mce_cap_init();
 	x86_mce_vector_register(k8_machine_check);
 
-	cpu_nr = smp_processor_id();
-
-	rdmsrl(MSR_IA32_MCG_CAP, value);
-	if (value & MCG_CTL_P)	/* Control register present ? */
-		wrmsrl (MSR_IA32_MCG_CTL, 0xffffffffffffffffULL);
-	nr_mce_banks = value & MCG_CAP_COUNT;
 
 	for (i = 0; i < nr_mce_banks; i++) {
 		switch (i) {
 		case 4: /* Northbridge */
-			/* Enable error reporting of all errors */
-			wrmsrl(MSR_IA32_MC4_CTL, 0xffffffffffffffffULL);
-			wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
-			break;
+			if (c->x86 == 0xf) {
+				/*
+				 * Enable error reporting of all errors except
+				 * for GART TBL walk error reporting, which
+				 * trips off incorrectly with IOMMU & 3ware &
+				 * Cerberus.
+				 */
+				wrmsrl(MSR_IA32_MC4_CTL, ~(1ULL << 10));
+				wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
+				break;
+			}
+			/* fall through */
 		default:
 			/* Enable error reporting of all errors */
@@ -111,7 +112,9 @@ int amd_k8_mcheck_init(struct cpuinfo_x8
 	}
 
 	set_in_cr4(X86_CR4_MCE);
-	printk("CPU%i: AMD K8 machine check reporting enabled.\n", cpu_nr);
+	if (c->x86 < 0x10 || c->x86 > 0x11)
+		printk("CPU%i: AMD K8 machine check reporting enabled\n",
+		       smp_processor_id());
 
 	return 1;
 }
--- 2009-07-10.orig/xen/arch/x86/cpu/mcheck/mce.c	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/cpu/mcheck/mce.c	2009-07-15 12:27:41.000000000 +0200
@@ -23,10 +23,12 @@
 #include "mce.h"
 
 int mce_disabled = 0;
+invbool_param("mce", mce_disabled);
+
 int is_mc_panic = 0;
 
 unsigned int nr_mce_banks;
-EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
+static uint64_t g_mcg_cap;
 
 static void intpose_init(void);
 static void mcinfo_clear(struct mc_info *);
@@ -545,18 +547,17 @@ static int amd_mcheck_init(struct cpuinf
 		rc = amd_k7_mcheck_init(ci);
 		break;
 
+	default:
+		/* Assume that machine check support is available.
+		 * The minimum provided support is at least the K8. */
 	case 0xf:
 		rc = amd_k8_mcheck_init(ci);
 		break;
 
 	case 0x10:
+	case 0x11:
 		rc = amd_f10_mcheck_init(ci);
 		break;
-
-	default:
-		/* Assume that machine check support is available.
-		 * The minimum provided support is at least the K8. */
-		rc = amd_k8_mcheck_init(ci);
 	}
 
 	return rc;
@@ -647,19 +648,273 @@ void mcheck_init(struct cpuinfo_x86 *c)
 		       smp_processor_id());
 }
 
-
-static void __init mcheck_disable(char *str)
+u64 mce_cap_init(void)
 {
-	mce_disabled = 1;
+    u32 l, h;
+    u64 value;
+
+    rdmsr(MSR_IA32_MCG_CAP, l, h);
+    value = ((u64)h << 32) | l;
+    /* For Guest vMCE usage */
+    g_mcg_cap = value & ~MCG_CMCI_P;
+
+    if (l & MCG_CTL_P) /* Control register present ? */
+        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+    nr_mce_banks = l & MCG_CAP_COUNT;
+    if ( nr_mce_banks > MAX_NR_BANKS )
+    {
+        printk(KERN_WARNING "MCE: exceed max mce banks\n");
+        g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
+    }
+
+    return value;
 }
 
-static void __init mcheck_enable(char *str)
+/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
+void mce_init_msr(struct domain *d)
 {
-	mce_disabled = 0;
+    d->arch.vmca_msrs.mcg_status = 0x0;
+    d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
+    d->arch.vmca_msrs.mcg_ctl = ~(uint64_t)0x0;
+    d->arch.vmca_msrs.nr_injection = 0;
+    memset(d->arch.vmca_msrs.mci_ctl, ~0,
+           sizeof(d->arch.vmca_msrs.mci_ctl));
+    INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
+    spin_lock_init(&d->arch.vmca_msrs.lock);
+}
+
+int mce_rdmsr(u32 msr, u32 *lo, u32 *hi)
+{
+    struct domain *d = current->domain;
+    int ret = 1;
+    unsigned int bank;
+    struct bank_entry *entry = NULL;
+
+    *lo = *hi = 0x0;
+    spin_lock(&d->arch.vmca_msrs.lock);
+
+    switch ( msr )
+    {
+    case MSR_IA32_MCG_STATUS:
+        *lo = (u32)d->arch.vmca_msrs.mcg_status;
+        *hi = (u32)(d->arch.vmca_msrs.mcg_status >> 32);
+        gdprintk(XENLOG_DEBUG, "MCE: rd MCG_STATUS lo %x hi %x\n", *lo, *hi);
+        break;
+    case MSR_IA32_MCG_CAP:
+        *lo = (u32)d->arch.vmca_msrs.mcg_cap;
+        *hi = (u32)(d->arch.vmca_msrs.mcg_cap >> 32);
+        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CAP lo %x hi %x\n", *lo, *hi);
+        break;
+    case MSR_IA32_MCG_CTL:
+        *lo = (u32)d->arch.vmca_msrs.mcg_ctl;
+        *hi = (u32)(d->arch.vmca_msrs.mcg_ctl >> 32);
+        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CTL lo %x hi %x\n", *lo, *hi);
+        break;
+    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
+        bank = (msr - MSR_IA32_MC0_CTL) / 4;
+        if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
+        {
+            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
+            ret = -1;
+            break;
+        }
+        switch (msr & (MSR_IA32_MC0_CTL | 3))
+        {
+        case MSR_IA32_MC0_CTL:
+            *lo = (u32)d->arch.vmca_msrs.mci_ctl[bank];
+            *hi = (u32)(d->arch.vmca_msrs.mci_ctl[bank] >> 32);
+            gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_CTL lo %x hi %x\n",
+                     bank, *lo, *hi);
+            break;
+        case MSR_IA32_MC0_STATUS:
+            /* Only error bank is read. Non-error banks simply return. */
+            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+            {
+                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+                                   struct bank_entry, list);
+                if (entry->bank == bank) {
+                    *lo = entry->mci_status;
+                    *hi = entry->mci_status >> 32;
+                    gdprintk(XENLOG_DEBUG,
+                             "MCE: rd MC%u_STATUS in vMCE# context "
+                             "lo %x hi %x\n", bank, *lo, *hi);
+                }
+                else
+                    entry = NULL;
+            }
+            if ( !entry )
+                gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_STATUS\n", bank);
+            break;
+        case MSR_IA32_MC0_ADDR:
+            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+            {
+                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+                                   struct bank_entry, list);
+                if ( entry->bank == bank )
+                {
+                    *lo = entry->mci_addr;
+                    *hi = entry->mci_addr >> 32;
+                    gdprintk(XENLOG_DEBUG,
+                             "MCE: rd MC%u_ADDR in vMCE# context lo %x hi %x\n",
+                             bank, *lo, *hi);
+                }
+            }
+            break;
+        case MSR_IA32_MC0_MISC:
+            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+            {
+                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+                                   struct bank_entry, list);
+                if ( entry->bank == bank )
+                {
+                    *lo = entry->mci_misc;
+                    *hi = entry->mci_misc >> 32;
+                    gdprintk(XENLOG_DEBUG,
+                             "MCE: rd MC%u_MISC in vMCE# context lo %x hi %x\n",
+                             bank, *lo, *hi);
+                }
+            }
+            break;
+        }
+        break;
+    default:
+        switch ( boot_cpu_data.x86_vendor )
+        {
+        case X86_VENDOR_INTEL:
+            ret = intel_mce_rdmsr(msr, lo, hi);
+            break;
+        default:
+            ret = 0;
+            break;
+        }
+        break;
+    }
+
+    spin_unlock(&d->arch.vmca_msrs.lock);
+    return ret;
 }
 
-custom_param("nomce", mcheck_disable);
-custom_param("mce", mcheck_enable);
+int mce_wrmsr(u32 msr, u64 value)
+{
+    struct domain *d = current->domain;
+    struct bank_entry *entry = NULL;
+    unsigned int bank;
+    int ret = 1;
+
+    if ( !g_mcg_cap )
+        return 0;
+
+    spin_lock(&d->arch.vmca_msrs.lock);
+
+    switch ( msr )
+    {
+    case MSR_IA32_MCG_CTL:
+        if ( value && (value + 1) )
+        {
+            gdprintk(XENLOG_WARNING, "MCE: value written to MCG_CTL "
+                     "should be all 0s or 1s\n");
+            ret = -1;
+            break;
+        }
+        d->arch.vmca_msrs.mcg_ctl = value;
+        break;
+    case MSR_IA32_MCG_STATUS:
+        d->arch.vmca_msrs.mcg_status = value;
+        gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", value);
+        /* For HVM guest, this is the point for deleting vMCE injection node */
+        if ( d->is_hvm && (d->arch.vmca_msrs.nr_injection > 0) )
+        {
+            d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
+            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+            {
+                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+                                   struct bank_entry, list);
+                if ( entry->mci_status & MCi_STATUS_VAL )
+                    gdprintk(XENLOG_ERR, "MCE: MCi_STATUS MSR should have "
+                             "been cleared before write MCG_STATUS MSR\n");
+
+                gdprintk(XENLOG_DEBUG, "MCE: Delete HVM last injection "
+                         "Node, nr_injection %u\n",
+                         d->arch.vmca_msrs.nr_injection);
+                list_del(&entry->list);
+            }
+            else
+                gdprintk(XENLOG_DEBUG, "MCE: Not found HVM guest"
+                         " last injection Node, something Wrong!\n");
+        }
+        break;
+    case MSR_IA32_MCG_CAP:
+        gdprintk(XENLOG_WARNING, "MCE: MCG_CAP is read-only\n");
+        ret = -1;
+        break;
+    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
+        bank = (msr - MSR_IA32_MC0_CTL) / 4;
+        if ( bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT) )
+        {
+            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
+            ret = -1;
+            break;
+        }
+        switch ( msr & (MSR_IA32_MC0_CTL | 3) )
+        {
+        case MSR_IA32_MC0_CTL:
+            if ( value && (value + 1) )
+            {
+                gdprintk(XENLOG_WARNING, "MCE: value written to MC%u_CTL "
+                         "should be all 0s or 1s (is %"PRIx64")\n",
+                         bank, value);
+                ret = -1;
+                break;
+            }
+            d->arch.vmca_msrs.mci_ctl[bank] = value;
+            break;
+        case MSR_IA32_MC0_STATUS:
+            /* Give the first entry of the list, it corresponds to current
+             * vMCE# injection. When vMCE# is finished processing by
+             * the guest, this node will be deleted.
+             * Only error bank is written. Non-error banks simply return.
+             */
+            if ( !list_empty(&d->arch.vmca_msrs.impact_header) )
+            {
+                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+                                   struct bank_entry, list);
+                if ( entry->bank == bank )
+                    entry->mci_status = value;
+                gdprintk(XENLOG_DEBUG,
+                         "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
+                         bank, value);
+            }
+            else
+                gdprintk(XENLOG_DEBUG,
+                         "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, value);
+            break;
+        case MSR_IA32_MC0_ADDR:
+            gdprintk(XENLOG_WARNING, "MCE: MC%u_ADDR is read-only\n", bank);
+            ret = -1;
+            break;
+        case MSR_IA32_MC0_MISC:
+            gdprintk(XENLOG_WARNING, "MCE: MC%u_MISC is read-only\n", bank);
+            ret = -1;
+            break;
+        }
+        break;
+    default:
+        switch ( boot_cpu_data.x86_vendor )
+        {
+        case X86_VENDOR_INTEL:
+            ret = intel_mce_wrmsr(msr, value);
+            break;
+        default:
+            ret = 0;
+            break;
+        }
+        break;
+    }
+
+    spin_unlock(&d->arch.vmca_msrs.lock);
+    return ret;
+}
 
 static void mcinfo_clear(struct mc_info *mi)
 {
--- 2009-07-10.orig/xen/arch/x86/cpu/mcheck/mce.h	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/cpu/mcheck/mce.h	2009-07-15 11:52:08.000000000 +0200
@@ -25,6 +25,11 @@ void intel_mcheck_timer(struct cpuinfo_x
 void mce_intel_feature_init(struct cpuinfo_x86 *c);
 void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
 
+u64 mce_cap_init(void);
+
+int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
+int intel_mce_wrmsr(u32 msr, u64 value);
+
 int mce_available(struct cpuinfo_x86 *c);
 int mce_firstbank(struct cpuinfo_x86 *c);
 /* Helper functions used for collecting error telemetry */
--- 2009-07-10.orig/xen/arch/x86/cpu/mcheck/mce_intel.c	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/cpu/mcheck/mce_intel.c	2009-07-15 12:27:34.000000000 +0200
@@ -995,14 +995,9 @@ void mce_intel_feature_init(struct cpuin
     intel_init_cmci(c);
 }
 
-static uint64_t g_mcg_cap;
-static void mce_cap_init(struct cpuinfo_x86 *c)
+static void _mce_cap_init(struct cpuinfo_x86 *c)
 {
-    u32 l, h;
-
-    rdmsr (MSR_IA32_MCG_CAP, l, h);
-    /* For Guest vMCE usage */
-    g_mcg_cap = ((u64)h << 32 | l) & (~MCG_CMCI_P);
+    u32 l = mce_cap_init();
 
     if ((l & MCG_CMCI_P) && cpu_has_apic)
         cmci_support = 1;
@@ -1011,12 +1006,6 @@ static void mce_cap_init(struct cpuinfo_
     if (l & MCG_SER_P)
         ser_support = 1;
 
-    nr_mce_banks = l & MCG_CAP_COUNT;
-    if (nr_mce_banks > MAX_NR_BANKS)
-    {
-        printk(KERN_WARNING "MCE: exceed max mce banks\n");
-        g_mcg_cap = (g_mcg_cap & ~MCG_CAP_COUNT) | MAX_NR_BANKS;
-    }
     if (l & MCG_EXT_P)
     {
         nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
@@ -1052,9 +1041,6 @@ static void mce_init(void)
     }
 
     set_in_cr4(X86_CR4_MCE);
-    rdmsr (MSR_IA32_MCG_CAP, l, h);
-    if (l & MCG_CTL_P) /* Control register present ? */
-        wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
 
     for (i = firstbank; i < nr_mce_banks; i++)
     {
@@ -1076,7 +1062,7 @@ static void mce_init(void)
 
 /* p4/p6 family have similar MCA initialization process */
 int intel_mcheck_init(struct cpuinfo_x86 *c)
 {
-    mce_cap_init(c);
+    _mce_cap_init(c);
     printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
             smp_processor_id());
 
@@ -1094,220 +1080,39 @@ int intel_mcheck_init(struct cpuinfo_x86
     return 1;
 }
 
-/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
-void intel_mce_init_msr(struct domain *d)
-{
-    d->arch.vmca_msrs.mcg_status = 0x0;
-    d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
-    d->arch.vmca_msrs.mcg_ctl = (uint64_t)~0x0;
-    d->arch.vmca_msrs.nr_injection = 0;
-    memset(d->arch.vmca_msrs.mci_ctl, ~0,
-           sizeof(d->arch.vmca_msrs.mci_ctl));
-    INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
-    spin_lock_init(&d->arch.vmca_msrs.lock);
-}
-
 int intel_mce_wrmsr(u32 msr, u64 value)
 {
-    struct domain *d = current->domain;
-    struct bank_entry *entry = NULL;
-    unsigned int bank;
     int ret = 1;
 
-    spin_lock(&d->arch.vmca_msrs.lock);
-    switch(msr)
+    switch ( msr )
     {
-    case MSR_IA32_MCG_CTL:
-        if (value != (u64)~0x0 && value != 0x0) {
-            gdprintk(XENLOG_WARNING, "MCE: value written to MCG_CTL"
-                     "should be all 0s or 1s\n");
-            ret = -1;
-            break;
-        }
-        d->arch.vmca_msrs.mcg_ctl = value;
-        break;
-    case MSR_IA32_MCG_STATUS:
-        d->arch.vmca_msrs.mcg_status = value;
-        gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", value);
-        /* For HVM guest, this is the point for deleting vMCE injection node */
-        if ( (d->is_hvm) && (d->arch.vmca_msrs.nr_injection >0) )
-        {
-            d->arch.vmca_msrs.nr_injection--; /* Should be 0 */
-            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if (entry->mci_status & MCi_STATUS_VAL)
-                    gdprintk(XENLOG_ERR, "MCE: MCi_STATUS MSR should have "
-                             "been cleared before write MCG_STATUS MSR\n");
-
-                gdprintk(XENLOG_DEBUG, "MCE: Delete HVM last injection "
-                         "Node, nr_injection %u\n",
-                         d->arch.vmca_msrs.nr_injection);
-                list_del(&entry->list);
-            }
-            else
-                gdprintk(XENLOG_DEBUG, "MCE: Not found HVM guest"
-                         " last injection Node, something Wrong!\n");
-        }
-        break;
-    case MSR_IA32_MCG_CAP:
-        gdprintk(XENLOG_WARNING, "MCE: MCG_CAP is read-only\n");
-        ret = -1;
-        break;
     case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
         gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, "
                  "Guest should not write this MSR!\n");
         break;
-    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
-        bank = (msr - MSR_IA32_MC0_CTL) / 4;
-        if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) {
-            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
-            ret = -1;
-            break;
-        }
-        switch (msr & (MSR_IA32_MC0_CTL | 3))
-        {
-        case MSR_IA32_MC0_CTL:
-            if (value != (u64)~0x0 && value != 0x0) {
-                gdprintk(XENLOG_WARNING, "MCE: value written to MC%u_CTL"
-                         "should be all 0s or 1s (is %"PRIx64")\n",
-                         bank, value);
-                ret = -1;
-                break;
-            }
-            d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] = value;
-            break;
-        case MSR_IA32_MC0_STATUS:
-            /* Give the first entry of the list, it corresponds to current
-             * vMCE# injection. When vMCE# is finished processing by the
-             * the guest, this node will be deleted.
-             * Only error bank is written. Non-error banks simply return.
-             */
-            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if ( entry->bank == bank )
-                    entry->mci_status = value;
-                gdprintk(XENLOG_DEBUG,
-                         "MCE: wr MC%u_STATUS %"PRIx64" in vMCE#\n",
-                         bank, value);
-            } else
-                gdprintk(XENLOG_DEBUG,
-                         "MCE: wr MC%u_STATUS %"PRIx64"\n", bank, value);
-            break;
-        case MSR_IA32_MC0_ADDR:
-            gdprintk(XENLOG_WARNING, "MCE: MC%u_ADDR is read-only\n", bank);
-            ret = -1;
-            break;
-        case MSR_IA32_MC0_MISC:
-            gdprintk(XENLOG_WARNING, "MCE: MC%u_MISC is read-only\n", bank);
-            ret = -1;
-            break;
-        }
-        break;
     default:
         ret = 0;
         break;
     }
-    spin_unlock(&d->arch.vmca_msrs.lock);
+
     return ret;
 }
 
 int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi)
 {
-    struct domain *d = current->domain;
     int ret = 1;
-    unsigned int bank;
-    struct bank_entry *entry = NULL;
 
-    *lo = *hi = 0x0;
-    spin_lock(&d->arch.vmca_msrs.lock);
-    switch(msr)
+    switch ( msr )
    {
-    case MSR_IA32_MCG_STATUS:
-        *lo = (u32)d->arch.vmca_msrs.mcg_status;
-        *hi = (u32)(d->arch.vmca_msrs.mcg_status >> 32);
-        gdprintk(XENLOG_DEBUG, "MCE: rd MCG_STATUS lo %x hi %x\n", *lo, *hi);
-        break;
-    case MSR_IA32_MCG_CAP:
-        *lo = (u32)d->arch.vmca_msrs.mcg_cap;
-        *hi = (u32)(d->arch.vmca_msrs.mcg_cap >> 32);
-        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CAP lo %x hi %x\n", *lo, *hi);
-        break;
-    case MSR_IA32_MCG_CTL:
-        *lo = (u32)d->arch.vmca_msrs.mcg_ctl;
-        *hi = (u32)(d->arch.vmca_msrs.mcg_ctl >> 32);
-        gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CTL lo %x hi %x\n", *lo, *hi);
-        break;
     case MSR_IA32_MC0_CTL2 ... MSR_IA32_MC0_CTL2 + MAX_NR_BANKS - 1:
         gdprintk(XENLOG_WARNING, "We have disabled CMCI capability, "
                  "Guest should not read this MSR!\n");
         break;
-    case MSR_IA32_MC0_CTL ... MSR_IA32_MC0_CTL + 4 * MAX_NR_BANKS - 1:
-        bank = (msr - MSR_IA32_MC0_CTL) / 4;
-        if (bank >= (d->arch.vmca_msrs.mcg_cap & MCG_CAP_COUNT)) {
-            gdprintk(XENLOG_WARNING, "MCE: bank %u does not exist\n", bank);
-            ret = -1;
-            break;
-        }
-        switch (msr & (MSR_IA32_MC0_CTL | 3))
-        {
-        case MSR_IA32_MC0_CTL:
-            *lo = (u32)d->arch.vmca_msrs.mci_ctl[bank];
-            *hi = (u32)(d->arch.vmca_msrs.mci_ctl[bank] >> 32);
-            gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_CTL lo %x hi %x\n",
-                     bank, *lo, *hi);
-            break;
-        case MSR_IA32_MC0_STATUS:
-            /* Only error bank is read. Non-error banks simply return. */
-            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if (entry->bank == bank) {
-                    *lo = entry->mci_status;
-                    *hi = entry->mci_status >> 32;
-                    gdprintk(XENLOG_DEBUG,
-                             "MCE: rd MC%u_STATUS in vmCE# context "
-                             "lo %x hi %x\n", bank, *lo, *hi);
-                } else
-                    entry = NULL;
-            }
-            if (!entry)
-                gdprintk(XENLOG_DEBUG, "MCE: rd MC%u_STATUS\n", bank);
-            break;
-        case MSR_IA32_MC0_ADDR:
-            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if (entry->bank == bank) {
-                    *lo = entry->mci_addr;
-                    *hi = entry->mci_addr >> 32;
-                    gdprintk(XENLOG_DEBUG,
-                             "MCE: rd MC%u_ADDR in vMCE# context lo %x hi %x\n",
-                             bank, *lo, *hi);
-                }
-            }
-            break;
-        case MSR_IA32_MC0_MISC:
-            if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
-                entry = list_entry(d->arch.vmca_msrs.impact_header.next,
-                                   struct bank_entry, list);
-                if (entry->bank == bank) {
-                    *lo = entry->mci_misc;
-                    *hi = entry->mci_misc >> 32;
-                    gdprintk(XENLOG_DEBUG,
-                             "MCE: rd MC%u_MISC in vMCE# context lo %x hi %x\n",
-                             bank, *lo, *hi);
-                }
-            }
-            break;
-        }
-        break;
     default:
         ret = 0;
         break;
     }
-    spin_unlock(&d->arch.vmca_msrs.lock);
+
     return ret;
 }
--- 2009-07-10.orig/xen/arch/x86/domain.c	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/domain.c	2009-07-15 11:18:24.000000000 +0200
@@ -493,8 +493,7 @@ int arch_domain_create(struct domain *d,
             goto fail;
 
         /* For Guest vMCE MSRs virtualization */
-        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
-            intel_mce_init_msr(d);
+        mce_init_msr(d);
     }
 
     if ( is_hvm_domain(d) )
--- 2009-07-10.orig/xen/arch/x86/hvm/hvm.c	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/hvm/hvm.c	2009-07-15 12:07:48.000000000 +0200
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include <asm/traps.h>
 #include
 #include
 #include
@@ -1773,8 +1774,6 @@ void hvm_rdtsc_intercept(struct cpu_user
     regs->edx = (uint32_t)(tsc >> 32);
 }
 
-extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
-extern int intel_mce_wrmsr(u32 msr, u64 value);
 int hvm_msr_read_intercept(struct cpu_user_regs *regs)
 {
     uint32_t ecx = regs->ecx;
@@ -1852,7 +1851,7 @@ int hvm_msr_read_intercept(struct cpu_us
         break;
 
     default:
-        ret = intel_mce_rdmsr(ecx, &lo, &hi);
+        ret = mce_rdmsr(ecx, &lo, &hi);
         if ( ret < 0 )
             goto gp_fault;
         else if ( ret )
@@ -1951,7 +1950,7 @@ int hvm_msr_write_intercept(struct cpu_u
         break;
 
     default:
-        ret = intel_mce_wrmsr(ecx, msr_content);
+        ret = mce_wrmsr(ecx, msr_content);
         if ( ret < 0 )
             goto gp_fault;
         else if ( ret )
--- 2009-07-10.orig/xen/arch/x86/traps.c	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/arch/x86/traps.c	2009-07-15 12:07:19.000000000 +0200
@@ -1681,7 +1681,8 @@ static int emulate_privileged_op(struct
     unsigned long *reg, eip = regs->eip, res;
     u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;
     enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
-    unsigned int port, i, data_sel, ar, data, rc, bpmatch = 0;
+    int rc;
+    unsigned int port, i, data_sel, ar, data, bpmatch = 0;
     unsigned int op_bytes, op_default, ad_bytes, ad_default;
 #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
                     ? regs->reg \
@@ -2246,14 +2247,12 @@ static int emulate_privileged_op(struct
         default:
             if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
                 break;
-            if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
-            {
-                int rc = intel_mce_wrmsr(regs->ecx, res);
-                if ( rc < 0 )
-                    goto fail;
-                if ( rc )
-                    break;
-            }
+
+            rc = mce_wrmsr(regs->ecx, res);
+            if ( rc < 0 )
+                goto fail;
+            if ( rc )
+                break;
 
             if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
                  (eax != l) || (edx != h) )
@@ -2335,15 +2334,11 @@ static int emulate_privileged_op(struct
             break;
         }
 
-        if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
-        {
-            int rc = intel_mce_rdmsr(regs->ecx, &l, &h);
-
-            if ( rc < 0 )
-                goto fail;
-            if ( rc )
-                goto rdmsr_writeback;
-        }
+        rc = mce_rdmsr(regs->ecx, &l, &h);
+        if ( rc < 0 )
+            goto fail;
+        if ( rc )
+            goto rdmsr_writeback;
 
         /* Everyone can read the MSR space. */
         /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n",
--- 2009-07-10.orig/xen/include/asm-x86/traps.h	2009-07-15 11:53:41.000000000 +0200
+++ 2009-07-10/xen/include/asm-x86/traps.h	2009-07-15 11:19:14.000000000 +0200
@@ -47,9 +47,9 @@ extern int guest_has_trap_callback(struc
 extern int send_guest_trap(struct domain *d, uint16_t vcpuid,
                            unsigned int trap_nr);
 
-/* Intel vMCE MSRs virtualization */
-extern void intel_mce_init_msr(struct domain *d);
-extern int intel_mce_wrmsr(u32 msr, u64 value);
-extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
+/* Guest vMCE MSRs virtualization */
+extern void mce_init_msr(struct domain *d);
+extern int mce_wrmsr(u32 msr, u64 value);
+extern int mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
 
 #endif /* ASM_TRAP_H */
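
For reference, a note on the MCi bank MSR decode that the shared
mce_rdmsr()/mce_wrmsr() rely on: each bank owns four consecutive MSRs
starting at MC0_CTL (architecturally 0x400), so the bank index is
(msr - MSR_IA32_MC0_CTL) / 4, and masking with (MSR_IA32_MC0_CTL | 3) folds
any MCi_* MSR back onto the MC0_* case labels. Below is a minimal standalone
sketch of just that decode, not part of the patch; the decode() helper and
its printf output are illustrative only, while the MSR numbers are the
architectural values.

/* Standalone illustration of the per-bank MSR decode; not part of the
 * patch. MSR numbers are the architectural ones (IA32_MC0_CTL = 0x400,
 * four MSRs per bank); the decode() helper is hypothetical. */
#include <stdio.h>

#define MSR_IA32_MC0_CTL    0x400
#define MSR_IA32_MC0_STATUS 0x401
#define MSR_IA32_MC0_ADDR   0x402
#define MSR_IA32_MC0_MISC   0x403

static void decode(unsigned int msr)
{
    /* Four consecutive MSRs per bank, starting at MC0_CTL. */
    unsigned int bank = (msr - MSR_IA32_MC0_CTL) / 4;

    /* MC0_CTL is 0x400, so (MSR_IA32_MC0_CTL | 3) keeps the 0x400 base
     * plus the two low bits that select the register within the bank,
     * mapping any MCi_* MSR onto the matching MC0_* case label. */
    switch (msr & (MSR_IA32_MC0_CTL | 3)) {
    case MSR_IA32_MC0_CTL:
        printf("%#x -> MC%u_CTL\n", msr, bank);
        break;
    case MSR_IA32_MC0_STATUS:
        printf("%#x -> MC%u_STATUS\n", msr, bank);
        break;
    case MSR_IA32_MC0_ADDR:
        printf("%#x -> MC%u_ADDR\n", msr, bank);
        break;
    case MSR_IA32_MC0_MISC:
        printf("%#x -> MC%u_MISC\n", msr, bank);
        break;
    }
}

int main(void)
{
    decode(MSR_IA32_MC0_STATUS); /* prints "0x401 -> MC0_STATUS" */
    decode(0x413);               /* MC4_MISC: bank 4, the K8 northbridge */
    return 0;
}

This is why both the read and the write paths can share one range case
statement per bank area instead of enumerating every MSR individually.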