Hi,
this patch fixes an issue on Nehalem cpus discussed on the list
http://lists.xensource.com/archives/html/xen-devel/2009-10/msg01015.html
and further
http://lists.xensource.com/archives/html/xen-devel/2009-11/msg00011.html
but a fix never found its way into the Xen sources. We had a fix in our private
tree, but now we use the official SLES Xen and so we hit the issue again :-(
We didn't use the fix proposed on
http://lists.xensource.com/archives/html/xen-devel/2009-11/msg00100.html
because we saw that some counter overflows got lost. So we tried the solution
proposed in this patch.
Thanks.
Dietmar
Fix an issue on Nehalem cpus where performance counter overflows may lead to
endless NMIs on this cpu.
Signed-off-by: Dietmar Hahn <dietmar.hahn@xxxxxxxxxxxxxx>
diff -r 7d2fdc083c9c -r 0ff5de47951c xen/arch/x86/hvm/vmx/vpmu_core2.c
--- a/xen/arch/x86/hvm/vmx/vpmu_core2.c Thu Nov 18 12:28:31 2010 +0000
+++ b/xen/arch/x86/hvm/vmx/vpmu_core2.c Fri Nov 19 10:34:31 2010 +0100
@@ -36,6 +36,70 @@
#include <asm/hvm/vpmu.h>
#include <asm/hvm/vmx/vpmu_core2.h>
+/*
+ * QUIRK to work around an issue on Nehalem processors, currently seen
+ * on family 6 cpus E5520 (model 26) and X7542 (model 46).
+ * The issue leads to endless NMI loops on the processor.
+ * If a counter triggers an NMI and, while the NMI handler is running, another
+ * counter overflows, the second counter triggers endless new NMIs.
+ * A solution is to read all flagged counters and, if the value is 0, write
+ * 1 into it.
+ */
+static int is_nmi_quirk;
+
+/*
+ * Set is_nmi_quirk for the affected cpus (family 6, model 26 or 46).
+ * One single store, so the flag is never transiently cleared on re-runs.
+ */
+static void check_nmi_quirk(void)
+{
+    u8 family = current_cpu_data.x86;
+    u8 cpu_model = current_cpu_data.x86_model;
+
+    is_nmi_quirk = (family == 6) && (cpu_model == 26 || cpu_model == 46);
+}
+
+static int core2_get_pmc_count(void);
+/*
+ * Scan the low @nr bits of @ovf; for each set bit i, read counter MSR
+ * @base + i and, if it reads back as 0, write 1 into it.
+ */
+static void nmi_quirk_fix_counters(u32 base, u64 ovf, int nr)
+{
+    int i;
+
+    for ( i = 0; i < nr; i++ )
+    {
+        u64 cnt;
+
+        if ( !(ovf & (1ULL << i)) )
+            continue;
+        rdmsrl(base + i, cnt);
+        if ( cnt == 0 )
+            wrmsrl(base + i, 1);
+    }
+}
+
+/*
+ * Apply the Nehalem NMI quirk workaround to every counter flagged in
+ * @msr_content: general-purpose counter flags are taken from bit 0
+ * upwards, the 3 fixed counter flags from bit 32 upwards.
+ * Does nothing unless is_nmi_quirk is set.
+ */
+static void handle_nmi_quirk(u64 msr_content)
+{
+    if ( !is_nmi_quirk )
+        return;
+
+    nmi_quirk_fix_counters(MSR_P6_PERFCTR0, msr_content,
+                           core2_get_pmc_count());
+    nmi_quirk_fix_counters(MSR_CORE_PERF_FIXED_CTR0, msr_content >> 32, 3);
+}
+
+#define CHECK_HANDLE_NMI_QUIRK(msr_content) /* expands to a full statement */ \
+    do { if ( is_nmi_quirk ) handle_nmi_quirk(msr_content); } \
+    while ( 0 ); /* ';' is part of the macro: the call site supplies none */
+
u32 core2_counters_msr[] = {
MSR_CORE_PERF_FIXED_CTR0,
MSR_CORE_PERF_FIXED_CTR1,
@@ -494,6 +558,9 @@
rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, msr_content);
if ( !msr_content )
return 0;
+
+ CHECK_HANDLE_NMI_QUIRK(msr_content)
+
core2_vpmu_cxt->global_ovf_status |= msr_content;
msr_content = 0xC000000700000000 | ((1 << core2_get_pmc_count()) - 1);
wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
@@ -515,6 +582,7 @@
static void core2_vpmu_initialise(struct vcpu *v)
{
+ check_nmi_quirk(); /* (re)evaluate the Nehalem NMI quirk flag; v is not used here */
}
static void core2_vpmu_destroy(struct vcpu *v)
--
Company details: http://ts.fujitsu.com/imprint.html |