
[Xen-devel] [RFC][PATCH 2/2] MCA support for Intel64


  • To: xen-devel@xxxxxxxxxxxxxxxxxxx
  • From: SUZUKI Kazuhiro <kaz@xxxxxxxxxxxxxx>
  • Date: Fri, 26 Sep 2008 15:04:56 +0900 (JST)
  • Delivery-date: Thu, 25 Sep 2008 23:07:11 -0700
  • List-id: Xen developer discussion <xen-devel.lists.xensource.com>

Hi,

This is the linux/x86_64 part of the patch. It re-enables CONFIG_X86_MCE
for x86_64 Xen kernels and adds a Xen-specific machine check handler
(mce-xen.c) which, rather than reading the MCA MSR banks directly, fetches
the error telemetry collected by the hypervisor through a new HYPERVISOR_mca
hypercall; dom0 is notified of new events over VIRQ_MCA.
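For illustration only (not part of the patch): records are consumed through
the /dev/mcelog character device added below, which accepts only reads that
cover the whole log, so a reader sizes its buffer with the MCE_GET_RECORD_LEN
and MCE_GET_LOG_LEN ioctls. A minimal userspace sketch, assuming the
conventional definitions in <asm/mce.h>:

/* mce-dump.c: drain /dev/mcelog once and print the records (sketch). */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/mce.h>    /* struct mce, MCE_GET_RECORD_LEN, MCE_GET_LOG_LEN */

int main(void)
{
        int fd, reclen = 0, loglen = 0;
        char *buf;
        ssize_t n, i;

        fd = open("/dev/mcelog", O_RDONLY);
        if (fd < 0 || ioctl(fd, MCE_GET_RECORD_LEN, &reclen) < 0 ||
            ioctl(fd, MCE_GET_LOG_LEN, &loglen) < 0) {
                perror("mcelog");
                return 1;
        }
        /* mce_read() returns -EINVAL for partial reads; request everything. */
        buf = malloc((size_t)reclen * loglen);
        if (!buf)
                return 1;
        n = read(fd, buf, (size_t)reclen * loglen);
        for (i = 0; n > 0 && i < n / reclen; i++) {
                struct mce *m = (struct mce *)(buf + i * reclen);
                printf("CPU %d bank %d status %016llx addr %016llx\n",
                       (int)m->cpu, (int)m->bank,
                       (unsigned long long)m->status,
                       (unsigned long long)m->addr);
        }
        free(buf);
        close(fd);
        return 0;
}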

Signed-off-by: Kazuhiro Suzuki <kaz@xxxxxxxxxxxxxx>

Thanks,
KAZ
diff -r 916aae9cc11a arch/x86_64/Kconfig
--- a/arch/x86_64/Kconfig       Mon Sep 22 16:08:10 2008 +0100
+++ b/arch/x86_64/Kconfig       Fri Sep 26 14:59:11 2008 +0900
@@ -472,7 +472,6 @@
 
 config X86_MCE
        bool "Machine check support" if EMBEDDED
-       depends on !X86_64_XEN
        default y
        help
           Include a machine check error handler to report hardware errors.
diff -r 916aae9cc11a arch/x86_64/kernel/apic-xen.c
--- a/arch/x86_64/kernel/apic-xen.c     Mon Sep 22 16:08:10 2008 +0100
+++ b/arch/x86_64/kernel/apic-xen.c     Fri Sep 26 14:59:11 2008 +0900
@@ -60,6 +60,14 @@
 int setup_profiling_timer(unsigned int multiplier)
 {
        return -EINVAL;
+}
+
+void setup_APIC_extened_lvt(unsigned char lvt_off, unsigned char vector,
+                           unsigned char msg_type, unsigned char mask)
+{
+       unsigned long reg = (lvt_off << 4) + K8_APIC_EXT_LVT_BASE;
+       unsigned int  v   = (mask << 16) | (msg_type << 8) | vector;
+       apic_write(reg, v);
 }
 
 void smp_local_timer_interrupt(struct pt_regs *regs)
diff -r 916aae9cc11a arch/x86_64/kernel/entry-xen.S
--- a/arch/x86_64/kernel/entry-xen.S    Mon Sep 22 16:08:10 2008 +0100
+++ b/arch/x86_64/kernel/entry-xen.S    Fri Sep 26 14:59:11 2008 +0900
@@ -1259,12 +1259,13 @@
 #ifdef CONFIG_X86_MCE
        /* runs on exception stack */
 ENTRY(machine_check)
-       INTR_FRAME
+/*     INTR_FRAME
        pushq $0
        CFI_ADJUST_CFA_OFFSET 8 
-       paranoidentry do_machine_check
-       jmp paranoid_exit1
-       CFI_ENDPROC
+       paranoidentry do_machine_check*/
+       errorentry do_machine_check
+/*     jmp paranoid_exit1
+       CFI_ENDPROC*/
 END(machine_check)
 #endif
 
diff -r 916aae9cc11a arch/x86_64/kernel/mce-xen.c
--- /dev/null   Thu Jan 01 00:00:00 1970 +0000
+++ b/arch/x86_64/kernel/mce-xen.c      Fri Sep 26 14:59:11 2008 +0900
@@ -0,0 +1,689 @@
+/*
+ * Machine check handler.
+ * K8 parts Copyright 2002,2003 Andi Kleen, SuSE Labs.
+ * Rest from unknown author(s). 
+ * 2004 Andi Kleen. Rewrote most of it. 
+ */
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/string.h>
+#include <linux/rcupdate.h>
+#include <linux/kallsyms.h>
+#include <linux/sysdev.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/capability.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
+#include <linux/ctype.h>
+#include <asm/processor.h> 
+#include <asm/msr.h>
+#include <asm/mce.h>
+#include <asm/kdebug.h>
+#include <asm/uaccess.h>
+#include <asm/smp.h>
+
+#include <xen/evtchn.h>
+#include <xen/interface/vcpu.h>
+
+#define MISC_MCELOG_MINOR 227
+#define NR_BANKS 6
+
+atomic_t mce_entry;
+
+static int mce_dont_init;
+
+/* 0: always panic, 1: panic if deadlock possible, 2: try to avoid panic,
+   3: never panic or exit (for testing only) */
+static int tolerant = 1;
+static int banks;
+static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
+static int mce_bootlog = 1;
+
+/*
+ * Lockless MCE logging infrastructure.
+ * This avoids deadlocks on printk locks without having to break locks. Also
+ * separate MCEs from kernel messages to avoid bogus bug reports.
+ */
+
+struct mce_log mcelog = { 
+       MCE_LOG_SIGNATURE,
+       MCE_LOG_LEN,
+}; 
+
+void mce_log(struct mce *mce)
+{
+       unsigned next, entry;
+       mce->finished = 0;
+       wmb();
+       for (;;) {
+               entry = rcu_dereference(mcelog.next);
+               /* The rmb forces the compiler to reload next in each
+                   iteration */
+               rmb();
+               for (;;) {
+                       /* When the buffer fills up discard new entries. Assume
+                          that the earlier errors are the more interesting. */
+                       if (entry >= MCE_LOG_LEN) {
+                               set_bit(MCE_OVERFLOW, &mcelog.flags);
+                               return;
+                       }
+                       /* Old left over entry. Skip. */
+                       if (mcelog.entry[entry].finished) {
+                               entry++;
+                               continue;
+                       }
+                       break;
+               }
+               smp_rmb();
+               next = entry + 1;
+               if (cmpxchg(&mcelog.next, entry, next) == entry)
+                       break;
+       }
+       memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
+       wmb();
+       mcelog.entry[entry].finished = 1;
+       wmb();
+}
+
+static void print_mce(struct mce *m)
+{
+       printk(KERN_EMERG "\n"
+              KERN_EMERG "HARDWARE ERROR\n"
+              KERN_EMERG
+              "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
+              m->cpu, m->mcgstatus, m->bank, m->status);
+       if (m->rip) {
+               printk(KERN_EMERG 
+                      "RIP%s %02x:<%016Lx> ",
+                      !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
+                      m->cs, m->rip);
+               if (m->cs == __KERNEL_CS)
+                       print_symbol("{%s}", m->rip);
+               printk("\n");
+       }
+       printk(KERN_EMERG "TSC %Lx ", m->tsc); 
+       if (m->addr)
+               printk("ADDR %Lx ", m->addr);
+       if (m->misc)
+               printk("MISC %Lx ", m->misc);   
+       printk("\n");
+       printk(KERN_EMERG "This is not a software problem!\n");
+        printk(KERN_EMERG
+    "Run through mcelog --ascii to decode and contact your hardware vendor\n");
+}
+
+static void mce_panic(char *msg, struct mce *backup, unsigned long start)
+{ 
+       int i;
+       oops_begin();
+       for (i = 0; i < MCE_LOG_LEN; i++) {
+               unsigned long tsc = mcelog.entry[i].tsc;
+               if (time_before(tsc, start))
+                       continue;
+               print_mce(&mcelog.entry[i]); 
+               if (backup && mcelog.entry[i].tsc == backup->tsc)
+                       backup = NULL;
+       }
+       if (backup)
+               print_mce(backup);
+       if (tolerant >= 3)
+               printk("Fake panic: %s\n", msg);
+       else
+               panic(msg);
+} 
+
+static int mce_available(struct cpuinfo_x86 *c)
+{
+       return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
+
+static inline void mce_get_rip(struct mce *m, struct pt_regs *regs)
+{
+       if (regs && (m->mcgstatus & MCG_STATUS_RIPV)) {
+               m->rip = regs->rip;
+               m->cs = regs->cs;
+       } else {
+               m->rip = 0;
+               m->cs = 0;
+       }
+}
+
+#define x86_mcinfo_nentries(_mi)    \
+    (_mi)->mi_nentries
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_first(struct mc_info *mi);
+ */
+#define x86_mcinfo_first(_mi)       \
+    (struct mcinfo_common *)((_mi)->mi_data)
+/* Prototype:
+ *    struct mcinfo_common *x86_mcinfo_next(struct mcinfo_common *mic);
+ */
+#define x86_mcinfo_next(_mic)       \
+    (struct mcinfo_common *)((uint8_t *)(_mic) + (_mic)->size)
+
+#define x86_mcinfo_lookup(_ret, _mi, _type)    \
+    do {                                                        \
+        uint32_t found, i;                                      \
+        struct mcinfo_common *_mic;                             \
+                                                                \
+        found = 0;                                              \
+       (_ret) = NULL;                                          \
+       if (_mi == NULL) break;                                 \
+        _mic = x86_mcinfo_first(_mi);                           \
+        for (i = 0; i < x86_mcinfo_nentries(_mi); i++) {        \
+            if (_mic->type == (_type)) {                        \
+                found = 1;                                      \
+                break;                                          \
+            }                                                   \
+            _mic = x86_mcinfo_next(_mic);                       \
+        }                                                       \
+        (_ret) = found ? _mic : NULL;                           \
+    } while (0)
+
+/* 
+ * The actual machine check handler
+ */
+
+void do_machine_check(struct pt_regs * regs, long error_code)
+{
+       xen_mc_t mc;
+       struct mc_info *mi;
+       struct mcinfo_common *mic = NULL;
+       struct mcinfo_global *mc_global;
+       struct mcinfo_bank *mc_bank;
+       struct mce m, panicm;
+       int nowayout = (tolerant < 1); 
+       int kill_it = 0;
+       u64 mcestart = 0;
+       int ret;
+       int panicm_found = 0;
+
+       atomic_inc(&mce_entry);
+
+       /* XXX: The machine check log left over from the previous reset
+          is disregarded now */
+       if (error_code < 0)
+               goto out;
+
+       mc.cmd = XEN_MC_fetch;
+       mc.interface_version = XEN_MCA_INTERFACE_VERSION;
+       mc.u.mc_fetch.flags = error_code? XEN_MC_TRAP: XEN_MC_CORRECTABLE;
+       ret = HYPERVISOR_mca(&mc);
+       if (ret) {
+               printk("HYPERVISOR_mca failed: error %d\n", ret);
+               goto out;
+       }
+
+       mi = &mc.u.mc_fetch.mc_info;
+
+       /* first convert the global info */
+       x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
+       if (mic == NULL)
+               goto out;
+
+       mc_global = (struct mcinfo_global *)mic;
+
+       memset(&m, 0, sizeof(struct mce));
+       m.cpu = mc_global->mc_coreid;
+       m.mcgstatus = mc_global->mc_gstatus;
+       if (!(m.mcgstatus & MCG_STATUS_RIPV))
+               kill_it = 1;
+       
+       rdtscll(mcestart);
+       barrier();
+
+       /* then the bank information */
+       x86_mcinfo_lookup(mic, mi, MC_TYPE_BANK); /* finds the first entry */
+       do {
+               if (mic == NULL)
+                       goto out;
+               if (mic->type != MC_TYPE_BANK)
+                       goto next;      /* 'continue' would never advance mic */
+
+               mc_bank = (struct mcinfo_bank *)mic;
+               m.misc = 0; 
+               m.addr = 0;
+               m.bank = mc_bank->mc_bank;
+               m.tsc = 0;
+
+               m.status = mc_bank->mc_status;
+               if ((m.status & MCI_STATUS_VAL) == 0)
+                       goto next;
+
+               if (m.status & MCI_STATUS_EN) {
+                       /* In theory _OVER could be a nowayout too, but
+                          assume any overflowed errors were not fatal. */
+                       nowayout |= !!(m.status & MCI_STATUS_PCC);
+                       kill_it |= !!(m.status & MCI_STATUS_UC);
+               }
+
+               if (m.status & MCI_STATUS_MISCV)
+                       m.misc = mc_bank->mc_misc;
+               if (m.status & MCI_STATUS_ADDRV)
+                       m.addr = mc_bank->mc_addr;
+
+               mce_get_rip(&m, regs);
+               if (error_code >= 0)
+                       rdtscll(m.tsc);
+               if (error_code != -2)
+                       mce_log(&m);
+
+               /* Did this bank cause the exception? */
+               /* Assume that the bank with uncorrectable errors did it,
+                  and that there is only a single one. */
+               if ((m.status & MCI_STATUS_UC) && (m.status & MCI_STATUS_EN)) {
+                       panicm = m;
+                       panicm_found = 1;
+               }
+
+               add_taint(TAINT_MACHINE_CHECK);
+
+ next:
+               mic = x86_mcinfo_next(mic); /* next entry */
+               if ((mic == NULL) || (mic->size == 0))
+                       break;
+       } while (1);
+
+       /* Never do anything final in the polling timer */
+       if (!regs)
+               goto out;
+
+       /* If we didn't find an uncorrectable error, pick
+          the last one (shouldn't happen, just being safe). */
+       if (!panicm_found)
+               panicm = m;
+       if (nowayout)
+               mce_panic("Machine check", &panicm, mcestart);
+       if (kill_it) {
+               int user_space = 0;
+
+               if (m.mcgstatus & MCG_STATUS_RIPV)
+                       user_space = panicm.rip && (panicm.cs & 3);
+               
+               /* When the machine was in user space and the CPU didn't get
+                  confused it's normally not necessary to panic, unless you 
+                  are paranoid (tolerant == 0)
+
+                  RED-PEN could be more tolerant for MCEs in idle,
+                  but most likely they occur at boot anyways, where
+                  it is best to just halt the machine. */
+               if ((!user_space && (panic_on_oops || tolerant < 2)) ||
+                   (unsigned)current->pid <= 1)
+                       mce_panic("Uncorrected machine check", &panicm, 
mcestart);
+
+               /* do_exit takes an awful lot of locks and has as
+                  slight risk of deadlocking. If you don't want that
+                  don't set tolerant >= 2 */
+               if (tolerant < 3)
+                       do_exit(SIGBUS);
+       }
+
+ out:
+       atomic_dec(&mce_entry);
+}
+
+static irqreturn_t mcheck_event_handler(int irq, void *dev_id, struct pt_regs *regs)
+{
+       if (mce_available(&current_cpu_data))
+               do_machine_check(NULL, 0);
+       return IRQ_HANDLED;
+}
+
+/* 
+ * Initialize Machine Checks for a CPU.
+ */
+static void mce_init(void *dummy)
+{
+       u64 cap;
+       int i;
+
+       rdmsrl(MSR_IA32_MCG_CAP, cap);
+       banks = cap & 0xff;
+       if (banks > NR_BANKS) { 
+               printk(KERN_INFO "MCE: warning: using only %d banks\n", NR_BANKS);
+               banks = NR_BANKS; 
+       }
+
+       /* Log the machine checks left over from the previous reset.
+          This also clears all registers */
+       do_machine_check(NULL, mce_bootlog ? -1 : -2);
+
+       set_in_cr4(X86_CR4_MCE);
+
+       if (cap & MCG_CTL_P)
+               wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+       for (i = 0; i < banks; i++) {
+               wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]);
+               wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0);
+       }       
+}
+
+/* Add per CPU specific workarounds here */
+static void __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
+{ 
+       /* This should be disabled by the BIOS, but isn't always */
+       if (c->x86_vendor == X86_VENDOR_AMD && c->x86 == 15) {
+               /* disable GART TBL walk error reporting, which trips off 
+                  incorrectly with the IOMMU & 3ware & Cerberus. */
+               clear_bit(10, &bank[4]);
+               /* Lots of broken BIOS around that don't clear them
+                  by default and leave crap in there. Don't log. */
+               mce_bootlog = 0;
+       }
+
+}                      
+
+static void __cpuinit mce_cpu_features(struct cpuinfo_x86 *c)
+{
+       switch (c->x86_vendor) {
+       case X86_VENDOR_INTEL:
+               mce_intel_feature_init(c);
+               break;
+       case X86_VENDOR_AMD:
+               mce_amd_feature_init(c);
+               break;
+       default:
+               break;
+       }
+}
+
+/* 
+ * Called for each booted CPU to set up machine checks.
+ * Must be called with preempt off. 
+ */
+void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
+{
+       static cpumask_t mce_cpus = CPU_MASK_NONE;
+
+       mce_cpu_quirks(c); 
+
+       if (mce_dont_init ||
+           cpu_test_and_set(smp_processor_id(), mce_cpus) ||
+           !mce_available(c))
+               return;
+
+       if (smp_processor_id() == 0) {
+               if (bind_virq_to_irqhandler(VIRQ_MCA, 0,
+                                     mcheck_event_handler, SA_INTERRUPT,
+                                     "mce0", NULL) < 0) {
+                       printk(KERN_ERR "Cannot bind mcheck_event_handler\n");
+                       return;
+               }
+       }
+
+       mce_init(NULL);
+       mce_cpu_features(c);
+}
+
+/*
+ * Character device to read and clear the MCE log.
+ */
+
+static void collect_tscs(void *data) 
+{ 
+       unsigned long *cpu_tsc = (unsigned long *)data;
+       rdtscll(cpu_tsc[smp_processor_id()]);
+} 
+
+static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize, loff_t *off)
+{
+       unsigned long *cpu_tsc;
+       static DECLARE_MUTEX(mce_read_sem);
+       unsigned next;
+       char __user *buf = ubuf;
+       int i, err;
+
+       cpu_tsc = kmalloc(NR_CPUS * sizeof(long), GFP_KERNEL);
+       if (!cpu_tsc)
+               return -ENOMEM;
+
+       down(&mce_read_sem); 
+       next = rcu_dereference(mcelog.next);
+
+       /* Only supports full reads right now */
+       if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce)) { 
+               up(&mce_read_sem);
+               kfree(cpu_tsc);
+               return -EINVAL;
+       }
+
+       err = 0;
+       for (i = 0; i < next; i++) {            
+               unsigned long start = jiffies;
+               while (!mcelog.entry[i].finished) {
+                       if (!time_before(jiffies, start + 2)) {
+                               /* Never-finished entry: zero it and skip.
+                                  A bare 'continue' would spin here forever
+                                  once the record had been cleared. */
+                               memset(mcelog.entry + i, 0, sizeof(struct mce));
+                               goto timeout;
+                       }
+                       cpu_relax();
+               }
+               smp_rmb();
+               err |= copy_to_user(buf, mcelog.entry + i, sizeof(struct mce));
+               buf += sizeof(struct mce);
+ timeout:
+               ;
+       }
+
+       memset(mcelog.entry, 0, next * sizeof(struct mce));
+       mcelog.next = 0;
+
+       synchronize_sched();
+
+       /* Collect entries that were still getting written before the synchronize. */
+
+       on_each_cpu(collect_tscs, cpu_tsc, 1, 1);
+       for (i = next; i < MCE_LOG_LEN; i++) { 
+               if (mcelog.entry[i].finished && 
+                   mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {  
+                       err |= copy_to_user(buf, mcelog.entry+i, sizeof(struct mce));
+                       smp_rmb();
+                       buf += sizeof(struct mce);
+                       memset(&mcelog.entry[i], 0, sizeof(struct mce));
+               }
+       }       
+       up(&mce_read_sem);
+       kfree(cpu_tsc);
+       return err ? -EFAULT : buf - ubuf; 
+}
+
+static int mce_ioctl(struct inode *i, struct file *f, unsigned int cmd, unsigned long arg)
+{
+       int __user *p = (int __user *)arg;
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM; 
+       switch (cmd) {
+       case MCE_GET_RECORD_LEN: 
+               return put_user(sizeof(struct mce), p);
+       case MCE_GET_LOG_LEN:
+               return put_user(MCE_LOG_LEN, p);                
+       case MCE_GETCLEAR_FLAGS: {
+               unsigned flags;
+               do { 
+                       flags = mcelog.flags;
+               } while (cmpxchg(&mcelog.flags, flags, 0) != flags); 
+               return put_user(flags, p); 
+       }
+       default:
+               return -ENOTTY; 
+       } 
+}
+
+static struct file_operations mce_chrdev_ops = {
+       .read = mce_read,
+       .ioctl = mce_ioctl,
+};
+
+static struct miscdevice mce_log_device = {
+       MISC_MCELOG_MINOR,
+       "mcelog",
+       &mce_chrdev_ops,
+};
+
+/* 
+ * Old style boot options parsing. Only for compatibility. 
+ */
+
+static int __init mcheck_disable(char *str)
+{
+       mce_dont_init = 1;
+       return 1;
+}
+
+/* mce=off disables machine check. Note you can reenable it later
+   using sysfs.
+   mce=TOLERANCELEVEL (number, see above)
+   mce=bootlog Log MCEs from before booting. Disabled by default on AMD.
+   mce=nobootlog Don't log MCEs from before booting. */
+static int __init mcheck_enable(char *str)
+{
+       if (*str == '=')
+               str++;
+       if (!strcmp(str, "off"))
+               mce_dont_init = 1;
+       else if (!strcmp(str, "bootlog") || !strcmp(str,"nobootlog"))
+               mce_bootlog = str[0] == 'b';
+       else if (isdigit(str[0]))
+               get_option(&str, &tolerant);
+       else
+               printk("mce= argument %s ignored. Please use /sys", str); 
+       return 1;
+}
+
+__setup("nomce", mcheck_disable);
+__setup("mce", mcheck_enable);
+
+/* 
+ * Sysfs support
+ */ 
+
+/* On resume clear all MCE state. Don't want to see leftovers from the BIOS.
+   Only one CPU is active at this time, the others get readded later using
+   CPU hotplug. */
+static int mce_resume(struct sys_device *dev)
+{
+       mce_init(NULL);
+       return 0;
+}
+
+/* Reinit MCEs after user configuration changes */
+static void mce_restart(void) 
+{ 
+       on_each_cpu(mce_init, NULL, 1, 1);       
+}
+
+static struct sysdev_class mce_sysclass = {
+       .resume = mce_resume,
+       set_kset_name("machinecheck"),
+};
+
+DEFINE_PER_CPU(struct sys_device, device_mce);
+
+/* Why are there no generic functions for this? */
+#define ACCESSOR(name, var, start) \
+       static ssize_t show_ ## name(struct sys_device *s, char *buf) {                   \
+               return sprintf(buf, "%lx\n", (unsigned long)var);                         \
+       }                                                                                 \
+       static ssize_t set_ ## name(struct sys_device *s,const char *buf,size_t siz) {    \
+               char *end;                                                                \
+               unsigned long new = simple_strtoul(buf, &end, 0);                         \
+               if (end == buf) return -EINVAL;                                           \
+               var = new;                                                                \
+               start;                                                                    \
+               return end-buf;                                                           \
+       }                                                                                 \
+       static SYSDEV_ATTR(name, 0644, show_ ## name, set_ ## name);
+
+ACCESSOR(bank0ctl,bank[0],mce_restart())
+ACCESSOR(bank1ctl,bank[1],mce_restart())
+ACCESSOR(bank2ctl,bank[2],mce_restart())
+ACCESSOR(bank3ctl,bank[3],mce_restart())
+ACCESSOR(bank4ctl,bank[4],mce_restart())
+ACCESSOR(bank5ctl,bank[5],mce_restart())
+static struct sysdev_attribute * bank_attributes[NR_BANKS] = {
+       &attr_bank0ctl, &attr_bank1ctl, &attr_bank2ctl,
+       &attr_bank3ctl, &attr_bank4ctl, &attr_bank5ctl};
+ACCESSOR(tolerant,tolerant,)
+
+/* Per cpu sysdev init.  All of the cpus still share the same ctl bank */
+static __cpuinit int mce_create_device(unsigned int cpu)
+{
+       int err;
+       int i;
+       if (!mce_available(&cpu_data[cpu]))
+               return -EIO;
+
+       per_cpu(device_mce,cpu).id = cpu;
+       per_cpu(device_mce,cpu).cls = &mce_sysclass;
+
+       err = sysdev_register(&per_cpu(device_mce,cpu));
+
+       if (!err) {
+               for (i = 0; i < banks; i++)
+                       sysdev_create_file(&per_cpu(device_mce,cpu),
+                               bank_attributes[i]);
+               sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+       }
+       return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static void mce_remove_device(unsigned int cpu)
+{
+       int i;
+
+       for (i = 0; i < banks; i++)
+               sysdev_remove_file(&per_cpu(device_mce,cpu),
+                       bank_attributes[i]);
+       sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+       sysdev_unregister(&per_cpu(device_mce,cpu));
+}
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static int
+mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+       unsigned int cpu = (unsigned long)hcpu;
+
+       switch (action) {
+       case CPU_ONLINE:
+               mce_create_device(cpu);
+               break;
+       case CPU_DEAD:
+               mce_remove_device(cpu);
+               break;
+       }
+       return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier = {
+       .notifier_call = mce_cpu_callback,
+};
+#endif
+
+static __init int mce_init_device(void)
+{
+       int err;
+       int i = 0;
+
+       if (!mce_available(&boot_cpu_data))
+               return -EIO;
+       err = sysdev_class_register(&mce_sysclass);
+
+       for_each_online_cpu(i) {
+               mce_create_device(i);
+       }
+
+       register_hotcpu_notifier(&mce_cpu_notifier);
+       misc_register(&mce_log_device);
+       return err;
+}
+
+device_initcall(mce_init_device);
diff -r 916aae9cc11a buildconfigs/linux-defconfig_xen_x86_64
--- a/buildconfigs/linux-defconfig_xen_x86_64   Mon Sep 22 16:08:10 2008 +0100
+++ b/buildconfigs/linux-defconfig_xen_x86_64   Fri Sep 26 14:59:11 2008 +0900
@@ -142,6 +142,9 @@
 CONFIG_HOTPLUG_CPU=y
 CONFIG_ARCH_ENABLE_MEMORY_HOTPLUG=y
 CONFIG_SWIOTLB=y
+CONFIG_X86_MCE=y
+CONFIG_X86_MCE_INTEL=y
+CONFIG_X86_MCE_AMD=y
 CONFIG_KEXEC=y
 # CONFIG_CRASH_DUMP is not set
 CONFIG_PHYSICAL_START=0x200000
diff -r 916aae9cc11a include/asm-x86_64/mach-xen/asm/hw_irq.h
--- a/include/asm-x86_64/mach-xen/asm/hw_irq.h  Mon Sep 22 16:08:10 2008 +0100
+++ b/include/asm-x86_64/mach-xen/asm/hw_irq.h  Fri Sep 26 14:59:11 2008 +0900
@@ -51,8 +51,10 @@
 #define CALL_FUNCTION_VECTOR   0xfc
 /* fb free - please don't readd KDB here because it's useless
    (hint - think what a NMI bit does to a vector) */
+#endif
 #define THERMAL_APIC_VECTOR    0xfa
 #define THRESHOLD_APIC_VECTOR   0xf9
+#ifndef CONFIG_XEN
 /* f8 free */
 #define INVALIDATE_TLB_VECTOR_END      0xf7
 #define INVALIDATE_TLB_VECTOR_START    0xf0    /* f0-f7 used for TLB flush */
diff -r 916aae9cc11a include/asm-x86_64/mach-xen/asm/hypercall.h
--- a/include/asm-x86_64/mach-xen/asm/hypercall.h       Mon Sep 22 16:08:10 2008 +0100
+++ b/include/asm-x86_64/mach-xen/asm/hypercall.h       Fri Sep 26 14:59:11 2008 +0900
@@ -412,4 +412,10 @@
        return _hypercall2(int, kexec_op, op, args);
 }
 
+static inline int __must_check
+HYPERVISOR_mca(
+       const xen_mc_t *mc)
+{
+       return _hypercall1(int, mca, mc);
+}
 #endif /* __HYPERCALL_H__ */
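
For completeness, a sketch (again, not part of the patch) of how a caller
could use the wrapper above to poll Xen for correctable errors, mirroring
the XEN_MC_fetch setup in do_machine_check(); the helper name
fetch_correctable() is invented for illustration:

/* Poll the hypervisor for pending correctable-error telemetry. */
static int fetch_correctable(struct mc_info **mi)
{
        /* Static so the mc_info handed back stays valid after return;
           do_machine_check() instead keeps the xen_mc_t on its own
           stack and walks it in place. */
        static xen_mc_t mc;
        int ret;

        mc.cmd = XEN_MC_fetch;
        mc.interface_version = XEN_MCA_INTERFACE_VERSION;
        mc.u.mc_fetch.flags = XEN_MC_CORRECTABLE;  /* not in trap context */

        ret = HYPERVISOR_mca(&mc);
        if (ret)
                return ret;                /* hypervisor refused the fetch */

        /* Walk the entries with x86_mcinfo_first()/x86_mcinfo_next(). */
        *mi = &mc.u.mc_fetch.mc_info;
        return 0;
}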