
Re: [Xen-devel] sudden hang on boot on AMD Notebook



On Wed, Apr 08, 2015 at 09:09:23AM +0200, Olaf Hering wrote:
> On Tue, Apr 07, Konrad Rzeszutek Wilk wrote:
> 
> > You might also want to use 'sync_console'.
> 
> This does not give any extra info in the case of the reported hang.

No, but are you able to at least dump the status of the registers?

If not, you could write a small piece of timer code that would try to
print this out (or retrofit the inline code) and let you see something
on the console - see the 'debug_timer_fn' function in the patch below.
> 
> Olaf
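
For reference, the skeleton of such a self-rearming debug timer looks
roughly like this (a minimal sketch against Xen's timer API; the printk
body and the 'setup_debug_timer' name are placeholders for whatever
state dump you wire in - the patch below sets up the same pattern as
'debug_timer_fn'):

    #include <xen/init.h>
    #include <xen/lib.h>
    #include <xen/time.h>
    #include <xen/timer.h>

    static struct timer debug_timer;

    /* Fires once a second and re-arms itself, so output keeps flowing
     * even when the rest of the system looks wedged. */
    static void debug_timer_fn(void *unused)
    {
        printk("--MARK--\n"); /* replace with the state you care about */
        set_timer(&debug_timer, NOW() + SECONDS(1));
    }

    static int __init setup_debug_timer(void)
    {
        init_timer(&debug_timer, debug_timer_fn, NULL, smp_processor_id());
        set_timer(&debug_timer, NOW() + SECONDS(1));
        return 0;
    }
    __initcall(setup_debug_timer);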

commit a4171fa12583eabd126bc5b4c305f49b2fb2b515
Author: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
Date:   Fri Nov 14 15:00:39 2014 -0500

    debug:
    
    Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>

diff --git a/xen/drivers/passthrough/io.c b/xen/drivers/passthrough/io.c
index 8e9141e..23e5ed1 100644
--- a/xen/drivers/passthrough/io.c
+++ b/xen/drivers/passthrough/io.c
@@ -54,6 +54,117 @@ enum {
     STATE_ZOMBIE
 };
 
+struct _debug_f {
+    unsigned int domid;
+    unsigned long count;
+    s_time_t last;
+    struct list_head list;
+    unsigned int state;
+    struct hvm_pirq_dpci *dpci;
+};
+
+struct __debug {
+    struct _debug_f ok;
+    struct _debug_f poison;
+    struct _debug_f raise;
+    struct _debug_f reset;
+    struct _debug_f zombie_softirq;
+    struct _debug_f zombie_raise;
+};
+
+static DEFINE_PER_CPU(struct __debug, _d);
+
+static void _record(struct _debug_f *d, struct hvm_pirq_dpci *pirq_dpci)
+{
+    if (pirq_dpci->dom)
+        d->domid = pirq_dpci->dom->domain_id;
+    else
+        d->domid = 0xdead;
+    d->count++;
+    d->last = NOW();
+    d->list.next = pirq_dpci->softirq_list.next;
+    d->list.prev = pirq_dpci->softirq_list.prev;
+    d->state = pirq_dpci->state;
+    d->dpci = pirq_dpci;
+}
+
+enum {
+    Z_SOFTIRQ,
+    Z_RAISE,
+    ERR_POISON,
+    OK_SOFTIRQ,
+    OK_RAISE,
+    OK_RESET,
+};
+
+static void dump_record(struct _debug_f *d, unsigned int type)
+{
+    static const char *const names[] = {
+        [Z_SOFTIRQ]  = "Z-softirq ",
+        [Z_RAISE]    = "Z-raise   ",
+        [ERR_POISON] = "ERR-poison",
+        [OK_SOFTIRQ] = "OK-softirq",
+        [OK_RAISE]   = "OK-raise  ",
+        [OK_RESET]   = "OK-reset  ",
+    };
+#define LONG(x) [_HVM_IRQ_DPCI_##x] = #x
+    static const char *const names_flag[] = {
+        LONG(MACH_PCI_SHIFT),
+        LONG(MAPPED_SHIFT),
+        LONG(EOI_LATCH_SHIFT),
+        LONG(GUEST_PCI_SHIFT),
+        LONG(GUEST_MSI_SHIFT),
+    };
+#undef LONG
+    unsigned int i;
+    s_time_t now;
+
+    if ( d->domid == 0 )
+        return;
+
+    if ( type >= ARRAY_SIZE(names) )
+        BUG();
+
+    now = NOW();
+    printk("d%d %s %lumsec ago, state:%x, %ld count, [prev:%p, next:%p] ",
+           d->domid, names[type],
+           (unsigned long)((now - d->last) / MILLISECS(1)),
+            d->state, d->count, d->list.prev, d->list.next);
+
+    if ( d->dpci )
+    {
+        struct hvm_pirq_dpci *pirq_dpci = d->dpci;
+
+        for ( i = 0; i <= _HVM_IRQ_DPCI_GUEST_MSI_SHIFT; i++ )
+            if ( (pirq_dpci->flags & (1 << i)) && names_flag[i] )
+                printk("%s ", names_flag[i]);
+
+        printk(" PIRQ:%d", pirq_dpci->pirq);
+        if (pirq_dpci->line)
+            printk(" LINE: %d", pirq_dpci->line);
+    }
+    printk("\n");
+    memset(d, 0, sizeof(struct _debug_f));
+}
+
+static void dump_debug(unsigned char key)
+{
+    unsigned int cpu;
+
+    for_each_online_cpu ( cpu )
+    {
+        struct __debug *d;
+        d = &per_cpu(_d, cpu);
+
+        printk("CPU%02d: \n" ,cpu);
+        dump_record(&d->ok, OK_SOFTIRQ);
+        dump_record(&d->raise, OK_RAISE);
+        dump_record(&d->reset, OK_RESET);
+        dump_record(&d->poison, ERR_POISON);
+        dump_record(&d->zombie_softirq, Z_SOFTIRQ);
+        dump_record(&d->zombie_raise, Z_RAISE);
+    }
+}
 /*
  * This can be called multiple times, but the softirq is only raised once.
  * That is until the STATE_SCHED and STATE_ZOMBIE state has been cleared. The
@@ -65,13 +176,18 @@ enum {
 static void raise_softirq_for(struct hvm_pirq_dpci *pirq_dpci)
 {
     unsigned long flags;
+    struct __debug *d = &__get_cpu_var(_d);
 
     if ( test_bit(STATE_ZOMBIE, &pirq_dpci->state) )
+    {
+        _record(&d->zombie_raise, pirq_dpci);
         return;
-
+    }
     if ( test_and_set_bit(STATE_SCHED, &pirq_dpci->state) )
         return;
 
+    _record(&d->raise, pirq_dpci);
+
     get_knownalive_domain(pirq_dpci->dom);
 
     local_irq_save(flags);
@@ -111,9 +227,12 @@ bool_t pt_pirq_softirq_active(struct hvm_pirq_dpci *pirq_dpci)
 static void pt_pirq_softirq_reset(struct hvm_pirq_dpci *pirq_dpci)
 {
     struct domain *d = pirq_dpci->dom;
+    struct __debug *debug = &__get_cpu_var(_d);
 
     ASSERT(spin_is_locked(&d->event_lock));
 
+    _record(&debug->reset, pirq_dpci);
+
     switch ( cmpxchg(&pirq_dpci->state, 1 << STATE_SCHED, 1 << STATE_ZOMBIE ) )
     {
     case (1 << STATE_SCHED):
@@ -277,6 +396,7 @@ int pt_irq_create_bind(
              * As such on every 'pt_irq_create_bind' call we MUST set it.
              */
             pirq_dpci->dom = d;
+            pirq_dpci->pirq = pirq;
             /* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
             rc = pirq_guest_bind(d->vcpu[0], info, 0);
             if ( rc == 0 && pt_irq_bind->u.msi.gtable )
@@ -291,6 +411,7 @@ int pt_irq_create_bind(
                      * to be scheduled but we must deal with the one that may be
                      * in the queue.
                      */
+                    pirq_dpci->line = __LINE__;
                     pt_pirq_softirq_reset(pirq_dpci);
                 }
             }
@@ -300,6 +421,7 @@ int pt_irq_create_bind(
                 pirq_dpci->gmsi.gvec = 0;
                 pirq_dpci->dom = NULL;
                 pirq_dpci->flags = 0;
+                pirq_dpci->pirq = -pirq;
                 pirq_cleanup_check(info, d);
                 spin_unlock(&d->event_lock);
                 return rc;
@@ -544,6 +666,7 @@ int pt_irq_destroy_bind(
          * See comment in pt_irq_create_bind's PT_IRQ_TYPE_MSI before the
          * call to pt_pirq_softirq_reset.
          */
+        pirq_dpci->line = __LINE__;
         pt_pirq_softirq_reset(pirq_dpci);
 
         pirq_cleanup_check(pirq, d);
@@ -778,6 +901,8 @@ unlock:
     spin_unlock(&d->event_lock);
 }
 
+#include <xen/console.h>
+
 /*
  * Note: 'pt_pirq_softirq_reset' can clear the STATE_SCHED before we get to
 * doing it. If that is the case we let 'pt_pirq_softirq_reset' do ref-counting.
@@ -787,6 +912,9 @@ static void dpci_softirq(void)
     unsigned int cpu = smp_processor_id();
     unsigned int reset = 0;
     LIST_HEAD(our_list);
+    struct __debug *debug;
+
+    debug = &per_cpu(_d, cpu);
 
     local_irq_disable();
     list_splice_init(&per_cpu(dpci_list, cpu), &our_list);
@@ -796,9 +924,22 @@ static void dpci_softirq(void)
     {
         struct hvm_pirq_dpci *pirq_dpci;
         struct domain *d;
+        struct list_head *entry;
 
         pirq_dpci = list_entry(our_list.next, struct hvm_pirq_dpci, softirq_list);
-        list_del(&pirq_dpci->softirq_list);
+        entry = &pirq_dpci->softirq_list;
+        if ( entry->next == LIST_POISON1 || entry->next == LIST_POISON2 ||
+             entry->prev == LIST_POISON1 || entry->prev == LIST_POISON2 )
+        {
+            _record(&debug->poison, pirq_dpci);
+            console_start_sync();
+            dump_debug(0);
+            console_end_sync();
+            domain_crash(pirq_dpci->dom);
+            break;
+        }
+        _record(&debug->ok, pirq_dpci);
+        list_del(entry);
 
         d = pirq_dpci->dom;
         smp_mb(); /* 'd' MUST be saved before we set/clear the bits. */
@@ -813,8 +954,10 @@ static void dpci_softirq(void)
             put_domain(d);
         }
         else
+        {
+            _record(&debug->zombie_softirq, pirq_dpci);
             reset = 1;
-
+        }
         clear_bit(STATE_RUN, &pirq_dpci->state);
         if ( reset )
         {
@@ -833,6 +976,7 @@ static int cpu_callback(
     {
     case CPU_UP_PREPARE:
         INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
+        memset(&per_cpu(_d, cpu), 0, sizeof(struct __debug));
         break;
     case CPU_UP_CANCELED:
     case CPU_DEAD:
@@ -854,15 +998,43 @@ static struct notifier_block cpu_nfb = {
     .notifier_call = cpu_callback,
 };
 
+#include <xen/keyhandler.h>
+
+static struct keyhandler dump_debug_keyhandler = {
+    .diagnostic = 1,
+    .u.fn = dump_debug,
+    .desc = "dpci debug stats"
+};
+static struct timer debug_timer;
+static s_time_t last_time = 0;
+static unsigned int debug_cnt = 0;
+
+static void debug_timer_fn(void *d)
+{
+    if ( (debug_cnt++ % 10) == 0 )
+        printk("--MARK--\n");
+
+    last_time = NOW();
+    set_timer(&debug_timer, last_time + SECONDS(1));
+}
+
 static int __init setup_dpci_softirq(void)
 {
     unsigned int cpu;
 
     for_each_online_cpu(cpu)
+    {
         INIT_LIST_HEAD(&per_cpu(dpci_list, cpu));
-
+        memset(&per_cpu(_d, cpu), 0, sizeof(struct __debug));
+    }
     open_softirq(HVM_DPCI_SOFTIRQ, dpci_softirq);
     register_cpu_notifier(&cpu_nfb);
+
+    init_timer(&debug_timer, debug_timer_fn, NULL, smp_processor_id());
+    last_time = NOW();
+    set_timer(&debug_timer, NOW() + SECONDS(1));
+    register_keyhandler('k', &dump_debug_keyhandler);
+
     return 0;
 }
 __initcall(setup_dpci_softirq);
diff --git a/xen/include/xen/hvm/irq.h b/xen/include/xen/hvm/irq.h
index 9709397..1fb1292 100644
--- a/xen/include/xen/hvm/irq.h
+++ b/xen/include/xen/hvm/irq.h
@@ -100,6 +100,8 @@ struct hvm_pirq_dpci {
     struct hvm_gmsi_info gmsi;
     struct timer timer;
     struct list_head softirq_list;
+    unsigned int pirq;
+    unsigned int line;
 };
 
 void pt_pirq_init(struct domain *, struct hvm_pirq_dpci *);

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 

