[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] netback Oops then xenwatch stuck in D state


  • To: xen-devel@xxxxxxxxxxxxx
  • From: "Christopher S. Aker" <caker@xxxxxxxxxxxx>
  • Date: Sun, 10 Feb 2013 17:03:16 -0500
  • Delivery-date: Sun, 10 Feb 2013 22:04:03 +0000
  • List-id: Xen developer discussion <xen-devel.lists.xen.org>

And another this afternoon on a different machine:

BUG: unable to handle kernel NULL pointer dereference at 00000000000008b8
IP: [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80
PGD 0
Oops: 0002 [#1] SMP
Modules linked in: ebt_comment ebt_arp ebt_set ebt_limit ebt_ip6 ebt_ip ip_set_hash_net ip_set ebtable_nat xen_gntdev bonding ebtable_filter e1000e
CPU 5
Pid: 1550, comm: netback/5 Not tainted 3.7.6-1-x86_64 #1 Supermicro X8DT6/X8DT6
RIP: e030:[<ffffffff81011dda>]  [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80
RSP: e02b:ffff8800836e7b58  EFLAGS: 00010006
RAX: 0000000000000400 RBX: 00000000000008b8 RCX: 000000000045de5d
RDX: 0000000000000001 RSI: 0000000000000211 RDI: 00000000000008b8
RBP: ffff8800836e7b78 R08: 0000000000000068 R09: 0000000000000000
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000001
R13: 0000000000000200 R14: 0000000000000400 R15: 000000000045de5d
FS:  00007f474a465700(0000) GS:ffff880100740000(0000) knlGS:0000000000000000
CS:  e033 DS: 0000 ES: 0000 CR0: 000000008005003b
CR2: 00000000000008b8 CR3: 0000000001c0b000 CR4: 0000000000002660
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process netback/5 (pid: 1550, threadinfo ffff8800836e6000, task ffff880084510000)
Stack:
 0000000000000211 00000000000008b8 ffff8800771e5700 ffff8800771e57d8
 ffff8800836e7b98 ffffffff817605da 0000000000000000 00000000000008b8
 ffff8800836e7bd8 ffffffff8154446f ffff8800771e5000 0000000000000000
Call Trace:
 [<ffffffff817605da>] _raw_spin_lock_irqsave+0x2a/0x40
 [<ffffffff8154446f>] xen_netbk_schedule_xenvif+0x8f/0x100
 [<ffffffff81544505>] xen_netbk_check_rx_xenvif+0x25/0x60
 [<ffffffff815445eb>] netbk_tx_err+0x5b/0x70
 [<ffffffff8154518c>] xen_netbk_tx_build_gops+0xb8c/0xbc0
 [<ffffffff81012880>] ? __switch_to+0x160/0x4f0
 [<ffffffff810891b8>] ? idle_balance+0xf8/0x150
 [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0
 [<ffffffff8175f7b4>] ? __schedule+0x394/0x750
 [<ffffffff815452af>] xen_netbk_kthread+0xef/0x9d0
 [<ffffffff81080150>] ? finish_task_switch+0x60/0xd0
 [<ffffffff810720c0>] ? wake_up_bit+0x40/0x40
 [<ffffffff815451c0>] ? xen_netbk_tx_build_gops+0xbc0/0xbc0
 [<ffffffff81071a06>] kthread+0xc6/0xd0
 [<ffffffff810037b9>] ? xen_end_context_switch+0x19/0x20
 [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70
 [<ffffffff8176847c>] ret_from_fork+0x7c/0xb0
 [<ffffffff81071940>] ? kthread_freezable_should_stop+0x70/0x70
Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15
RIP  [<ffffffff81011dda>] xen_spin_lock_flags+0x3a/0x80
 RSP <ffff8800836e7b58>
CR2: 00000000000008b8
---[ end trace 62de4ce454b1699e ]---


Code: 24 08 4c 89 6c 24 10 4c 89 74 24 18 49 89 f5 48 89 fb 41 81 e5 00 02 00 00 41 bc 01 00 00 00 41 be 00 04 00 00 44 89 f0 44 89 e2 <86> 13 84 d2 74 0b f3 90 80 3b 00 74 f3 ff c8 75 f5 84 d2 75 15
All code
========
   0:   24 08                   and    $0x8,%al
   2:   4c 89 6c 24 10          mov    %r13,0x10(%rsp)
   7:   4c 89 74 24 18          mov    %r14,0x18(%rsp)
   c:   49 89 f5                mov    %rsi,%r13
   f:   48 89 fb                mov    %rdi,%rbx
  12:   41 81 e5 00 02 00 00    and    $0x200,%r13d
  19:   41 bc 01 00 00 00       mov    $0x1,%r12d
  1f:   41 be 00 04 00 00       mov    $0x400,%r14d
  25:   44 89 f0                mov    %r14d,%eax
  28:   44 89 e2                mov    %r12d,%edx
  2b:*  86 13                   xchg   %dl,(%rbx)     <-- trapping instruction
  2d:   84 d2                   test   %dl,%dl
  2f:   74 0b                   je     0x3c
  31:   f3 90                   pause
  33:   80 3b 00                cmpb   $0x0,(%rbx)
  36:   74 f3                   je     0x2b
  38:   ff c8                   dec    %eax
  3a:   75 f5                   jne    0x31
  3c:   84 d2                   test   %dl,%dl
  3e:   75 15                   jne    0x55

Code starting with the faulting instruction
===========================================
   0:   86 13                   xchg   %dl,(%rbx)
   2:   84 d2                   test   %dl,%dl
   4:   74 0b                   je     0x11
   6:   f3 90                   pause
   8:   80 3b 00                cmpb   $0x0,(%rbx)
   b:   74 f3                   je     0x0
   d:   ff c8                   dec    %eax
   f:   75 f5                   jne    0x6
  11:   84 d2                   test   %dl,%dl
  13:   75 15                   jne    0x2a



/*
 * Acquire a Xen byte spinlock, spinning briefly before falling back to
 * the slow path.
 *
 * @lock:       lock to take; it is reinterpreted as a struct xen_spinlock
 *              and its ->lock member is the byte that gets exchanged.
 * @irq_enable: passed through unchanged to xen_spin_lock_slow().
 *
 * Each pass of the outer loop polls the lock byte for at most TIMEOUT
 * iterations.  If the byte is still held afterwards, xen_spin_lock_slow()
 * is called (unless TIMEOUT == ~0, in which case the fast path is simply
 * retried).  A non-zero return from xen_spin_lock_slow() ends the loop; a
 * zero return restarts the fast path.
 *
 * NOTE(review): @lock is not validated here; the very first xchgb writes
 * through it, so a bogus pointer from the caller faults at that
 * instruction.
 */
static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
{
        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
        unsigned timeout;
        u8 oldval;
        u64 start_spin;

        /* presumably lock statistics bookkeeping -- confirm against ADD_STATS */
        ADD_STATS(taken, 1);

        /* presumably a timestamp covering the whole acquisition -- TODO confirm */
        start_spin = spin_time_start();

        do {
                u64 start_spin_fast = spin_time_start();

                /* Fresh polling budget for every pass of the fast path. */
                timeout = TIMEOUT;

                /*
                 * %0 = xl->lock (memory), %1 = oldval (starts as 1 via the
                 * "1" (1) input), %2 = timeout.
                 *
                 * 1: swap 1 into the lock byte; oldval receives the
                 *    previous contents.  If that was 0 we now own the
                 *    lock, so jump to 3.
                 * 2: rep;nop, then re-read the byte.  If it reads 0, go
                 *    back to 1 and retry the exchange; otherwise decrement
                 *    timeout and keep polling until it reaches 0.
                 * 3: done.  oldval != 0 here means the budget ran out
                 *    without acquiring the lock.
                 */
                asm("1: xchgb %1,%0\n"
                    "   testb %1,%1\n"
                    "   jz 3f\n"
                    "2: rep;nop\n"
                    "   cmpb $0,%0\n"
                    "   je 1b\n"
                    "   dec %2\n"
                    "   jnz 2b\n"
                    "3:\n"
                    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
                    : "1" (1)
                    : "memory");

                spin_time_accum_spinning(start_spin_fast);

        /*
         * Loop again only if the fast path failed AND either the slow
         * path is disabled (TIMEOUT == ~0) or xen_spin_lock_slow()
         * returned zero.
         */
        } while (unlikely(oldval != 0 &&
(TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));

        spin_time_accum_total(start_spin);
}

/*
 * Take @lock on behalf of a caller that supplies its saved IRQ flags.
 *
 * @lock:  lock to acquire; forwarded untouched to __xen_spin_lock().
 * @flags: saved flags word, inspected with raw_irqs_disabled_flags().
 *
 * __xen_spin_lock() is asked for irq_enable exactly when
 * raw_irqs_disabled_flags(@flags) reports false.
 */
static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
{
        const bool irq_enable = !raw_irqs_disabled_flags(flags);

        __xen_spin_lock(lock, irq_enable);
}



(gdb) disassemble xen_spin_lock_flags
Dump of assembler code for function xen_spin_lock_flags:
0xffffffff81011da0 <xen_spin_lock_flags+0>:     push   %rbp
0xffffffff81011da1 <xen_spin_lock_flags+1>:     mov    %rsp,%rbp
0xffffffff81011da4 <xen_spin_lock_flags+4>:     sub    $0x20,%rsp
0xffffffff81011da8 <xen_spin_lock_flags+8>:     mov    %rbx,(%rsp)
0xffffffff81011dac <xen_spin_lock_flags+12>:    mov    %r12,0x8(%rsp)
0xffffffff81011db1 <xen_spin_lock_flags+17>:    mov    %r13,0x10(%rsp)
0xffffffff81011db6 <xen_spin_lock_flags+22>:    mov    %r14,0x18(%rsp)
0xffffffff81011dbb <xen_spin_lock_flags+27>:    mov    %rsi,%r13
0xffffffff81011dbe <xen_spin_lock_flags+30>:    mov    %rdi,%rbx
0xffffffff81011dc1 <xen_spin_lock_flags+33>:    and    $0x200,%r13d
0xffffffff81011dc8 <xen_spin_lock_flags+40>:    mov    $0x1,%r12d
0xffffffff81011dce <xen_spin_lock_flags+46>:    mov    $0x400,%r14d
0xffffffff81011dd4 <xen_spin_lock_flags+52>:    mov    %r14d,%eax
0xffffffff81011dd7 <xen_spin_lock_flags+55>:    mov    %r12d,%edx
0xffffffff81011dda <xen_spin_lock_flags+58>:    xchg   %dl,(%rbx) <--
0xffffffff81011ddc <xen_spin_lock_flags+60>:    test   %dl,%dl
0xffffffff81011dde <xen_spin_lock_flags+62>: je 0xffffffff81011deb <xen_spin_lock_flags+75>
0xffffffff81011de0 <xen_spin_lock_flags+64>:    pause
0xffffffff81011de2 <xen_spin_lock_flags+66>:    cmpb   $0x0,(%rbx)
0xffffffff81011de5 <xen_spin_lock_flags+69>: je 0xffffffff81011dda <xen_spin_lock_flags+58>
0xffffffff81011de7 <xen_spin_lock_flags+71>:    dec    %eax
0xffffffff81011de9 <xen_spin_lock_flags+73>: jne 0xffffffff81011de0 <xen_spin_lock_flags+64>
0xffffffff81011deb <xen_spin_lock_flags+75>:    test   %dl,%dl
0xffffffff81011ded <xen_spin_lock_flags+77>: jne 0xffffffff81011e04 <xen_spin_lock_flags+100>
0xffffffff81011def <xen_spin_lock_flags+79>:    mov    (%rsp),%rbx
0xffffffff81011df3 <xen_spin_lock_flags+83>:    mov    0x8(%rsp),%r12
0xffffffff81011df8 <xen_spin_lock_flags+88>:    mov    0x10(%rsp),%r13
0xffffffff81011dfd <xen_spin_lock_flags+93>:    mov    0x18(%rsp),%r14
0xffffffff81011e02 <xen_spin_lock_flags+98>:    leaveq
0xffffffff81011e03 <xen_spin_lock_flags+99>:    retq
0xffffffff81011e04 <xen_spin_lock_flags+100>:   xor    %esi,%esi
0xffffffff81011e06 <xen_spin_lock_flags+102>:   mov    %rbx,%rdi
0xffffffff81011e09 <xen_spin_lock_flags+105>:   test   %r13,%r13
0xffffffff81011e0c <xen_spin_lock_flags+108>:   setne  %sil
0xffffffff81011e10 <xen_spin_lock_flags+112>: callq 0xffffffff81011ca0 <xen_spin_lock_slow>
0xffffffff81011e15 <xen_spin_lock_flags+117>:   test   %eax,%eax
0xffffffff81011e17 <xen_spin_lock_flags+119>:   jne    0xffffffff81011def <xen_spin_lock_flags+79>
0xffffffff81011e19 <xen_spin_lock_flags+121>:   jmp    0xffffffff81011dd4 <xen_spin_lock_flags+52>
End of assembler dump.


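We're not so good at this, but it looks like the xl->lock dereference (the xchg at xen_spin_lock_flags+0x3a) is what we hit? The lock pointer in RDI/RBX is 0x8b8, which matches CR2, so the lock address itself looks bogus -- was the lock (or the structure containing it) already gone?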

-Chris

_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxx
http://lists.xen.org/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.