[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Xen-devel] RE: bnx2 FTQ issues on 2.6.32 + xen 4.0.1;



On Mon, Jul 04, 2011 at 03:09:06PM +0800, MaoXiaoyun wrote:
> 
> Hi:
>  
>        I've spent more time on this.
>        It's easy to reproduce. I have two physical host, on top of which,  
> totally start 15VMS, doing migrating over
> and over again. Since xen live migration invovle large mount of memory copy, 
> so I can see that the network is
> saturated. About 16 hours later, the FTQ dump will show up.
>  
>       I've debugged more. 
>       Below debug code is added when FTQ happens.
>       Does anyone know what is going on? 

No idea. Did Michael Chan send you a private email with some ideas?


>       Thanks.
> 
> --------------
>    static void
> bnx2_tx_timeout(struct net_device *dev)
> {
>         struct bnx2 *bp = netdev_priv(dev);
>         struct bnx2_napi *bnapi = &bp->bnx2_napi[0];
>         struct bnx2_tx_ring_info *txr = &bnapi->tx_ring;
>         struct bnx2_rx_ring_info *rxr = &bnapi->rx_ring;
>         int i ;
>         
>         bnx2_dump_ftq(bp);
>         bnx2_dump_state(bp);
>         
>         if (stop_on_tx_timeout) {
>                 printk(KERN_WARNING PFX
>                        "%s: prevent chip reset during tx timeout\n",
>                         bp->dev->name);
>                 smp_rmb();
>                 
>                 printk("last status idx %d \n", bnapi->last_status_idx);
>                 printk("hw_tx_cons %d, txr->hw_tx_conds %d txr->tx_prod %d 
> txr->tx_cons %d\n",
>                          bnx2_get_hw_tx_cons(bnapi), txr->hw_tx_cons, 
> txr->tx_prod, txr->tx_cons);
>                 printk("hw_rx_cons %d, txr->hw_rx_conds %d\n", 
> bnx2_get_hw_rx_cons(bnapi), rxr->rx_cons);
>                 printk("sblk->status_attn_bits 
> %d\n",bnapi->status_blk.msi->status_attn_bits);
>                 printk("sblk->status_attn_bits_ack 
> %d\n",bnapi->status_blk.msi->status_attn_bits_ack);
>                 printk("bnx2_tx_avail %d \n",(bnx2_tx_avail(bp, txr)));
>                 
>                 printk("sblk->status_tx_quick_consumer_index0 
> %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index0);
>                 printk("sblk->status_tx_quick_consumer_index1 
> %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index1);
>                 printk("sblk->status_tx_quick_consumer_index2 
> %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index2);
>                 printk("sblk->status_tx_quick_consumer_index3 
> %d\n",bnapi->status_blk.msi->status_tx_quick_consumer_index3);
>                 printk("sblk->status_rx_quick_consumer_index0 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index0);
>                 printk("sblk->status_rx_quick_consumer_index1 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index1);
>                 printk("sblk->status_rx_quick_consumer_index2 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index2);
>                 printk("sblk->status_rx_quick_consumer_index3 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index3);
>                 printk("sblk->status_rx_quick_consumer_index4 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index4);
>                 printk("sblk->status_rx_quick_consumer_index5 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index5);
>                 printk("sblk->status_rx_quick_consumer_index6 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index6);
>                 printk("sblk->status_rx_quick_consumer_index7 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index7);
>                 printk("sblk->status_rx_quick_consumer_index8 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index8);
>                 printk("sblk->status_rx_quick_consumer_index9 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index9);
>                 printk("sblk->status_rx_quick_consumer_index10 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index10);
>                 printk("sblk->status_rx_quick_consumer_index11 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index11);
>                 printk("sblk->status_rx_quick_consumer_index12 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index12);
>                 printk("sblk->status_rx_quick_consumer_index13 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index13);
>                 printk("sblk->status_rx_quick_consumer_index14 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index14);
>                 printk("sblk->status_rx_quick_consumer_index15 
> %d\n",bnapi->status_blk.msi->status_rx_quick_consumer_index15);
>                 printk("sblk->status_completion_producer_index 
> %d\n",bnapi->status_blk.msi->status_completion_producer_index);
>                 printk("sblk->status_cmd_consumer_index 
> %d\n",bnapi->status_blk.msi->status_cmd_consumer_index);
>                 printk("sblk->status_idx 
> %d\n",bnapi->status_blk.msi->status_idx);
>                 printk("sblk->status_unused 
> %d\n",bnapi->status_blk.msi->status_unused);
>                 printk("sblk->status_blk_num 
> %d\n",bnapi->status_blk.msi->status_blk_num);
> 
>                 is_timedout = 1;
>                 for (i = 0; i < bp->irq_nvecs; i++) {
>                         bnapi = &bp->bnx2_napi[i];
>                         bnx2_tx_int(bp, bnapi, 0);
>                 }
>                 return;
>         }
> -----------------
>  
> last status idx 2426 
> hw_tx_cons 32474, txr->hw_tx_conds 32474 txr->tx_prod 32641 txr->tx_cons 32474
> hw_rx_cons 19665, txr->hw_rx_conds 19665
> sblk->status_attn_bits 1
> sblk->status_attn_bits_ack 1
> bnx2_tx_avail 88 
> sblk->status_tx_quick_consumer_index0 32474
> sblk->status_tx_quick_consumer_index1 0
> sblk->status_tx_quick_consumer_index2 0
> sblk->status_tx_quick_consumer_index3 0
> sblk->status_rx_quick_consumer_index0 19665
> sblk->status_rx_quick_consumer_index1 0
> sblk->status_rx_quick_consumer_index2 0
> sblk->status_rx_quick_consumer_index3 0
> sblk->status_rx_quick_consumer_index4 0
> sblk->status_rx_quick_consumer_index5 0
> sblk->status_rx_quick_consumer_index6 0
> sblk->status_rx_quick_consumer_index7 0
> sblk->status_rx_quick_consumer_index8 0
> sblk->status_rx_quick_consumer_index9 0
> sblk->status_rx_quick_consumer_index10 0
> sblk->status_rx_quick_consumer_index11 0
> sblk->status_rx_quick_consumer_index12 0
> sblk->status_rx_quick_consumer_index13 0
> sblk->status_rx_quick_consumer_index14 0
> sblk->status_rx_quick_consumer_index15 0
> sblk->status_completion_producer_index 0
> sblk->status_cmd_consumer_index 0
> sblk->status_idx 2426
> sblk->status_unused 0
> sblk->status_blk_num 0
> hw_cons 32474 sw_cons 32474 ffff8801d27f85c0 bnapi 
> return hw_cons 32474 sw_cons 32474 ffff8801d27f85c0 bnapi 
> hw_cons 3628 sw_cons 3625 ffff8801d27f8bc0 bnapi 
> return hw_cons 3628 sw_cons 3625 ffff8801d27f8bc0 bnapi 
> hw_cons 62094 sw_cons 62090 ffff8801d27f91c0 bnapi 
> return hw_cons 62094 sw_cons 62090 ffff8801d27f91c0 bnapi 
> hw_cons 3184 sw_cons 3173 ffff8801d27f97c0 bnapi 
> return hw_cons 3184 sw_cons 3173 ffff8801d27f97c0 bnapi 
> hw_cons 0 sw_cons 0 ffff8801d27f9dc0 bnapi 
> return hw_cons 0 sw_cons 0 ffff8801d27f9dc0 bnapi 
> 
>  
> >--------------------------------------------------------------------------------
> >From: tinnycloud@xxxxxxxxxxx
> >To: xen-devel@xxxxxxxxxxxxxxxxxxx
> >CC: mchan@xxxxxxxxxxxx
> >Subject: bnx2 FTQ issues on 2.6.32 + xen 4.0.1;
> >Date: Mon, 27 Jun 2011 13:38:07 +0800
> >
> >
> >Hi Michael: 
> > 
> >     Sorry to  brother. But I've been suffered this issue quite a long time.
> >     My test environment is 2.6.32.36 + xen 4.0.1 + bnx2, 2.3.
> > 
> >     Also cpu idle is disabled in grub.
> > 
> >     Grub info: 
> >     title Red Hat Enterprise Linux Server (2.6.32.36xen)
> >        kernel /xen-4.0.1.gz iommu=off x2apic=off console=com1,vga 
> > com1=115200,8n1 noreboot cpuidle=0 cpufreq=none no-xsave max_cstate=1
> >        module /vmlinuz-2.6.32.36xen ro root=LABEL=/ hda=noprobe 
> > console=hvc0 console=ttyS0,115200n8 
> >        module /initrd-2.6.32.36xen.img
> >     
> >     Someone has pointed out that this might be interrupt lost, but I don't 
> > understand the log info, if the log
> >info is documented somewhere, could you let me know?
> >      
> >      Many thanks.
> > 
> >root@xmao # ethtool  -i eth0
> >driver: bridge
> >version: 2.3
> >firmware-version: N/A
> >bus-info: N/A
> > 
> > 
> >/var/log/message:
> > 
> >196299 Jun 27 07:36:56 xmao kernel: ------------[ cut here ]------------
> >196300 Jun 27 07:36:56 xmao kernel: WARNING: at net/sched/sch_generic.c:261 
> >dev_watchdog+0x105/0x16a()
> >196301 Jun 27 07:36:56 xmao kernel: Hardware name: Tecal RH2285
> >196302 Jun 27 07:36:56 xmao kernel: NETDEV WATCHDOG: peth0 (bnx2): transmit 
> >queue 0 timed out                                                        
> >196303 Jun 27 07:36:56 xmao kernel: Modules linked in: iptable_filter nfs 
> >fscache nfs_acl auth_rpcgss bridge stp llc autofs4 ipmi_devintf ipmi_si ipm  
> >     i_msghandler lockd sunrpc ipv6 xenfs dm_multipath fuse nf_conntrack 
> >ip_tables xen_netback xen_blkback blktap blkback_pagemap loop nbd video 
> >output sbs        sbshc parport_pc lp parport snd_seq_dummy snd_seq_oss 
> >snd_seq_midi_event snd_seq snd_seq_device serio_raw bnx2 snd_pcm_oss 
> >snd_mixer_oss snd_pcm snd_       timer snd i2c_i801 iTCO_wdt soundcore 
> >iTCO_vendor_support pata_acpi snd_page_alloc i2c_core ata_generic pcspkr 
> >ata_piix shpchp mptsas mptscsih mptbase        [last unloaded: freq_table]
> >196304 Jun 27 07:36:56 xmao kernel: Pid: 0, comm: swapper Not tainted 
> >2.6.32.36xen #1
> >196305 Jun 27 07:36:56 xmao kernel: Call Trace:
> >196306 Jun 27 07:36:56 xmao kernel:  <IRQ>  [<ffffffff813ba154>] ? 
> >dev_watchdog+0x105/0x16a
> >196307 Jun 27 07:36:56 xmao kernel:  [<ffffffff81056666>] 
> >warn_slowpath_common+0x7c/0x94
> >196308 Jun 27 07:36:56 xmao kernel:  [<ffffffff81056738>] 
> >warn_slowpath_fmt+0xa4/0xa6
> >196309 Jun 27 07:36:56 xmao kernel:  [<ffffffff81080bfa>] ? 
> >clockevents_program_event+0x78/0x81
> >196310 Jun 27 07:36:56 xmao kernel:  [<ffffffff81081fce>] ? 
> >tick_program_event+0x2a/0x2c
> >196311 Jun 27 07:36:56 xmao kernel:  [<ffffffff813b951d>] ? 
> >__netif_tx_lock+0x1b/0x24
> >196312 Jun 27 07:36:56 xmao kernel:  [<ffffffff813b95a8>] ? 
> >netif_tx_lock+0x46/0x6e
> >196313 Jun 27 07:36:56 xmao kernel:  [<ffffffff813a3ed1>] ? 
> >netdev_drivername+0x48/0x4f
> >196314 Jun 27 07:36:56 xmao kernel:  [<ffffffff813ba154>] 
> >dev_watchdog+0x105/0x16a
> >196315 Jun 27 07:36:56 xmao kernel:  [<ffffffff81063d98>] 
> >run_timer_softirq+0x156/0x1f8
> >196316 Jun 27 07:36:56 xmao kernel:  [<ffffffff813ba04f>] ? 
> >dev_watchdog+0x0/0x16a
> >196317 Jun 27 07:36:56 xmao kernel:  [<ffffffff8105d6f0>] 
> >__do_softirq+0xd7/0x19e
> >196318 Jun 27 07:36:56 xmao kernel:  [<ffffffff81013eac>] 
> >call_softirq+0x1c/0x30
> >196319 Jun 27 07:36:56 xmao kernel:  [<ffffffff8101564b>] 
> >do_softirq+0x46/0x87
> >196320 Jun 27 07:36:56 xmao kernel:  [<ffffffff8105d575>] irq_exit+0x3b/0x7a
> >196321 Jun 27 07:36:56 xmao kernel:  [<ffffffff8128dcfe>] 
> >xen_evtchn_do_upcall+0x38/0x46
> >196322 Jun 27 07:36:56 xmao kernel:  [<ffffffff81013efe>] 
> >xen_do_hypervisor_callback+0x1e/0x30
> >196323 Jun 27 07:36:56 xmao kernel:  <EOI>  [<ffffffff8103f642>] ? 
> >pick_next_task_idle+0x18/0x22
> >196324 Jun 27 07:36:56 xmao kernel:  [<ffffffff810093aa>] ? 
> >hypercall_page+0x3aa/0x1000
> >196325 Jun 27 07:36:56 xmao kernel:  [<ffffffff810093aa>] ? 
> >hypercall_page+0x3aa/0x1000
> >196326 Jun 27 07:36:56 xmao kernel:  [<ffffffff8100f1bb>] ? 
> >xen_safe_halt+0x10/0x1a
> >196327 Jun 27 07:36:56 xmao kernel:  [<ffffffff81019e14>] ? 
> >default_idle+0x39/0x56
> >196328 Jun 27 07:36:56 xmao kernel:  [<ffffffff81011cd0>] ? 
> >cpu_idle+0x5d/0x8c
> >196329 Jun 27 07:36:56 xmao kernel:  [<ffffffff8143375d>] ? 
> >cpu_bringup_and_idle+0x13/0x15
> >196330 Jun 27 07:36:56 xmao kernel: ---[ end trace 8ea3df16a1603e41 ]---
> >196331 Jun 27 07:36:56 xmao kernel: bnx2: <--- start FTQ dump on peth0 --->
> >196332 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RV2P_PFTQ_CTL 10000
> >196333 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RV2P_TFTQ_CTL 20000
> >196334 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RV2P_MFTQ_CTL 4000
> >196335 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TBDR_FTQ_CTL 1004002
> >196336 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TDMA_FTQ_CTL 4010002
> >196337 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TXP_FTQ_CTL 4010002
> >196338 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_TPAT_FTQ_CTL 10002
> >196339 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RXP_CFTQ_CTL 8000
> >196340 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_RXP_FTQ_CTL 100000
> >196341 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_COM_COMXQ_FTQ_CTL 10000
> >196342 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_COM_COMTQ_FTQ_CTL 20000
> >196343 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_COM_COMQ_FTQ_CTL 10000
> >196344 Jun 27 07:36:56 xmao kernel: bnx2: peth0: BNX2_CP_CPQ_FTQ_CTL 4000
> >196345 Jun 27 07:36:56 xmao kernel: bnx2: peth0: TXP mode b84c state 
> >80005000 evt_mask 500 pc 8000758 pc 8000758 instr 8fa90030
> >196346 Jun 27 07:36:56 xmao kernel: bnx2: peth0: TPAT mode b84c state 
> >80001000 evt_mask 500 pc 8000a5c pc 8000a4c instr 38420001
> >196347 Jun 27 07:36:56 xmao kernel: bnx2: peth0: RXP mode b84c state 
> >80001000 evt_mask 500 pc 8004c1c pc 8004c20 instr 32070001
> >196348 Jun 27 07:36:56 xmao kernel: bnx2: peth0: COM mode b8cc state 
> >80008000 evt_mask 500 pc 8000a94 pc 8000b28 instr 3c028000
> >196349 Jun 27 07:36:56 xmao kernel: bnx2: peth0: CP mode b8cc state 80004000 
> >evt_mask 500 pc 8000c6c pc 8000928 instr 8ce800e8
> >196350 Jun 27 07:36:56 xmao kernel: bnx2: <--- end FTQ dump on peth0 --->
> >196351 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: intr_sem[0] 
> >196351 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: intr_sem[0]
> >196352 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: intr_sem[0] 
> >PCI_CMD[20100406]
> >196353 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: PCI_PM[19002008] 
> >PCI_MISC_CFG[92000088]
> >196354 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: 
> >EMAC_TX_STATUS[00000008] EMAC_RX_STATUS[00000000]
> >196355 Jun 27 07:36:56 xmao kernel: bnx2: peth0 RPM_MGMT_PKT_CTRL[40000088]
> >196356 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: 
> >MCP_STATE_P0[0007e10e] MCP_STATE_P1[0003e00e]
> >196357 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: 
> >HC_STATS_INTERRUPT_STATUS[01ff0000]
> >196358 Jun 27 07:36:56 xmao kernel: bnx2: peth0 DEBUG: PBA[00000000]
> >196359 Jun 27 07:36:56 xmao kernel: bnx2: peth0 NIC Copper Link is Down
> >196360 Jun 27 07:36:56 xmao kernel: eth0: port 1(peth0) entering disabled 
> >state
> >196361 Jun 27 07:36:59 xmao kernel: bnx2: peth0 NIC Copper Link is Up, 1000 
> >Mbps full duplex
> >196362 Jun 27 07:36:59 xmao kernel: eth0: port 1(peth0) entering forwarding 
> >state
> 
> 
> 
>                                         

> _______________________________________________
> Xen-devel mailing list
> Xen-devel@xxxxxxxxxxxxxxxxxxx
> http://lists.xensource.com/xen-devel


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.