SFC frontend network accelerator source

diff -r dd748ded9ba8 drivers/xen/Kconfig
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -78,6 +78,10 @@ config XEN_NETDEV_PIPELINED_TRANSMITTER
 	  like reassembling packets to perform firewall filtering; or if you
 	  are unsure; or if you experience network hangs when this option is
 	  enabled; then you must say N here.
+
+config XEN_NETDEV_ACCEL_SFC_UTIL
+	tristate
+	default n
 
 config XEN_NETDEV_LOOPBACK
 	tristate "Network-device loopback driver"
@@ -192,6 +196,12 @@ config XEN_GRANT_DEV
 	  Device for accessing (in user-space) pages that have been
 	  granted by other domains.
 
+config XEN_NETDEV_ACCEL_SFC_FRONTEND
+	tristate "Network-device frontend driver acceleration for Solarflare NICs"
+	depends on XEN_NETDEV_FRONTEND
+	select XEN_NETDEV_ACCEL_SFC_UTIL
+	default m
+
 config XEN_FRAMEBUFFER
 	tristate "Framebuffer-device frontend driver"
 	depends on FB
diff -r dd748ded9ba8 drivers/xen/Makefile
--- a/drivers/xen/Makefile
+++ b/drivers/xen/Makefile
@@ -20,3 +20,5 @@ obj-$(CONFIG_XEN_KEYBOARD) += fbfront/
 obj-$(CONFIG_XEN_KEYBOARD) += fbfront/
 obj-$(CONFIG_XEN_PRIVCMD) += privcmd/
 obj-$(CONFIG_XEN_GRANT_DEV) += gntdev/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) += sfc_netutil/
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) += sfc_netfront/
diff -r dd748ded9ba8 drivers/xen/sfc_netfront/Makefile
--- /dev/null
+++ b/drivers/xen/sfc_netfront/Makefile
@@ -0,0 +1,11 @@
+EXTRA_CFLAGS += -Idrivers/xen/sfc_netutil -Idrivers/xen/netfront
+EXTRA_CFLAGS += -D__ci_driver__
+EXTRA_CFLAGS += -Werror
+
+ifdef GCOV
+EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV
+endif
+
+obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_FRONTEND) := sfc_netfront.o
+
+sfc_netfront-objs := accel_msg.o accel_bufs.o accel_netfront.o accel_vi.o accel_xenbus.o accel_tso.o accel_ssr.o accel_debugfs.o falcon_event.o falcon_vi.o pt_tx.o vi_init.o
diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel.h
--- /dev/null
+++ b/drivers/xen/sfc_netfront/accel.h
@@ -0,0 +1,477 @@
+/****************************************************************************
+ * Solarflare driver for Xen network acceleration
+ *
+ * Copyright 2006-2008: Solarflare Communications Inc,
+ *                      9501 Jeronimo Road, Suite 250,
+ *                      Irvine, CA 92618, USA
+ *
+ * Maintained by Solarflare Communications
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NETFRONT_ACCEL_H +#define NETFRONT_ACCEL_H + +#include "accel_msg_iface.h" +#include "accel_cuckoo_hash.h" +#include "accel_bufs.h" + +#include "etherfabric/ef_vi.h" + +#include +#include + +#include +#include + +enum netfront_accel_post_status { + NETFRONT_ACCEL_STATUS_GOOD, + NETFRONT_ACCEL_STATUS_BUSY, + NETFRONT_ACCEL_STATUS_CANT +}; + +#define NETFRONT_ACCEL_STATS 1 +#if NETFRONT_ACCEL_STATS +#define NETFRONT_ACCEL_STATS_OP(x) x +#else +#define NETFRONT_ACCEL_STATS_OP(x) +#endif + + +enum netfront_accel_msg_state { + NETFRONT_ACCEL_MSG_NONE = 0, + NETFRONT_ACCEL_MSG_HELLO = 1, + NETFRONT_ACCEL_MSG_HW = 2 +}; + + +typedef struct { + u32 in_progress; + u32 total_len; + struct sk_buff *skb; +} netfront_accel_jumbo_state; + + +struct netfront_accel_ssr_state { + /** List of tracked connections. */ + struct list_head conns; + + /** Free efx_ssr_conn instances. */ + struct list_head free_conns; +}; + + +struct netfront_accel_netdev_stats { + /* Fastpath stats. */ + u32 fastpath_rx_pkts; + u32 fastpath_rx_bytes; + u32 fastpath_rx_errors; + u32 fastpath_tx_pkts; + u32 fastpath_tx_bytes; + u32 fastpath_tx_errors; +}; + + +struct netfront_accel_netdev_dbfs { + struct dentry *fastpath_rx_pkts; + struct dentry *fastpath_rx_bytes; + struct dentry *fastpath_rx_errors; + struct dentry *fastpath_tx_pkts; + struct dentry *fastpath_tx_bytes; + struct dentry *fastpath_tx_errors; +}; + + +struct netfront_accel_stats { + /** Fast path events */ + u64 fastpath_tx_busy; + + /** TX DMA queue status */ + u64 fastpath_tx_completions; + + /** The number of events processed. */ + u64 event_count; + + /** Number of frame trunc events seen on fastpath */ + u64 fastpath_frm_trunc; + + /** Number of no rx descriptor trunc events seen on fastpath */ + u64 rx_no_desc_trunc; + + /** The number of misc bad events (e.g. RX_DISCARD) processed. */ + u64 bad_event_count; + + /** Number of events dealt with in poll loop */ + u32 events_per_poll_max; + u32 events_per_poll_tx_max; + u32 events_per_poll_rx_max; + + /** Largest number of concurrently outstanding tx descriptors */ + u32 fastpath_tx_pending_max; + + /** The number of events since the last interrupts. */ + u32 event_count_since_irq; + + /** The max number of events between interrupts. */ + u32 events_per_irq_max; + + /** The number of interrupts. */ + u64 irq_count; + + /** The number of useless interrupts. */ + u64 useless_irq_count; + + /** The number of polls scheduled. */ + u64 poll_schedule_count; + + /** The number of polls called. */ + u64 poll_call_count; + + /** The number of rechecks. 
*/ + u64 poll_reschedule_count; + + /** Number of times we've called netif_stop_queue/netif_wake_queue */ + u64 queue_stops; + u64 queue_wakes; + + /** SSR stats */ + u64 ssr_bursts; + u64 ssr_drop_stream; + u64 ssr_misorder; + u64 ssr_slow_start; + u64 ssr_merges; + u64 ssr_too_many; + u64 ssr_new_stream; +}; + + +struct netfront_accel_dbfs { + struct dentry *fastpath_tx_busy; + struct dentry *fastpath_tx_completions; + struct dentry *fastpath_tx_pending_max; + struct dentry *fastpath_frm_trunc; + struct dentry *rx_no_desc_trunc; + struct dentry *event_count; + struct dentry *bad_event_count; + struct dentry *events_per_poll_max; + struct dentry *events_per_poll_rx_max; + struct dentry *events_per_poll_tx_max; + struct dentry *event_count_since_irq; + struct dentry *events_per_irq_max; + struct dentry *irq_count; + struct dentry *useless_irq_count; + struct dentry *poll_schedule_count; + struct dentry *poll_call_count; + struct dentry *poll_reschedule_count; + struct dentry *queue_stops; + struct dentry *queue_wakes; + struct dentry *ssr_bursts; + struct dentry *ssr_drop_stream; + struct dentry *ssr_misorder; + struct dentry *ssr_slow_start; + struct dentry *ssr_merges; + struct dentry *ssr_too_many; + struct dentry *ssr_new_stream; +}; + + +typedef struct netfront_accel_vnic { + struct netfront_accel_vnic *next; + + struct mutex vnic_mutex; + + spinlock_t tx_lock; + + struct netfront_accel_bufpages bufpages; + struct netfront_accel_bufinfo *rx_bufs; + struct netfront_accel_bufinfo *tx_bufs; + + /** Hardware & VI state */ + ef_vi vi; + + ef_vi_state *vi_state; + + ef_eventq_state evq_state; + + void *evq_mapping; + + /** Hardware dependant state */ + union { + struct { + /** Falcon A or B */ + enum net_accel_hw_type type; + u32 *evq_rptr; + u32 *doorbell; + void *evq_rptr_mapping; + void *doorbell_mapping; + void *txdmaq_mapping; + void *rxdmaq_mapping; + } falcon; + } hw; + + /** RX DMA queue status */ + u32 rx_dma_level; + + /** Number of RX descriptors waiting to be pushed to the card. */ + u32 rx_dma_batched; +#define NETFRONT_ACCEL_RX_DESC_BATCH 16 + + /** + * Hash table of remote mac addresses to decide whether to try + * fast path + */ + cuckoo_hash_table fastpath_table; + spinlock_t table_lock; + + /** the local mac address of virtual interface we're accelerating */ + u8 mac[ETH_ALEN]; + + int rx_pkt_stride; + int rx_skb_stride; + + /** + * Keep track of fragments of jumbo packets as events are + * delivered by NIC + */ + netfront_accel_jumbo_state jumbo_state; + + struct net_device *net_dev; + + /** These two gate the enabling of fast path operations */ + int frontend_ready; + int backend_netdev_up; + + int irq_enabled; + spinlock_t irq_enabled_lock; + + int tx_enabled; + + int poll_enabled; + + /** A spare slot for a TX packet. This is treated as an extension + * of the DMA queue. */ + struct sk_buff *tx_skb; + + /** Keep track of fragments of SSR packets */ + struct netfront_accel_ssr_state ssr_state; + + struct xenbus_device *dev; + + /** Event channel for messages */ + int msg_channel; + int msg_channel_irq; + + /** Event channel for network interrupts. */ + int net_channel; + int net_channel_irq; + + struct net_accel_shared_page *shared_page; + + grant_ref_t ctrl_page_gnt; + grant_ref_t msg_page_gnt; + + /** Message Qs, 1 each way. 
*/ + sh_msg_fifo2 to_dom0; + sh_msg_fifo2 from_dom0; + + enum netfront_accel_msg_state msg_state; + + /** Watch on accelstate */ + struct xenbus_watch backend_accel_watch; + /** Watch on frontend's MAC address */ + struct xenbus_watch mac_address_watch; + + /** Work to process received irq/msg */ + struct work_struct msg_from_bend; + + /** Wait queue for changes in accelstate. */ + wait_queue_head_t state_wait_queue; + + /** The current accelstate of this driver. */ + XenbusState frontend_state; + + /** The most recent accelstate seen by the xenbus watch. */ + XenbusState backend_state; + + /** Non-zero if we should reject requests to connect. */ + int removing; + + /** Non-zero if the domU shared state has been initialised. */ + int domU_state_is_setup; + + /** Non-zero if the dom0 shared state has been initialised. */ + int dom0_state_is_setup; + + /* Those statistics that are added to the netdev stats */ + struct netfront_accel_netdev_stats netdev_stats; + struct netfront_accel_netdev_stats stats_last_read; +#ifdef CONFIG_DEBUG_FS + struct netfront_accel_netdev_dbfs netdev_dbfs; +#endif + + /* These statistics are internal and optional */ +#if NETFRONT_ACCEL_STATS + struct netfront_accel_stats stats; +#ifdef CONFIG_DEBUG_FS + struct netfront_accel_dbfs dbfs; +#endif +#endif + + /** Debufs fs dir for this interface */ + struct dentry *dbfs_dir; +} netfront_accel_vnic; + + +/* Module parameters */ +extern unsigned max_pages; +extern unsigned buffer_split; + +extern const char *frontend_name; +extern struct netfront_accel_hooks accel_hooks; +extern struct workqueue_struct *netfront_accel_workqueue; + + +extern +void netfront_accel_vi_ctor(netfront_accel_vnic *vnic); + +extern +int netfront_accel_vi_init(netfront_accel_vnic *vnic, + struct net_accel_msg_hw *hw_msg); + +extern +void netfront_accel_vi_dtor(netfront_accel_vnic *vnic); + + +/** + * Add new buffers which have been registered with the NIC. + * + * @v vnic The vnic instance to process the response. + * + * The buffers contained in the message are added to the buffer pool. + */ +extern +void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx); + +/** + * Put a packet on the tx DMA queue. + * + * @v vnic The vnic instance to accept the packet. + * @v skb A sk_buff to send. + * + * Attempt to send a packet. On success, the skb is owned by the DMA + * queue and will be released when the completion event arrives. + */ +extern enum netfront_accel_post_status +netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, + struct sk_buff *skb); + + +/** + * Process events in response to an interrupt. + * + * @v vnic The vnic instance to poll. + * @v rx_packets The maximum number of rx packets to process. + * @ret rx_done The number of rx packets processed. + * + * The vnic will process events until there are no more events + * remaining or the specified number of rx packets has been processed. + * The split from the interrupt call is to allow Linux NAPI + * polling. + */ +extern +int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets); + + +/** + * Iterate over the fragments of a packet buffer. + * + * @v skb The packet buffer to examine. + * @v idx A variable name for the fragment index. + * @v data A variable name for the address of the fragment data. + * @v length A variable name for the fragment length. + * @v code A section of code to execute for each fragment. + * + * This macro iterates over the fragments in a packet buffer and + * executes the code for each of them. 
+ */ +#define NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT(skb, frag_idx, \ + frag_data, frag_len, \ + code) \ + do { \ + int frag_idx; \ + void *frag_data; \ + unsigned int frag_len; \ + \ + frag_data = skb->data; \ + frag_len = skb_headlen(skb); \ + frag_idx = 0; \ + while (1) { /* For each fragment */ \ + code; \ + if (frag_idx >= skb_shinfo(skb)->nr_frags) { \ + break; \ + } else { \ + skb_frag_t *fragment; \ + fragment = &skb_shinfo(skb)->frags[frag_idx]; \ + frag_len = fragment->size; \ + frag_data = ((void*)page_address(fragment->page) \ + + fragment->page_offset); \ + }; \ + frag_idx++; \ + } \ + } while(0) + +static inline +void netfront_accel_disable_net_interrupts(netfront_accel_vnic *vnic) +{ + mask_evtchn(vnic->net_channel); +} + +static inline +void netfront_accel_enable_net_interrupts(netfront_accel_vnic *vnic) +{ + unmask_evtchn(vnic->net_channel); +} + +void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac, + u32 ip, u16 port, u8 protocol); + +/* Process an IRQ received from back end driver */ +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, + struct pt_regs *unused); +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, + struct pt_regs *unused); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +extern void netfront_accel_msg_from_bend(struct work_struct *context); +#else +extern void netfront_accel_msg_from_bend(void *context); +#endif + +extern void vnic_stop_fastpath(netfront_accel_vnic *vnic); + +extern int netfront_accel_probe(struct net_device *net_dev, + struct xenbus_device *dev); +extern int netfront_accel_remove(struct xenbus_device *dev); +extern void netfront_accel_set_closing(netfront_accel_vnic *vnic); + +extern int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic); + +extern void netfront_accel_debugfs_init(void); +extern void netfront_accel_debugfs_fini(void); +extern int netfront_accel_debugfs_create(netfront_accel_vnic *vnic); +extern int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic); + +#endif /* NETFRONT_ACCEL_H */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_bufs.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_bufs.c @@ -0,0 +1,393 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include + +#include "accel_bufs.h" +#include "accel_util.h" + +#include "accel.h" + + +static int +netfront_accel_alloc_buf_desc_blocks(struct netfront_accel_bufinfo *manager, + int pages) +{ + manager->desc_blocks = + kzalloc(sizeof(struct netfront_accel_pkt_desc *) * + NETFRONT_ACCEL_BUF_NUM_BLOCKS(pages), GFP_KERNEL); + if (manager->desc_blocks == NULL) { + return -ENOMEM; + } + + return 0; +} + +static int +netfront_accel_alloc_buf_lists(struct netfront_accel_bufpages *bufpages, + int pages) +{ + bufpages->page_list = kmalloc(pages * sizeof(void *), GFP_KERNEL); + if (bufpages->page_list == NULL) { + return -ENOMEM; + } + + bufpages->grant_list = kzalloc(pages * sizeof(grant_ref_t), GFP_KERNEL); + if (bufpages->grant_list == NULL) { + kfree(bufpages->page_list); + bufpages->page_list = NULL; + return -ENOMEM; + } + + return 0; +} + + +int netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages, + struct netfront_accel_bufinfo *rx_manager, + struct netfront_accel_bufinfo *tx_manager, + int pages) +{ + int n, rc; + + if ((rc = netfront_accel_alloc_buf_desc_blocks + (rx_manager, pages - (pages / buffer_split))) < 0) { + goto rx_fail; + } + + if ((rc = netfront_accel_alloc_buf_desc_blocks + (tx_manager, pages / buffer_split)) < 0) { + goto tx_fail; + } + + if ((rc = netfront_accel_alloc_buf_lists(bufpages, pages)) < 0) { + goto lists_fail; + } + + for (n = 0; n < pages; n++) { + void *tmp = (void*)__get_free_page(GFP_KERNEL); + if (tmp == NULL) + break; + + bufpages->page_list[n] = tmp; + } + + if (n != pages) { + EPRINTK("%s: not enough pages: %d != %d\n", __FUNCTION__, n, + pages); + for (; n >= 0; n--) + free_page((unsigned long)(bufpages->page_list[n])); + rc = -ENOMEM; + goto pages_fail; + } + + bufpages->max_pages = pages; + bufpages->page_reqs = 0; + + return 0; + + pages_fail: + kfree(bufpages->page_list); + kfree(bufpages->grant_list); + + bufpages->page_list = NULL; + bufpages->grant_list = NULL; + lists_fail: + kfree(tx_manager->desc_blocks); + tx_manager->desc_blocks = NULL; + + tx_fail: + kfree(rx_manager->desc_blocks); + rx_manager->desc_blocks = NULL; + rx_fail: + return rc; +} + + +void netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages, + struct netfront_accel_bufinfo *rx_manager, + struct netfront_accel_bufinfo *tx_manager) +{ + int i; + + for (i = 0; i < bufpages->max_pages; i++) { + if (bufpages->grant_list[i] != 0) + net_accel_ungrant_page(bufpages->grant_list[i]); + free_page((unsigned long)(bufpages->page_list[i])); + } + + if (bufpages->max_pages) { + kfree(bufpages->page_list); + kfree(bufpages->grant_list); + kfree(rx_manager->desc_blocks); + kfree(tx_manager->desc_blocks); + } +} + + +/* + * Allocate memory for the buffer manager and create a lock. If no + * lock is supplied its own is allocated. 
+ */ +struct netfront_accel_bufinfo *netfront_accel_init_bufs(spinlock_t *lock) +{ + struct netfront_accel_bufinfo *res = kmalloc(sizeof(*res), GFP_KERNEL); + if (res != NULL) { + res->npages = res->nused = 0; + res->first_free = -1; + + if (lock == NULL) { + res->lock = kmalloc(sizeof(*res->lock), GFP_KERNEL); + if (res->lock == NULL) { + kfree(res); + return NULL; + } + spin_lock_init(res->lock); + res->internally_locked = 1; + } else { + res->lock = lock; + res->internally_locked = 0; + } + + res->desc_blocks = NULL; + } + + return res; +} + + +void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *bufs) +{ + if (bufs->internally_locked) + kfree(bufs->lock); + kfree(bufs); +} + + +int netfront_accel_buf_map_request(struct xenbus_device *dev, + struct netfront_accel_bufpages *bufpages, + struct net_accel_msg *msg, + int pages, int offset) +{ + int i, mfn; + int err; + + net_accel_msg_init(msg, NET_ACCEL_MSG_MAPBUF); + + BUG_ON(pages > NET_ACCEL_MSG_MAX_PAGE_REQ); + + msg->u.mapbufs.pages = pages; + + for (i = 0; i < msg->u.mapbufs.pages; i++) { + /* + * This can happen if we tried to send this message + * earlier but the queue was full. + */ + if (bufpages->grant_list[offset+i] != 0) { + msg->u.mapbufs.grants[i] = + bufpages->grant_list[offset+i]; + continue; + } + + mfn = virt_to_mfn(bufpages->page_list[offset+i]); + VPRINTK("%s: Granting page %d, mfn %08x\n", + __FUNCTION__, i, mfn); + + bufpages->grant_list[offset+i] = + net_accel_grant_page(dev, mfn, 0); + msg->u.mapbufs.grants[i] = bufpages->grant_list[offset+i]; + + if (msg->u.mapbufs.grants[i] < 0) { + EPRINTK("%s: Failed to grant buffer: %d\n", + __FUNCTION__, msg->u.mapbufs.grants[i]); + err = -EIO; + goto error; + } + } + + /* This is interpreted on return as the offset in the the page_list */ + msg->u.mapbufs.reqid = offset; + + return 0; + +error: + /* Ungrant all the pages we've successfully granted. */ + for (i--; i >= 0; i--) { + net_accel_ungrant_page(bufpages->grant_list[offset+i]); + bufpages->grant_list[offset+i] = 0; + } + return err; +} + + +/* Process a response to a buffer request. 
*/ +int netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages, + struct netfront_accel_bufinfo *manager, + struct net_accel_msg *msg) +{ + int msg_pages, page_offset, i, newtot; + int old_block_count, new_block_count; + u32 msg_buf; + unsigned long flags; + + VPRINTK("%s: manager %p msg %p\n", __FUNCTION__, manager, msg); + + BUG_ON(msg->id != (NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY)); + + msg_pages = msg->u.mapbufs.pages; + msg_buf = msg->u.mapbufs.buf; + page_offset = msg->u.mapbufs.reqid; + + spin_lock_irqsave(manager->lock, flags); + newtot = manager->npages + msg_pages; + old_block_count = + (manager->npages + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >> + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT; + new_block_count = + (newtot + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK - 1) >> + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT; + + for (i = old_block_count; i < new_block_count; i++) { + struct netfront_accel_pkt_desc *block; + if (manager->desc_blocks[i] != NULL) { + VPRINTK("Not needed\n"); + continue; + } + block = kzalloc(NETFRONT_ACCEL_BUFS_PER_BLOCK * + sizeof(netfront_accel_pkt_desc), GFP_ATOMIC); + if (block == NULL) { + spin_unlock_irqrestore(manager->lock, flags); + return -ENOMEM; + } + manager->desc_blocks[i] = block; + } + for (i = manager->npages; i < newtot; i++) { + int k, j = i - manager->npages; + int block_num; + int block_idx; + struct netfront_accel_pkt_desc *pkt; + + block_num = i >> NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT; + block_idx = (NETFRONT_ACCEL_BUFS_PER_PAGE*i) + & (NETFRONT_ACCEL_BUFS_PER_BLOCK-1); + + pkt = manager->desc_blocks[block_num] + block_idx; + + for (k = 0; k < NETFRONT_ACCEL_BUFS_PER_PAGE; k++) { + BUG_ON(page_offset + j >= bufpages->max_pages); + + pkt[k].buf_id = NETFRONT_ACCEL_BUFS_PER_PAGE * i + k; + pkt[k].pkt_kva = bufpages->page_list[page_offset + j] + + (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * k; + pkt[k].pkt_buff_addr = msg_buf + + (PAGE_SIZE/NETFRONT_ACCEL_BUFS_PER_PAGE) * + (NETFRONT_ACCEL_BUFS_PER_PAGE * j + k); + pkt[k].next_free = manager->first_free; + manager->first_free = pkt[k].buf_id; + *(int*)(pkt[k].pkt_kva) = pkt[k].buf_id; + + VPRINTK("buf %d desc %p kva %p buffaddr %x\n", + pkt[k].buf_id, &(pkt[k]), pkt[k].pkt_kva, + pkt[k].pkt_buff_addr); + } + } + manager->npages = newtot; + spin_unlock_irqrestore(manager->lock, flags); + VPRINTK("Added %d pages. Total is now %d\n", msg_pages, + manager->npages); + return 0; +} + + +netfront_accel_pkt_desc * +netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id) +{ + netfront_accel_pkt_desc *pkt; + int block_num = id >> NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT; + int block_idx = id & (NETFRONT_ACCEL_BUFS_PER_BLOCK - 1); + BUG_ON(id >= manager->npages * NETFRONT_ACCEL_BUFS_PER_PAGE); + BUG_ON(block_idx >= NETFRONT_ACCEL_BUFS_PER_BLOCK); + pkt = manager->desc_blocks[block_num] + block_idx; + return pkt; +} + + +/* Allocate a buffer from the buffer manager */ +netfront_accel_pkt_desc * +netfront_accel_buf_get(struct netfront_accel_bufinfo *manager) +{ + int bufno = -1; + netfront_accel_pkt_desc *buf = NULL; + unsigned long flags = 0; + + /* Any spare? 
*/ + if (manager->first_free == -1) + return NULL; + /* Take lock */ + if (manager->internally_locked) + spin_lock_irqsave(manager->lock, flags); + bufno = manager->first_free; + if (bufno != -1) { + buf = netfront_accel_buf_find(manager, bufno); + manager->first_free = buf->next_free; + manager->nused++; + } + /* Release lock */ + if (manager->internally_locked) + spin_unlock_irqrestore(manager->lock, flags); + + /* Tell the world */ + VPRINTK("Allocated buffer %i, buffaddr %x\n", bufno, + buf->pkt_buff_addr); + + return buf; +} + + +/* Release a buffer back to the buffer manager pool */ +int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, u16 id) +{ + netfront_accel_pkt_desc *buf = netfront_accel_buf_find(manager, id); + unsigned long flags = 0; + unsigned was_empty = 0; + int bufno = id; + + VPRINTK("Freeing buffer %i\n", id); + BUG_ON(id == (u16)-1); + + if (manager->internally_locked) + spin_lock_irqsave(manager->lock, flags); + + if (manager->first_free == -1) + was_empty = 1; + + buf->next_free = manager->first_free; + manager->first_free = bufno; + manager->nused--; + + if (manager->internally_locked) + spin_unlock_irqrestore(manager->lock, flags); + + return was_empty; +} diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_bufs.h --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_bufs.h @@ -0,0 +1,181 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NETFRONT_ACCEL_BUFS_H +#define NETFRONT_ACCEL_BUFS_H + +#include +#include +#include + +#include "accel_msg_iface.h" + + +/*! Buffer descriptor structure */ +typedef struct netfront_accel_pkt_desc { + int buf_id; + u32 pkt_buff_addr; + void *pkt_kva; + /* This is the socket buffer currently married to this buffer */ + struct sk_buff *skb; + int next_free; +} netfront_accel_pkt_desc; + + +#define NETFRONT_ACCEL_DEFAULT_BUF_PAGES (384) +#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT (4) +#define NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK \ + (1 << (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT)) +#define NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT (1) +#define NETFRONT_ACCEL_BUFS_PER_PAGE \ + (1 << (NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT)) +#define NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT \ + (NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK_SHIFT + \ + NETFRONT_ACCEL_BUFS_PER_PAGE_SHIFT) +#define NETFRONT_ACCEL_BUFS_PER_BLOCK \ + (1 << NETFRONT_ACCEL_BUFS_PER_BLOCK_SHIFT) +#define NETFRONT_ACCEL_BUF_NUM_BLOCKS(max_pages) \ + (((max_pages)+NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK-1) / \ + NETFRONT_ACCEL_BUF_PAGES_PER_BLOCK) + +/*! 
Buffer management structure. */ +struct netfront_accel_bufinfo { + /* number added to this manager */ + unsigned npages; + /* number currently used from this manager */ + unsigned nused; + + int first_free; + + int internally_locked; + spinlock_t *lock; + + /* + * array of pointers (length NETFRONT_ACCEL_BUF_NUM_BLOCKS) to + * pkt descs + */ + struct netfront_accel_pkt_desc **desc_blocks; +}; + + +struct netfront_accel_bufpages { + /* length of lists of pages/grants */ + int max_pages; + /* list of pages allocated for network buffers */ + void **page_list; + /* list of grants for the above pages */ + grant_ref_t *grant_list; + + /* number of page requests that have been made */ + unsigned page_reqs; +}; + + +/*! Allocate memory for the buffer manager, set up locks etc. + * Optionally takes a lock to use, if not supplied it makes its own. + * + * \return pointer to netfront_accel_bufinfo structure that represents the + * buffer manager + */ +extern struct netfront_accel_bufinfo * +netfront_accel_init_bufs(spinlock_t *lock); + +/*! Allocate memory for the buffers + */ +extern int +netfront_accel_alloc_buffer_mem(struct netfront_accel_bufpages *bufpages, + struct netfront_accel_bufinfo *rx_res, + struct netfront_accel_bufinfo *tx_res, + int pages); +extern void +netfront_accel_free_buffer_mem(struct netfront_accel_bufpages *bufpages, + struct netfront_accel_bufinfo *rx_res, + struct netfront_accel_bufinfo *tx_res); + +/*! Release memory for the buffer manager, buffers, etc. + * + * \param manager pointer to netfront_accel_bufinfo structure that + * represents the buffer manager + */ +extern void netfront_accel_fini_bufs(struct netfront_accel_bufinfo *manager); + +/*! Release a buffer. + * + * \param manager The buffer manager which owns the buffer. + * \param id The buffer identifier. + */ +extern int netfront_accel_buf_put(struct netfront_accel_bufinfo *manager, + u16 id); + +/*! Get the packet descriptor associated with a buffer id. + * + * \param manager The buffer manager which owns the buffer. + * \param id The buffer identifier. + * + * The returned value is the packet descriptor for this buffer. + */ +extern netfront_accel_pkt_desc * +netfront_accel_buf_find(struct netfront_accel_bufinfo *manager, u16 id); + + +/*! Fill out a message request for some buffers to be mapped by the + * back end driver + * + * \param manager The buffer manager + * \param msg Pointer to an ef_msg to complete. + * \return 0 on success + */ +extern int +netfront_accel_buf_map_request(struct xenbus_device *dev, + struct netfront_accel_bufpages *bufpages, + struct net_accel_msg *msg, + int pages, int offset); + +/*! Process a response to a buffer request. + * + * Deal with a received message from the back end in response to our + * request for buffers + * + * \param manager The buffer manager + * \param msg The received message from the back end describing new + * buffers + * \return 0 on success + */ +extern int +netfront_accel_add_bufs(struct netfront_accel_bufpages *bufpages, + struct netfront_accel_bufinfo *manager, + struct net_accel_msg *msg); + + +/*! Allocate a buffer from the buffer manager + * + * \param manager The buffer manager data structure + * \param id On exit, the id of the buffer allocated + * \return Pointer to buffer descriptor. 
+ */ +struct netfront_accel_pkt_desc * +netfront_accel_buf_get(struct netfront_accel_bufinfo *manager); + +#endif /* NETFRONT_ACCEL_BUFS_H */ + diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_debugfs.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_debugfs.c @@ -0,0 +1,234 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + +#include "accel.h" + +#if defined(CONFIG_DEBUG_FS) +static struct dentry *sfc_debugfs_root = NULL; +#endif + + +/* + * Extend debugfs helper functions to have a u64 version + */ +static void debugfs_u64_set(void *data, u64 val) +{ + *(u64 *)data = val; +} + +static u64 debugfs_u64_get(void *data) +{ + return *(u64 *)data; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_u64, debugfs_u64_get, debugfs_u64_set, "%llu\n"); + +struct dentry *debugfs_create_u64(const char *name, mode_t mode, + struct dentry *parent, u64 *value) +{ + return debugfs_create_file(name, mode, parent, value, &fops_u64); +} + + +void netfront_accel_debugfs_init(void) +{ +#if defined(CONFIG_DEBUG_FS) + sfc_debugfs_root = debugfs_create_dir(frontend_name, NULL); +#endif +} + + +void netfront_accel_debugfs_fini(void) +{ +#if defined(CONFIG_DEBUG_FS) + if (sfc_debugfs_root) + debugfs_remove(sfc_debugfs_root); +#endif +} + + +int netfront_accel_debugfs_create(netfront_accel_vnic *vnic) +{ +#if defined(CONFIG_DEBUG_FS) + if (sfc_debugfs_root == NULL) + return -ENOENT; + + vnic->dbfs_dir = debugfs_create_dir(vnic->net_dev->name, + sfc_debugfs_root); + if (vnic->dbfs_dir == NULL) + return -ENOMEM; + + vnic->netdev_dbfs.fastpath_rx_pkts = debugfs_create_u32 + ("fastpath_rx_pkts", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_pkts); + vnic->netdev_dbfs.fastpath_rx_bytes = debugfs_create_u32 + ("fastpath_rx_bytes", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_bytes); + vnic->netdev_dbfs.fastpath_rx_errors = debugfs_create_u32 + ("fastpath_rx_errors", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_rx_errors); + vnic->netdev_dbfs.fastpath_tx_pkts = debugfs_create_u32 + ("fastpath_tx_pkts", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_pkts); + vnic->netdev_dbfs.fastpath_tx_bytes = debugfs_create_u32 + ("fastpath_tx_bytes", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->netdev_stats.fastpath_tx_bytes); + vnic->netdev_dbfs.fastpath_tx_errors = debugfs_create_u32 + ("fastpath_tx_errors", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, 
&vnic->netdev_stats.fastpath_tx_errors); + +#if NETFRONT_ACCEL_STATS + vnic->dbfs.irq_count = debugfs_create_u64 + ("irq_count", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.irq_count); + vnic->dbfs.useless_irq_count = debugfs_create_u64 + ("useless_irq_count", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.useless_irq_count); + vnic->dbfs.poll_schedule_count = debugfs_create_u64 + ("poll_schedule_count", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.poll_schedule_count); + vnic->dbfs.poll_call_count = debugfs_create_u64 + ("poll_call_count", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.poll_call_count); + vnic->dbfs.poll_reschedule_count = debugfs_create_u64 + ("poll_reschedule_count", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.poll_reschedule_count); + vnic->dbfs.queue_stops = debugfs_create_u64 + ("queue_stops", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.queue_stops); + vnic->dbfs.queue_wakes = debugfs_create_u64 + ("queue_wakes", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.queue_wakes); + vnic->dbfs.ssr_bursts = debugfs_create_u64 + ("ssr_bursts", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.ssr_bursts); + vnic->dbfs.ssr_drop_stream = debugfs_create_u64 + ("ssr_drop_stream", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.ssr_drop_stream); + vnic->dbfs.ssr_misorder = debugfs_create_u64 + ("ssr_misorder", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.ssr_misorder); + vnic->dbfs.ssr_slow_start = debugfs_create_u64 + ("ssr_slow_start", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.ssr_slow_start); + vnic->dbfs.ssr_merges = debugfs_create_u64 + ("ssr_merges", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.ssr_merges); + vnic->dbfs.ssr_too_many = debugfs_create_u64 + ("ssr_too_many", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.ssr_too_many); + vnic->dbfs.ssr_new_stream = debugfs_create_u64 + ("ssr_new_stream", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.ssr_new_stream); + + vnic->dbfs.fastpath_tx_busy = debugfs_create_u64 + ("fastpath_tx_busy", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.fastpath_tx_busy); + vnic->dbfs.fastpath_tx_completions = debugfs_create_u64 + ("fastpath_tx_completions", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.fastpath_tx_completions); + vnic->dbfs.fastpath_tx_pending_max = debugfs_create_u32 + ("fastpath_tx_pending_max", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.fastpath_tx_pending_max); + vnic->dbfs.event_count = debugfs_create_u64 + ("event_count", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.event_count); + vnic->dbfs.bad_event_count = debugfs_create_u64 + ("bad_event_count", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.bad_event_count); + vnic->dbfs.event_count_since_irq = debugfs_create_u32 + ("event_count_since_irq", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.event_count_since_irq); + vnic->dbfs.events_per_irq_max = debugfs_create_u32 + ("events_per_irq_max", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.events_per_irq_max); + vnic->dbfs.fastpath_frm_trunc = debugfs_create_u64 + ("fastpath_frm_trunc", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.fastpath_frm_trunc); + vnic->dbfs.rx_no_desc_trunc = debugfs_create_u64 + ("rx_no_desc_trunc", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.rx_no_desc_trunc); + 
vnic->dbfs.events_per_poll_max = debugfs_create_u32 + ("events_per_poll_max", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.events_per_poll_max); + vnic->dbfs.events_per_poll_rx_max = debugfs_create_u32 + ("events_per_poll_rx_max", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.events_per_poll_rx_max); + vnic->dbfs.events_per_poll_tx_max = debugfs_create_u32 + ("events_per_poll_tx_max", S_IRUSR | S_IRGRP | S_IROTH, + vnic->dbfs_dir, &vnic->stats.events_per_poll_tx_max); +#endif +#endif + return 0; +} + + +int netfront_accel_debugfs_remove(netfront_accel_vnic *vnic) +{ +#if defined(CONFIG_DEBUG_FS) + if (vnic->dbfs_dir != NULL) { + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_pkts); + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_bytes); + debugfs_remove(vnic->netdev_dbfs.fastpath_rx_errors); + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_pkts); + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_bytes); + debugfs_remove(vnic->netdev_dbfs.fastpath_tx_errors); + +#if NETFRONT_ACCEL_STATS + debugfs_remove(vnic->dbfs.irq_count); + debugfs_remove(vnic->dbfs.useless_irq_count); + debugfs_remove(vnic->dbfs.poll_schedule_count); + debugfs_remove(vnic->dbfs.poll_call_count); + debugfs_remove(vnic->dbfs.poll_reschedule_count); + debugfs_remove(vnic->dbfs.queue_stops); + debugfs_remove(vnic->dbfs.queue_wakes); + debugfs_remove(vnic->dbfs.ssr_bursts); + debugfs_remove(vnic->dbfs.ssr_drop_stream); + debugfs_remove(vnic->dbfs.ssr_misorder); + debugfs_remove(vnic->dbfs.ssr_slow_start); + debugfs_remove(vnic->dbfs.ssr_merges); + debugfs_remove(vnic->dbfs.ssr_too_many); + debugfs_remove(vnic->dbfs.ssr_new_stream); + + debugfs_remove(vnic->dbfs.fastpath_tx_busy); + debugfs_remove(vnic->dbfs.fastpath_tx_completions); + debugfs_remove(vnic->dbfs.fastpath_tx_pending_max); + debugfs_remove(vnic->dbfs.event_count); + debugfs_remove(vnic->dbfs.bad_event_count); + debugfs_remove(vnic->dbfs.event_count_since_irq); + debugfs_remove(vnic->dbfs.events_per_irq_max); + debugfs_remove(vnic->dbfs.fastpath_frm_trunc); + debugfs_remove(vnic->dbfs.rx_no_desc_trunc); + debugfs_remove(vnic->dbfs.events_per_poll_max); + debugfs_remove(vnic->dbfs.events_per_poll_rx_max); + debugfs_remove(vnic->dbfs.events_per_poll_tx_max); +#endif + debugfs_remove(vnic->dbfs_dir); + } +#endif + return 0; +} diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_msg.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_msg.c @@ -0,0 +1,566 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + +#include + +#include "accel.h" +#include "accel_msg_iface.h" +#include "accel_util.h" +#include "accel_bufs.h" + +#include "netfront.h" /* drivers/xen/netfront/netfront.h */ + +static void vnic_start_interrupts(netfront_accel_vnic *vnic) +{ + unsigned long flags; + + /* Prime our interrupt */ + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); + if (!netfront_accel_vi_enable_interrupts(vnic)) { + /* Cripes, that was quick, better pass it up */ + netfront_accel_disable_net_interrupts(vnic); + vnic->irq_enabled = 0; + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_schedule_count++); + netif_rx_schedule(vnic->net_dev); + } else { + /* + * Nothing yet, make sure we get interrupts through + * back end + */ + vnic->irq_enabled = 1; + netfront_accel_enable_net_interrupts(vnic); + } + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); +} + + +static void vnic_stop_interrupts(netfront_accel_vnic *vnic) +{ + unsigned long flags; + + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); + netfront_accel_disable_net_interrupts(vnic); + vnic->irq_enabled = 0; + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); +} + + +static void vnic_start_fastpath(netfront_accel_vnic *vnic) +{ + struct net_device *net_dev = vnic->net_dev; + unsigned long flags; + + DPRINTK("%s\n", __FUNCTION__); + + spin_lock_irqsave(&vnic->tx_lock, flags); + vnic->tx_enabled = 1; + spin_unlock_irqrestore(&vnic->tx_lock, flags); + + netif_poll_disable(net_dev); + vnic->poll_enabled = 1; + netif_poll_enable(net_dev); + + vnic_start_interrupts(vnic); +} + + +void vnic_stop_fastpath(netfront_accel_vnic *vnic) +{ + struct net_device *net_dev = vnic->net_dev; + struct netfront_info *np = (struct netfront_info *)netdev_priv(net_dev); + unsigned long flags1, flags2; + + DPRINTK("%s\n", __FUNCTION__); + + vnic_stop_interrupts(vnic); + + spin_lock_irqsave(&vnic->tx_lock, flags1); + vnic->tx_enabled = 0; + spin_lock_irqsave(&np->tx_lock, flags2); + if (vnic->tx_skb != NULL) { + dev_kfree_skb_any(vnic->tx_skb); + vnic->tx_skb = NULL; + if (netfront_check_queue_ready(net_dev)) { + netif_wake_queue(net_dev); + NETFRONT_ACCEL_STATS_OP + (vnic->stats.queue_wakes++); + } + } + spin_unlock_irqrestore(&np->tx_lock, flags2); + spin_unlock_irqrestore(&vnic->tx_lock, flags1); + + /* Must prevent polls and hold lock to modify poll_enabled */ + netif_poll_disable(net_dev); + spin_lock_irqsave(&vnic->irq_enabled_lock, flags1); + vnic->poll_enabled = 0; + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags1); + netif_poll_enable(net_dev); +} + + +static void netfront_accel_interface_up(netfront_accel_vnic *vnic) +{ + + if (!vnic->backend_netdev_up) { + vnic->backend_netdev_up = 1; + + if (vnic->frontend_ready) + vnic_start_fastpath(vnic); + } +} + + +static void netfront_accel_interface_down(netfront_accel_vnic *vnic) +{ + + if (vnic->backend_netdev_up) { + vnic->backend_netdev_up = 0; + + if (vnic->frontend_ready) + vnic_stop_fastpath(vnic); + } +} + + +static int vnic_add_bufs(netfront_accel_vnic *vnic, + struct net_accel_msg *msg) +{ + int rc, offset; + struct netfront_accel_bufinfo *bufinfo; + + BUG_ON(msg->u.mapbufs.pages > NET_ACCEL_MSG_MAX_PAGE_REQ); + + offset = msg->u.mapbufs.reqid; + + if (offset < vnic->bufpages.max_pages - 
+ (vnic->bufpages.max_pages / buffer_split)) { + bufinfo = vnic->rx_bufs; + } else + bufinfo = vnic->tx_bufs; + + /* Queue up some Rx buffers to start things off. */ + if ((rc = netfront_accel_add_bufs(&vnic->bufpages, bufinfo, msg)) == 0) { + netfront_accel_vi_add_bufs(vnic, bufinfo == vnic->rx_bufs); + + if (offset + msg->u.mapbufs.pages == vnic->bufpages.max_pages) { + VPRINTK("%s: got all buffers back\n", __FUNCTION__); + vnic->frontend_ready = 1; + if (vnic->backend_netdev_up) + vnic_start_fastpath(vnic); + } else { + VPRINTK("%s: got buffers back %d %d\n", __FUNCTION__, + offset, msg->u.mapbufs.pages); + } + } + + return rc; +} + + +/* The largest [o] such that (1u << o) <= n. Requires n > 0. */ + +inline unsigned log2_le(unsigned long n) { + unsigned order = 1; + while ((1ul << order) <= n) ++order; + return (order - 1); +} + +static int vnic_send_buffer_requests(netfront_accel_vnic *vnic, + struct netfront_accel_bufpages *bufpages) +{ + int pages, offset, rc = 0, sent = 0; + struct net_accel_msg msg; + + while (bufpages->page_reqs < bufpages->max_pages) { + offset = bufpages->page_reqs; + + pages = pow2(log2_le(bufpages->max_pages - + bufpages->page_reqs)); + pages = pages < NET_ACCEL_MSG_MAX_PAGE_REQ ? + pages : NET_ACCEL_MSG_MAX_PAGE_REQ; + + BUG_ON(offset < 0); + BUG_ON(pages <= 0); + + rc = netfront_accel_buf_map_request(vnic->dev, bufpages, + &msg, pages, offset); + if (rc == 0) { + rc = net_accel_msg_send(vnic->shared_page, + &vnic->to_dom0, &msg); + if (rc < 0) { + VPRINTK("%s: queue full, stopping for now\n", + __FUNCTION__); + break; + } + sent++; + } else { + EPRINTK("%s: problem with grant, stopping for now\n", + __FUNCTION__); + break; + } + + bufpages->page_reqs += pages; + } + + if (sent) + net_accel_msg_notify(vnic->msg_channel_irq); + + return rc; +} + + +/* + * In response to dom0 saying "my queue is full", we reply with this + * when it is no longer full + */ +inline void vnic_set_queue_not_full(netfront_accel_vnic *vnic) +{ + + if (test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B, + (unsigned long *)&vnic->shared_page->aflags)) + notify_remote_via_irq(vnic->msg_channel_irq); + else + VPRINTK("queue not full bit already set, not signalling\n"); +} + +/* + * Notify dom0 that the queue we want to use is full, it should + * respond by setting MSG_AFLAGS_QUEUEUNOTFULL in due course + */ +inline void vnic_set_queue_full(netfront_accel_vnic *vnic) +{ + + if (!test_and_set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B, + (unsigned long *)&vnic->shared_page->aflags)) + notify_remote_via_irq(vnic->msg_channel_irq); + else + VPRINTK("queue full bit already set, not signalling\n"); +} + + +static int vnic_check_hello_version(unsigned version) +{ + if (version > NET_ACCEL_MSG_VERSION) { + /* Newer protocol, we must refuse */ + return -EPROTO; + } + + if (version < NET_ACCEL_MSG_VERSION) { + /* + * We are newer, so have discretion to accept if we + * wish. 
For now however, just reject + */ + return -EPROTO; + } + + BUG_ON(version != NET_ACCEL_MSG_VERSION); + return 0; +} + + +static int vnic_process_hello_msg(netfront_accel_vnic *vnic, + struct net_accel_msg *msg) +{ + int err = 0; + unsigned pages = max_pages; + + if (vnic_check_hello_version(msg->u.hello.version) < 0) { + msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY + | NET_ACCEL_MSG_ERROR; + msg->u.hello.version = NET_ACCEL_MSG_VERSION; + } else { + vnic->backend_netdev_up + = vnic->shared_page->net_dev_up; + + msg->id = NET_ACCEL_MSG_HELLO | NET_ACCEL_MSG_REPLY; + msg->u.hello.version = NET_ACCEL_MSG_VERSION; + if (msg->u.hello.max_pages && + msg->u.hello.max_pages < pages) + pages = msg->u.hello.max_pages; + msg->u.hello.max_pages = pages; + + /* Half of pages for rx, half for tx */ + err = netfront_accel_alloc_buffer_mem(&vnic->bufpages, + vnic->rx_bufs, + vnic->tx_bufs, + pages); + if (err) + msg->id |= NET_ACCEL_MSG_ERROR; + } + + /* Send reply */ + net_accel_msg_reply_notify(vnic->shared_page, vnic->msg_channel_irq, + &vnic->to_dom0, msg); + return err; +} + + +static int vnic_process_localmac_msg(netfront_accel_vnic *vnic, + struct net_accel_msg *msg) +{ + unsigned long flags; + cuckoo_hash_mac_key key; + + if (msg->u.localmac.flags & NET_ACCEL_MSG_ADD) { + DPRINTK("MAC has moved, could be local: " MAC_FMT "\n", + MAC_ARG(msg->u.localmac.mac)); + key = cuckoo_mac_to_key(msg->u.localmac.mac); + spin_lock_irqsave(&vnic->table_lock, flags); + /* Try to remove it, not a big deal if not there */ + cuckoo_hash_remove(&vnic->fastpath_table, + (cuckoo_hash_key *)&key); + spin_unlock_irqrestore(&vnic->table_lock, flags); + } + + return 0; +} + + +static +int vnic_process_rx_msg(netfront_accel_vnic *vnic, + struct net_accel_msg *msg) +{ + int err; + + switch (msg->id) { + case NET_ACCEL_MSG_HELLO: + /* Hello, reply with Reply */ + DPRINTK("got Hello, with version %.8x\n", + msg->u.hello.version); + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_NONE); + err = vnic_process_hello_msg(vnic, msg); + if (err == 0) + vnic->msg_state = NETFRONT_ACCEL_MSG_HELLO; + break; + case NET_ACCEL_MSG_SETHW: + /* Hardware info message */ + DPRINTK("got H/W info\n"); + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HELLO); + err = netfront_accel_vi_init(vnic, &msg->u.hw); + if (err == 0) + vnic->msg_state = NETFRONT_ACCEL_MSG_HW; + break; + case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY: + VPRINTK("Got mapped buffers back\n"); + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW); + err = vnic_add_bufs(vnic, msg); + break; + case NET_ACCEL_MSG_MAPBUF | NET_ACCEL_MSG_REPLY | NET_ACCEL_MSG_ERROR: + /* No buffers. Can't use the fast path. */ + EPRINTK("Got mapped buffers error. Cannot accelerate.\n"); + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW); + err = -EIO; + break; + case NET_ACCEL_MSG_LOCALMAC: + /* Should be add, remove not currently used */ + EPRINTK_ON(!(msg->u.localmac.flags & NET_ACCEL_MSG_ADD)); + BUG_ON(vnic->msg_state != NETFRONT_ACCEL_MSG_HW); + err = vnic_process_localmac_msg(vnic, msg); + break; + default: + EPRINTK("Huh? 
Message code is 0x%x\n", msg->id); + err = -EPROTO; + break; + } + + return err; +} + + +/* Process an IRQ received from back end driver */ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) +void netfront_accel_msg_from_bend(struct work_struct *context) +#else +void netfront_accel_msg_from_bend(void *context) +#endif +{ +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) + netfront_accel_vnic *vnic = + container_of(context, netfront_accel_vnic, msg_from_bend); +#else + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; +#endif + struct net_accel_msg msg; + int err, queue_was_full = 0; + + mutex_lock(&vnic->vnic_mutex); + + /* + * This happens when the shared pages have been unmapped but + * the workqueue has yet to be flushed + */ + if (!vnic->dom0_state_is_setup) + goto unlock_out; + + while ((vnic->shared_page->aflags & NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK) + != 0) { + if (vnic->shared_page->aflags & + NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL) { + /* We've been told there may now be space. */ + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B, + (unsigned long *)&vnic->shared_page->aflags); + } + + if (vnic->shared_page->aflags & + NET_ACCEL_MSG_AFLAGS_QUEUE0FULL) { + /* + * There will be space at the end of this + * function if we can make any. + */ + clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B, + (unsigned long *)&vnic->shared_page->aflags); + queue_was_full = 1; + } + + if (vnic->shared_page->aflags & + NET_ACCEL_MSG_AFLAGS_NETUPDOWN) { + DPRINTK("%s: net interface change\n", __FUNCTION__); + clear_bit(NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B, + (unsigned long *)&vnic->shared_page->aflags); + if (vnic->shared_page->net_dev_up) + netfront_accel_interface_up(vnic); + else + netfront_accel_interface_down(vnic); + } + } + + /* Pull msg out of shared memory */ + while ((err = net_accel_msg_recv(vnic->shared_page, &vnic->from_dom0, + &msg)) == 0) { + err = vnic_process_rx_msg(vnic, &msg); + + if (err != 0) + goto done; + } + + /* + * Send any pending buffer map request messages that we can, + * and mark domU->dom0 as full if necessary. + */ + if (vnic->msg_state == NETFRONT_ACCEL_MSG_HW && + vnic->bufpages.page_reqs < vnic->bufpages.max_pages) { + if (vnic_send_buffer_requests(vnic, &vnic->bufpages) == -ENOSPC) + vnic_set_queue_full(vnic); + } + + /* + * If there are no messages then this is not an error. It + * just means that we've finished processing the queue. 
+ */ + if (err == -ENOENT) + err = 0; + done: + /* We will now have made space in the dom0->domU queue if we can */ + if (queue_was_full) + vnic_set_queue_not_full(vnic); + + if (err != 0) { + EPRINTK("%s returned %d\n", __FUNCTION__, err); + netfront_accel_set_closing(vnic); + } + + unlock_out: + mutex_unlock(&vnic->vnic_mutex); + + return; +} + + +irqreturn_t netfront_accel_msg_channel_irq_from_bend(int irq, void *context, + struct pt_regs *unused) +{ + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; + VPRINTK("irq %d from device %s\n", irq, vnic->dev->nodename); + + queue_work(netfront_accel_workqueue, &vnic->msg_from_bend); + + return IRQ_HANDLED; +} + +/* Process an interrupt received from the NIC via backend */ +irqreturn_t netfront_accel_net_channel_irq_from_bend(int irq, void *context, + struct pt_regs *unused) +{ + netfront_accel_vnic *vnic = (netfront_accel_vnic *)context; + struct net_device *net_dev = vnic->net_dev; + unsigned long flags; + + VPRINTK("net irq %d from device %s\n", irq, vnic->dev->nodename); + + NETFRONT_ACCEL_STATS_OP(vnic->stats.irq_count++); + + BUG_ON(net_dev==NULL); + + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); + if (vnic->irq_enabled) { + netfront_accel_disable_net_interrupts(vnic); + vnic->irq_enabled = 0; + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); + +#if NETFRONT_ACCEL_STATS + vnic->stats.poll_schedule_count++; + if (vnic->stats.event_count_since_irq > + vnic->stats.events_per_irq_max) + vnic->stats.events_per_irq_max = + vnic->stats.event_count_since_irq; + vnic->stats.event_count_since_irq = 0; +#endif + netif_rx_schedule(net_dev); + } + else { + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); + NETFRONT_ACCEL_STATS_OP(vnic->stats.useless_irq_count++); + DPRINTK("%s: irq when disabled\n", __FUNCTION__); + } + + return IRQ_HANDLED; +} + + +void netfront_accel_msg_tx_fastpath(netfront_accel_vnic *vnic, const void *mac, + u32 ip, u16 port, u8 protocol) +{ + unsigned long lock_state; + struct net_accel_msg *msg; + + msg = net_accel_msg_start_send(vnic->shared_page, &vnic->to_dom0, + &lock_state); + + if (msg == NULL) + return; + + net_accel_msg_init(msg, NET_ACCEL_MSG_FASTPATH); + msg->u.fastpath.flags = NET_ACCEL_MSG_REMOVE; + memcpy(msg->u.fastpath.mac, mac, ETH_ALEN); + + msg->u.fastpath.port = port; + msg->u.fastpath.ip = ip; + msg->u.fastpath.proto = protocol; + + net_accel_msg_complete_send_notify(vnic->shared_page, &vnic->to_dom0, + &lock_state, vnic->msg_channel_irq); +} diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_netfront.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_netfront.c @@ -0,0 +1,318 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + +/* drivers/xen/netfront/netfront.h */ +#include "netfront.h" + +#include "accel.h" +#include "accel_bufs.h" +#include "accel_util.h" +#include "accel_msg_iface.h" +#include "accel_ssr.h" + +#ifdef EFX_GCOV +#include "gcov.h" +#endif + +#define NETFRONT_ACCEL_VNIC_FROM_NETDEV(_nd) \ + ((netfront_accel_vnic *)((struct netfront_info *)netdev_priv(net_dev))->accel_priv) + +static int netfront_accel_netdev_start_xmit(struct sk_buff *skb, + struct net_device *net_dev) +{ + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); + struct netfront_info *np = + (struct netfront_info *)netdev_priv(net_dev); + int handled, rc; + unsigned long flags1, flags2; + + BUG_ON(vnic == NULL); + + /* Take our tx lock and hold for the duration */ + spin_lock_irqsave(&vnic->tx_lock, flags1); + + if (!vnic->tx_enabled) { + rc = 0; + goto unlock_out; + } + + handled = netfront_accel_vi_tx_post(vnic, skb); + if (handled == NETFRONT_ACCEL_STATUS_BUSY) { + BUG_ON(vnic->net_dev != net_dev); + DPRINTK("%s stopping queue\n", __FUNCTION__); + + /* Netfront's lock protects tx_skb */ + spin_lock_irqsave(&np->tx_lock, flags2); + BUG_ON(vnic->tx_skb != NULL); + vnic->tx_skb = skb; + netif_stop_queue(net_dev); + spin_unlock_irqrestore(&np->tx_lock, flags2); + + NETFRONT_ACCEL_STATS_OP(vnic->stats.queue_stops++); + } + + if (handled == NETFRONT_ACCEL_STATUS_CANT) + rc = 0; + else + rc = 1; + +unlock_out: + spin_unlock_irqrestore(&vnic->tx_lock, flags1); + + return rc; +} + + +static int netfront_accel_netdev_poll(struct net_device *net_dev, int *budget) +{ + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); + int rx_allowed = *budget, rx_done; + + BUG_ON(vnic == NULL); + + /* Can check this without lock as modifier excludes polls */ + if (!vnic->poll_enabled) + return 0; + + rx_done = netfront_accel_vi_poll(vnic, rx_allowed); + *budget -= rx_done; + + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_call_count++); + + VPRINTK("%s: done %d allowed %d\n", + __FUNCTION__, rx_done, rx_allowed); + + netfront_accel_ssr_end_of_burst(vnic, &vnic->ssr_state); + + if (rx_done < rx_allowed) { + return 0; /* Done */ + } + + NETFRONT_ACCEL_STATS_OP(vnic->stats.poll_reschedule_count++); + + return 1; /* More to do. */ +} + + +/* + * Process request from netfront to start napi interrupt + * mode. (i.e. enable interrupts as it's finished polling) + */ +static int netfront_accel_start_napi_interrupts(struct net_device *net_dev) +{ + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); + unsigned long flags; + + BUG_ON(vnic == NULL); + + /* + * Can check this without lock as writer excludes poll before + * modifying + */ + if (!vnic->poll_enabled) + return 0; + + if (!netfront_accel_vi_enable_interrupts(vnic)) { + /* + * There was something there, tell caller we had + * something to do. + */ + return 1; + } + + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); + vnic->irq_enabled = 1; + netfront_accel_enable_net_interrupts(vnic); + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); + + return 0; +} + + +/* + * Process request from netfront to stop napi interrupt + * mode. (i.e. 
disable interrupts as it's starting to poll + */ +static void netfront_accel_stop_napi_interrupts(struct net_device *net_dev) +{ + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); + unsigned long flags; + + BUG_ON(vnic == NULL); + + spin_lock_irqsave(&vnic->irq_enabled_lock, flags); + + if (!vnic->poll_enabled) { + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); + return; + } + + netfront_accel_disable_net_interrupts(vnic); + vnic->irq_enabled = 0; + spin_unlock_irqrestore(&vnic->irq_enabled_lock, flags); +} + + +static int netfront_accel_check_ready(struct net_device *net_dev) +{ + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); + + BUG_ON(vnic == NULL); + + /* This is protected by netfront's lock */ + return vnic->tx_skb == NULL; +} + + +static int netfront_accel_get_stats(struct net_device *net_dev, + struct net_device_stats *stats) +{ + netfront_accel_vnic *vnic = NETFRONT_ACCEL_VNIC_FROM_NETDEV(net_dev); + struct netfront_accel_netdev_stats now; + + BUG_ON(vnic == NULL); + + now.fastpath_rx_pkts = vnic->netdev_stats.fastpath_rx_pkts; + now.fastpath_rx_bytes = vnic->netdev_stats.fastpath_rx_bytes; + now.fastpath_rx_errors = vnic->netdev_stats.fastpath_rx_errors; + now.fastpath_tx_pkts = vnic->netdev_stats.fastpath_tx_pkts; + now.fastpath_tx_bytes = vnic->netdev_stats.fastpath_tx_bytes; + now.fastpath_tx_errors = vnic->netdev_stats.fastpath_tx_errors; + + stats->rx_packets += (now.fastpath_rx_pkts - + vnic->stats_last_read.fastpath_rx_pkts); + stats->rx_bytes += (now.fastpath_rx_bytes - + vnic->stats_last_read.fastpath_rx_bytes); + stats->rx_errors += (now.fastpath_rx_errors - + vnic->stats_last_read.fastpath_rx_errors); + stats->tx_packets += (now.fastpath_tx_pkts - + vnic->stats_last_read.fastpath_tx_pkts); + stats->tx_bytes += (now.fastpath_tx_bytes - + vnic->stats_last_read.fastpath_tx_bytes); + stats->tx_errors += (now.fastpath_tx_errors - + vnic->stats_last_read.fastpath_tx_errors); + + vnic->stats_last_read = now; + + return 0; +} + + +struct netfront_accel_hooks accel_hooks = { + .new_device = &netfront_accel_probe, + .remove = &netfront_accel_remove, + .netdev_poll = &netfront_accel_netdev_poll, + .start_xmit = &netfront_accel_netdev_start_xmit, + .start_napi_irq = &netfront_accel_start_napi_interrupts, + .stop_napi_irq = &netfront_accel_stop_napi_interrupts, + .check_ready = &netfront_accel_check_ready, + .get_stats = &netfront_accel_get_stats +}; + + +unsigned max_pages = NETFRONT_ACCEL_DEFAULT_BUF_PAGES; +module_param (max_pages, int, 0666); +MODULE_PARM_DESC(max_pages, "Number of buffer pages to request"); + +unsigned buffer_split = 2; +module_param (buffer_split, int, 0666); +MODULE_PARM_DESC(buffer_split, "Fraction of buffers to use for TX, rest for RX"); + + +const char *frontend_name = "sfc_netfront"; + +struct workqueue_struct *netfront_accel_workqueue; + +static int __init netfront_accel_init(void) +{ + int rc; +#ifdef EFX_GCOV + gcov_provider_init(THIS_MODULE); +#endif + + /* + * If we're running on dom0, netfront hasn't initialised + * itself, so we need to keep away + */ + if (is_initial_xendomain()) + return 0; + + if (!is_pow2(sizeof(struct net_accel_msg))) + EPRINTK("%s: bad structure size\n", __FUNCTION__); + + netfront_accel_workqueue = create_workqueue(frontend_name); + + netfront_accel_debugfs_init(); + + rc = netfront_accelerator_loaded(NETFRONT_ACCEL_VERSION, + frontend_name, &accel_hooks); + + if (rc < 0) { + EPRINTK("Xen netfront accelerator version mismatch\n"); + return -EINVAL; + } + + if (rc > 0) { + 
/* + * In future may want to add backwards compatibility + * and accept certain subsets of previous versions + */ + EPRINTK("Xen netfront accelerator version mismatch\n"); + return -EINVAL; + } + + return 0; +} +module_init(netfront_accel_init); + +static void __exit netfront_accel_exit(void) +{ + if (is_initial_xendomain()) + return; + + DPRINTK("%s: unhooking\n", __FUNCTION__); + + /* Unhook from normal netfront */ + netfront_accelerator_stop(frontend_name); + + DPRINTK("%s: done\n", __FUNCTION__); + + netfront_accel_debugfs_fini(); + + flush_workqueue(netfront_accel_workqueue); + + destroy_workqueue(netfront_accel_workqueue); + +#ifdef EFX_GCOV + gcov_provider_fini(THIS_MODULE); +#endif + return; +} +module_exit(netfront_accel_exit); + +MODULE_LICENSE("GPL"); + diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_ssr.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_ssr.c @@ -0,0 +1,308 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "accel.h" +#include "accel_util.h" +#include "accel_bufs.h" + +#include "accel_ssr.h" + +static inline int list_valid(struct list_head *lh) { + return(lh->next != NULL); +} + +static void netfront_accel_ssr_deliver (struct netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st, + struct netfront_accel_ssr_conn *c); + +/** Construct an efx_ssr_state. + * + * @v st The SSR state (per channel per port) + * @v port The port. + */ +void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st) { + unsigned i; + + INIT_LIST_HEAD(&st->conns); + INIT_LIST_HEAD(&st->free_conns); + for (i = 0; i < 8; ++i) { + struct netfront_accel_ssr_conn *c = + kmalloc(sizeof(*c), GFP_KERNEL); + if (c == NULL) break; + c->n_in_order_pkts = 0; + c->skb = NULL; + list_add(&c->link, &st->free_conns); + } + +} + + +/** Destructor for an efx_ssr_state. + * + * @v st The SSR state (per channel per port) + */ +void netfront_accel_ssr_fini(netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st) { + struct netfront_accel_ssr_conn *c; + + /* Return cleanly if efx_ssr_init() not previously called */ + BUG_ON(list_valid(&st->conns) != list_valid(&st->free_conns)); + if (! list_valid(&st->conns)) + return; + + while ( ! list_empty(&st->free_conns)) { + c = list_entry(st->free_conns.prev, + struct netfront_accel_ssr_conn, link); + list_del(&c->link); + BUG_ON(c->skb != NULL); + kfree(c); + } + while ( ! 
list_empty(&st->conns)) { + c = list_entry(st->conns.prev, + struct netfront_accel_ssr_conn, link); + list_del(&c->link); + if (c->skb) + netfront_accel_ssr_deliver(vnic, st, c); + kfree(c); + } +} + + +/** Calc IP checksum and deliver to the OS + * + * @v st The SSR state (per channel per port) + * @v c The SSR connection state + */ +static void netfront_accel_ssr_deliver(netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st, + struct netfront_accel_ssr_conn *c) { + BUG_ON(c->skb == NULL); + + /* + * If we've chained packets together, recalculate the IP + * checksum. + */ + if (skb_shinfo(c->skb)->frag_list) { + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_bursts); + c->iph->check = 0; + c->iph->check = ip_fast_csum((unsigned char *) c->iph, + c->iph->ihl); + } + + VPRINTK("%s: %d\n", __FUNCTION__, c->skb->len); + + netif_receive_skb(c->skb); + c->skb = NULL; +} + + +/** Push held skbs down into network stack. + * + * @v st SSR state + * + * Only called if we are tracking one or more connections. + */ +void __netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st) { + struct netfront_accel_ssr_conn *c; + + BUG_ON(list_empty(&st->conns)); + + list_for_each_entry(c, &st->conns, link) + if (c->skb) + netfront_accel_ssr_deliver(vnic, st, c); + + /* Time-out connections that have received no traffic for 20ms. */ + c = list_entry(st->conns.prev, struct netfront_accel_ssr_conn, + link); + if (jiffies - c->last_pkt_jiffies > (HZ / 50 + 1)) { + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_drop_stream); + list_del(&c->link); + list_add(&c->link, &st->free_conns); + } +} + + +/** Process SKB and decide whether to dispatch it to the stack now or + * later. + * + * @v st SSR state + * @v skb SKB to exmaine + * @ret rc 0 => deliver SKB to kernel now, otherwise the SKB belongs + * us. + */ +int netfront_accel_ssr_skb(struct netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st, + struct sk_buff *skb) { + int data_length, dont_merge; + struct netfront_accel_ssr_conn *c; + struct iphdr *iph; + struct tcphdr *th; + unsigned th_seq; + + BUG_ON(skb_shinfo(skb)->frag_list != NULL); + BUG_ON(skb->next != NULL); + + /* We're not interested if it isn't TCP over IPv4. */ + iph = (struct iphdr *) skb->data; + if (skb->protocol != htons(ETH_P_IP) || + iph->protocol != IPPROTO_TCP) { + return 0; + } + + /* Ignore segments that fail csum or are fragmented. */ + if (unlikely((skb->ip_summed - CHECKSUM_UNNECESSARY) | + (iph->frag_off & htons(IP_MF | IP_OFFSET)))) { + return 0; + } + + th = (struct tcphdr*)(skb->data + iph->ihl * 4); + data_length = ntohs(iph->tot_len) - iph->ihl * 4 - th->doff * 4; + th_seq = ntohl(th->seq); + dont_merge = (data_length == 0) | th->urg | th->syn | th->rst; + + list_for_each_entry(c, &st->conns, link) { + if ((c->saddr - iph->saddr) | + (c->daddr - iph->daddr) | + (c->source - th->source) | + (c->dest - th->dest )) + continue; + + /* Re-insert at head of list to reduce lookup time. */ + list_del(&c->link); + list_add(&c->link, &st->conns); + c->last_pkt_jiffies = jiffies; + + if (unlikely(th_seq - c->next_seq)) { + /* Out-of-order, so start counting again. */ + if (c->skb) + netfront_accel_ssr_deliver(vnic, st, c); + c->n_in_order_pkts = 0; + c->next_seq = th_seq + data_length; + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_misorder); + return 0; + } + c->next_seq = th_seq + data_length; + + if (++c->n_in_order_pkts < 300) { + /* May be in slow-start, so don't merge. 
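The connection walk above compares the IPv4 addresses and TCP ports by subtracting the fields and OR-ing the results, so a single non-zero test replaces four branches. A minimal standalone sketch of that idiom, using a simplified stand-in key rather than the driver's netfront_accel_ssr_conn:

#include <stdint.h>

/* Simplified stand-in for the fields compared in netfront_accel_ssr_skb(). */
struct flow_key {
	uint32_t saddr, daddr;	/* IPv4 addresses, network byte order */
	uint16_t source, dest;	/* TCP ports, network byte order */
};

/* Returns non-zero if the two keys differ in any field.  Subtracting and
 * OR-ing the differences gives one test instead of four branches. */
static inline uint32_t flow_key_mismatch(const struct flow_key *a,
					 const struct flow_key *b)
{
	return (a->saddr - b->saddr) |
	       (a->daddr - b->daddr) |
	       (uint32_t)(a->source - b->source) |
	       (uint32_t)(a->dest   - b->dest);
}

A caller skips a candidate with "if (flow_key_mismatch(&a, &b)) continue;", mirroring the list walk in netfront_accel_ssr_skb().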
*/ + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_slow_start); + return 0; + } + + if (unlikely(dont_merge)) { + if (c->skb) + netfront_accel_ssr_deliver(vnic, st, c); + return 0; + } + + if (c->skb) { + c->iph->tot_len = ntohs(c->iph->tot_len); + c->iph->tot_len += data_length; + c->iph->tot_len = htons(c->iph->tot_len); + c->th->ack_seq = th->ack_seq; + c->th->fin |= th->fin; + c->th->psh |= th->psh; + c->th->window = th->window; + + /* Remove the headers from this skb. */ + skb_pull(skb, skb->len - data_length); + + /* + * Tack the new skb onto the head skb's frag_list. + * This is exactly the format that fragmented IP + * datagrams are reassembled into. + */ + BUG_ON(skb->next != 0); + if ( ! skb_shinfo(c->skb)->frag_list) + skb_shinfo(c->skb)->frag_list = skb; + else + c->skb_tail->next = skb; + c->skb_tail = skb; + c->skb->len += skb->len; + c->skb->data_len += skb->len; + c->skb->truesize += skb->truesize; + + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_merges); + + /* + * If the next packet might push this super-packet + * over the limit for an IP packet, deliver it now. + * This is slightly conservative, but close enough. + */ + if (c->skb->len + + (PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) + > 16384) + netfront_accel_ssr_deliver(vnic, st, c); + + return 1; + } + else { + c->iph = iph; + c->th = th; + c->skb = skb; + return 1; + } + } + + /* We're not yet tracking this connection. */ + + if (dont_merge) { + return 0; + } + + if (list_empty(&st->free_conns)) { + c = list_entry(st->conns.prev, + struct netfront_accel_ssr_conn, + link); + if (c->skb) { + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_too_many); + return 0; + } + } + else { + c = list_entry(st->free_conns.next, + struct netfront_accel_ssr_conn, + link); + } + list_del(&c->link); + list_add(&c->link, &st->conns); + c->saddr = iph->saddr; + c->daddr = iph->daddr; + c->source = th->source; + c->dest = th->dest; + c->next_seq = th_seq + data_length; + c->n_in_order_pkts = 0; + BUG_ON(c->skb != NULL); + NETFRONT_ACCEL_STATS_OP(++vnic->stats.ssr_new_stream); + return 0; +} diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_ssr.h --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_ssr.h @@ -0,0 +1,88 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NETFRONT_ACCEL_SSR_H +#define NETFRONT_ACCEL_SSR_H + +#include +#include +#include +#include + +#include "accel.h" + +/** State for Soft Segment Reassembly (SSR). 
*/ + +struct netfront_accel_ssr_conn { + struct list_head link; + + unsigned saddr, daddr; + unsigned short source, dest; + + /** Number of in-order packets we've seen with payload. */ + unsigned n_in_order_pkts; + + /** Next in-order sequence number. */ + unsigned next_seq; + + /** Time we last saw a packet on this connection. */ + unsigned long last_pkt_jiffies; + + /** The SKB we are currently holding. If NULL, then all following + * fields are undefined. + */ + struct sk_buff *skb; + + /** The tail of the frag_list of SKBs we're holding. Only valid + * after at least one merge. + */ + struct sk_buff *skb_tail; + + /** The IP header of the skb we are holding. */ + struct iphdr *iph; + + /** The TCP header of the skb we are holding. */ + struct tcphdr *th; +}; + +extern void netfront_accel_ssr_init(struct netfront_accel_ssr_state *st); +extern void netfront_accel_ssr_fini(netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st); + +extern void +__netfront_accel_ssr_end_of_burst(netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st); + +extern int netfront_accel_ssr_skb(netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st, + struct sk_buff *skb); + +static inline void +netfront_accel_ssr_end_of_burst (netfront_accel_vnic *vnic, + struct netfront_accel_ssr_state *st) { + if ( ! list_empty(&st->conns) ) + __netfront_accel_ssr_end_of_burst(vnic, st); +} + +#endif /* NETFRONT_ACCEL_SSR_H */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_tso.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_tso.c @@ -0,0 +1,512 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include + +#include "accel.h" +#include "accel_util.h" + +#include "accel_tso.h" + +#define PTR_DIFF(p1, p2) ((u8*)(p1) - (u8*)(p2)) +#define ETH_HDR_LEN(skb) ((skb)->nh.raw - (skb)->data) +#define SKB_TCP_OFF(skb) PTR_DIFF ((skb)->h.th, (skb)->data) +#define SKB_IP_OFF(skb) PTR_DIFF ((skb)->nh.iph, (skb)->data) + +/* + * Set a maximum number of buffers in each output packet to make life + * a little simpler - if this is reached it will just move on to + * another packet + */ +#define ACCEL_TSO_MAX_BUFFERS (6) + +/** TSO State. + * + * The state used during segmentation. It is put into this data structure + * just to make it easy to pass into inline functions. 
+ */ +struct netfront_accel_tso_state { + /** bytes of data we've yet to segment */ + unsigned remaining_len; + + /** current sequence number */ + unsigned seqnum; + + /** remaining space in current packet */ + unsigned packet_space; + + /** List of packets to be output, containing the buffers and + * iovecs to describe each packet + */ + struct netfront_accel_tso_output_packet *output_packets; + + /** Total number of buffers in output_packets */ + unsigned buffers; + + /** Total number of packets in output_packets */ + unsigned packets; + + /** Input Fragment Cursor. + * + * Where we are in the current fragment of the incoming SKB. These + * values get updated in place when we split a fragment over + * multiple packets. + */ + struct { + /** address of current position */ + void *addr; + /** remaining length */ + unsigned int len; + } ifc; /* == ifc Input Fragment Cursor */ + + /** Parameters. + * + * These values are set once at the start of the TSO send and do + * not get changed as the routine progresses. + */ + struct { + /* the number of bytes of header */ + unsigned int header_length; + + /* The number of bytes to put in each outgoing segment. */ + int full_packet_size; + + /* Current IP ID, host endian. */ + unsigned ip_id; + + /* Max size of each output packet payload */ + int gso_size; + } p; +}; + + +/** + * Verify that our various assumptions about sk_buffs and the conditions + * under which TSO will be attempted hold true. + * + * @v skb The sk_buff to check. + */ +static inline void tso_check_safe(struct sk_buff *skb) { + EPRINTK_ON(skb->protocol != htons (ETH_P_IP)); + EPRINTK_ON(((struct ethhdr*) skb->data)->h_proto != htons (ETH_P_IP)); + EPRINTK_ON(skb->nh.iph->protocol != IPPROTO_TCP); + EPRINTK_ON((PTR_DIFF(skb->h.th, skb->data) + + (skb->h.th->doff << 2u)) > skb_headlen(skb)); +} + + + +/** Parse the SKB header and initialise state. */ +static inline void tso_start(struct netfront_accel_tso_state *st, + struct sk_buff *skb) { + + /* + * All ethernet/IP/TCP headers combined size is TCP header size + * plus offset of TCP header relative to start of packet. 
+ */ + st->p.header_length = ((skb->h.th->doff << 2u) + + PTR_DIFF(skb->h.th, skb->data)); + st->p.full_packet_size = (st->p.header_length + + skb_shinfo(skb)->gso_size); + st->p.gso_size = skb_shinfo(skb)->gso_size; + + st->p.ip_id = htons(skb->nh.iph->id); + st->seqnum = ntohl(skb->h.th->seq); + + EPRINTK_ON(skb->h.th->urg); + EPRINTK_ON(skb->h.th->syn); + EPRINTK_ON(skb->h.th->rst); + + st->remaining_len = skb->len - st->p.header_length; + + st->output_packets = NULL; + st->buffers = 0; + st->packets = 0; + + VPRINTK("Starting new TSO: hl %d ps %d gso %d seq %x len %d\n", + st->p.header_length, st->p.full_packet_size, st->p.gso_size, + st->seqnum, skb->len); +} + +/** + * Add another NIC mapped buffer onto an output packet + */ +static inline int tso_start_new_buffer(netfront_accel_vnic *vnic, + struct netfront_accel_tso_state *st, + int first) +{ + struct netfront_accel_tso_buffer *tso_buf; + struct netfront_accel_pkt_desc *buf; + + /* Get a mapped packet buffer */ + buf = netfront_accel_buf_get(vnic->tx_bufs); + if (buf == NULL) { + DPRINTK("%s: No buffer for TX\n", __FUNCTION__); + return -1; + } + + /* Store a bit of meta-data at the end */ + tso_buf =(struct netfront_accel_tso_buffer *) + (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH + + sizeof(struct netfront_accel_tso_output_packet)); + + tso_buf->buf = buf; + + tso_buf->length = 0; + + if (first) { + struct netfront_accel_tso_output_packet *output_packet + = (struct netfront_accel_tso_output_packet *) + (buf->pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH); + output_packet->next = st->output_packets; + st->output_packets = output_packet; + tso_buf->next = NULL; + st->output_packets->tso_bufs = tso_buf; + st->output_packets->tso_bufs_len = 1; + } else { + tso_buf->next = st->output_packets->tso_bufs; + st->output_packets->tso_bufs = tso_buf; + st->output_packets->tso_bufs_len ++; + } + + BUG_ON(st->output_packets->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS); + + st->buffers ++; + + /* + * Store the context, set to NULL, last packet buffer will get + * non-NULL later + */ + tso_buf->buf->skb = NULL; + + return 0; +} + + +/* Generate a new header, and prepare for the new packet. + * + * @v vnic VNIC + * @v skb Socket buffer + * @v st TSO state + * @ret rc 0 on success, or -1 if failed to alloc header + */ + +static inline +int tso_start_new_packet(netfront_accel_vnic *vnic, + struct sk_buff *skb, + struct netfront_accel_tso_state *st) +{ + struct netfront_accel_tso_buffer *tso_buf; + struct iphdr *tsoh_iph; + struct tcphdr *tsoh_th; + unsigned ip_length; + + if (tso_start_new_buffer(vnic, st, 1) < 0) { + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); + return -1; + } + + /* This has been set up by tso_start_new_buffer() */ + tso_buf = st->output_packets->tso_bufs; + + /* Copy in the header */ + memcpy(tso_buf->buf->pkt_kva, skb->data, st->p.header_length); + tso_buf->length = st->p.header_length; + + tsoh_th = (struct tcphdr*) + (tso_buf->buf->pkt_kva + SKB_TCP_OFF(skb)); + tsoh_iph = (struct iphdr*) + (tso_buf->buf->pkt_kva + SKB_IP_OFF(skb)); + + /* Set to zero to encourage falcon to fill these in */ + tsoh_th->check = 0; + tsoh_iph->check = 0; + + tsoh_th->seq = htonl(st->seqnum); + st->seqnum += st->p.gso_size; + + if (st->remaining_len > st->p.gso_size) { + /* This packet will not finish the TSO burst. */ + ip_length = st->p.full_packet_size - ETH_HDR_LEN(skb); + tsoh_th->fin = 0; + tsoh_th->psh = 0; + } else { + /* This packet will be the last in the TSO burst. 
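tso_start_new_packet() stamps each segment's copied headers with a running TCP sequence number and a fresh IP ID, zeroes the checksums for the NIC to fill in, and carries the original fin/psh flags only on the final segment. A minimal sketch of those per-segment fixups, assuming the headers have already been copied into the new buffer and using cut-down stand-in structures rather than the kernel's iphdr/tcphdr:

#include <stdint.h>
#include <arpa/inet.h>	/* htons, htonl */

/* Only the fields the per-segment fixup touches; illustration only. */
struct ex_iph  { uint16_t tot_len, id, check; };
struct ex_tcph { uint32_t seq; uint16_t check; uint8_t fin, psh; };

struct ex_tso_cursor {
	unsigned seqnum;	/* TCP sequence number for this segment (host order) */
	unsigned ip_id;		/* IP ID for this segment (host order) */
	unsigned gso_size;	/* payload carried by each full segment */
};

static void ex_stamp_segment(struct ex_tso_cursor *c, struct ex_iph *ip,
			     struct ex_tcph *tcp, unsigned ip_len,
			     int last, uint8_t orig_fin, uint8_t orig_psh)
{
	/* Zeroed so the NIC can fill the checksums in, as in the driver. */
	tcp->check = 0;
	ip->check = 0;

	tcp->seq = htonl(c->seqnum);
	c->seqnum += c->gso_size;	/* advance for the next segment */

	/* fin/psh only apply to the last segment of the burst. */
	tcp->fin = last ? orig_fin : 0;
	tcp->psh = last ? orig_psh : 0;

	ip->tot_len = htons(ip_len);
	ip->id = htons(c->ip_id++);	/* Linux leaves gaps in the IP ID space */
}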
*/ + ip_length = (st->p.header_length - ETH_HDR_LEN(skb) + + st->remaining_len); + tsoh_th->fin = skb->h.th->fin; + tsoh_th->psh = skb->h.th->psh; + } + + tsoh_iph->tot_len = htons(ip_length); + + /* Linux leaves suitable gaps in the IP ID space for us to fill. */ + tsoh_iph->id = st->p.ip_id++; + tsoh_iph->id = htons(tsoh_iph->id); + + st->packet_space = st->p.gso_size; + + st->packets++; + + return 0; +} + + + +static inline void tso_get_fragment(struct netfront_accel_tso_state *st, + int len, void *addr) +{ + st->ifc.len = len; + st->ifc.addr = addr; + return; +} + + +static inline void tso_unwind(netfront_accel_vnic *vnic, + struct netfront_accel_tso_state *st) +{ + struct netfront_accel_tso_buffer *tso_buf; + struct netfront_accel_tso_output_packet *output_packet; + + DPRINTK("%s\n", __FUNCTION__); + + while (st->output_packets != NULL) { + output_packet = st->output_packets; + st->output_packets = output_packet->next; + while (output_packet->tso_bufs != NULL) { + tso_buf = output_packet->tso_bufs; + output_packet->tso_bufs = tso_buf->next; + + st->buffers --; + output_packet->tso_bufs_len --; + + netfront_accel_buf_put(vnic->tx_bufs, + tso_buf->buf->buf_id); + } + } + BUG_ON(st->buffers != 0); +} + + + +static inline +void tso_fill_packet_with_fragment(netfront_accel_vnic *vnic, + struct netfront_accel_tso_state *st) +{ + struct netfront_accel_tso_buffer *tso_buf; + int n, space; + + BUG_ON(st->output_packets == NULL); + BUG_ON(st->output_packets->tso_bufs == NULL); + + tso_buf = st->output_packets->tso_bufs; + + if (st->ifc.len == 0) return; + if (st->packet_space == 0) return; + if (tso_buf->length == NETFRONT_ACCEL_TSO_BUF_LENGTH) return; + + n = min(st->ifc.len, st->packet_space); + + space = NETFRONT_ACCEL_TSO_BUF_LENGTH - tso_buf->length; + n = min(n, space); + + st->packet_space -= n; + st->remaining_len -= n; + st->ifc.len -= n; + + memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n); + + tso_buf->length += n; + + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TSO_BUF_LENGTH); + + st->ifc.addr += n; + + return; +} + + +int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic, + struct sk_buff *skb) +{ + struct netfront_accel_tso_state state; + struct netfront_accel_tso_buffer *tso_buf = NULL; + struct netfront_accel_tso_output_packet *reversed_list = NULL; + struct netfront_accel_tso_output_packet *tmp_pkt; + ef_iovec iovecs[ACCEL_TSO_MAX_BUFFERS]; + int frag_i, rc, dma_id; + skb_frag_t *f; + + tso_check_safe(skb); + + if (skb->ip_summed != CHECKSUM_HW) + EPRINTK("Trying to TSO send a packet without HW checksum\n"); + + tso_start(&state, skb); + + /* + * Setup the first payload fragment. If the skb header area + * contains exactly the headers and all payload is in the frag + * list things are little simpler + */ + if (skb_headlen(skb) == state.p.header_length) { + /* Grab the first payload fragment. */ + BUG_ON(skb_shinfo(skb)->nr_frags < 1); + frag_i = 0; + f = &skb_shinfo(skb)->frags[frag_i]; + tso_get_fragment(&state, f->size, + page_address(f->page) + f->page_offset); + } else { + int hl = state.p.header_length; + tso_get_fragment(&state, skb_headlen(skb) - hl, + skb->data + hl); + frag_i = -1; + } + + if (tso_start_new_packet(vnic, skb, &state) < 0) { + DPRINTK("%s: out of first start-packet memory\n", + __FUNCTION__); + goto unwind; + } + + while (1) { + tso_fill_packet_with_fragment(vnic, &state); + + /* Move onto the next fragment? */ + if (state.ifc.len == 0) { + if (++frag_i >= skb_shinfo(skb)->nr_frags) + /* End of payload reached. 
*/ + break; + f = &skb_shinfo(skb)->frags[frag_i]; + tso_get_fragment(&state, f->size, + page_address(f->page) + + f->page_offset); + } + + /* Start a new buffer? */ + if ((state.output_packets->tso_bufs->length == + NETFRONT_ACCEL_TSO_BUF_LENGTH) && + tso_start_new_buffer(vnic, &state, 0)) { + DPRINTK("%s: out of start-buffer memory\n", + __FUNCTION__); + goto unwind; + } + + /* Start at new packet? */ + if ((state.packet_space == 0 || + ((state.output_packets->tso_bufs_len >= + ACCEL_TSO_MAX_BUFFERS) && + (state.output_packets->tso_bufs->length >= + NETFRONT_ACCEL_TSO_BUF_LENGTH))) && + tso_start_new_packet(vnic, skb, &state) < 0) { + DPRINTK("%s: out of start-packet memory\n", + __FUNCTION__); + goto unwind; + } + + } + + /* Check for space */ + if (ef_vi_transmit_space(&vnic->vi) < state.buffers) { + DPRINTK("%s: Not enough TX space (%d)\n", + __FUNCTION__, state.buffers); + goto unwind; + } + + /* + * Store the skb context in the most recent buffer (i.e. the + * last buffer that will be sent) + */ + state.output_packets->tso_bufs->buf->skb = skb; + + /* Reverse the list of packets as we construct it on a stack */ + while (state.output_packets != NULL) { + tmp_pkt = state.output_packets; + state.output_packets = tmp_pkt->next; + tmp_pkt->next = reversed_list; + reversed_list = tmp_pkt; + } + + /* Pass off to hardware */ + while (reversed_list != NULL) { + tmp_pkt = reversed_list; + reversed_list = tmp_pkt->next; + + BUG_ON(tmp_pkt->tso_bufs_len > ACCEL_TSO_MAX_BUFFERS); + BUG_ON(tmp_pkt->tso_bufs_len == 0); + + dma_id = tmp_pkt->tso_bufs->buf->buf_id; + + /* + * Make an iovec of the buffers in the list, reversing + * the buffers as we go as they are constructed on a + * stack + */ + tso_buf = tmp_pkt->tso_bufs; + for (frag_i = tmp_pkt->tso_bufs_len - 1; + frag_i >= 0; + frag_i--) { + iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr; + iovecs[frag_i].iov_len = tso_buf->length; + tso_buf = tso_buf->next; + } + + rc = ef_vi_transmitv(&vnic->vi, iovecs, tmp_pkt->tso_bufs_len, + dma_id); + /* + * We checked for space already, so it really should + * succeed + */ + BUG_ON(rc != 0); + } + + /* Track number of tx fastpath stats */ + vnic->netdev_stats.fastpath_tx_bytes += skb->len; + vnic->netdev_stats.fastpath_tx_pkts += state.packets; +#if NETFRONT_ACCEL_STATS + { + unsigned n; + n = vnic->netdev_stats.fastpath_tx_pkts - + vnic->stats.fastpath_tx_completions; + if (n > vnic->stats.fastpath_tx_pending_max) + vnic->stats.fastpath_tx_pending_max = n; + } +#endif + + return NETFRONT_ACCEL_STATUS_GOOD; + + unwind: + tso_unwind(vnic, &state); + + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); + + return NETFRONT_ACCEL_STATUS_BUSY; +} + + + diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_tso.h --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_tso.h @@ -0,0 +1,57 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NETFRONT_ACCEL_TSO_H +#define NETFRONT_ACCEL_TSO_H + +#include "accel_bufs.h" + +/* Track the buffers used in each output packet */ +struct netfront_accel_tso_buffer { + struct netfront_accel_tso_buffer *next; + struct netfront_accel_pkt_desc *buf; + unsigned length; +}; + +/* Track the output packets formed from each input packet */ +struct netfront_accel_tso_output_packet { + struct netfront_accel_tso_output_packet *next; + struct netfront_accel_tso_buffer *tso_bufs; + unsigned tso_bufs_len; +}; + + +/* + * Max available space in a buffer for data once meta-data has taken + * its place + */ +#define NETFRONT_ACCEL_TSO_BUF_LENGTH \ + ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \ + - sizeof(struct netfront_accel_tso_buffer) \ + - sizeof(struct netfront_accel_tso_output_packet)) + +int netfront_accel_enqueue_skb_tso(netfront_accel_vnic *vnic, + struct sk_buff *skb); + +#endif /* NETFRONT_ACCEL_TSO_H */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_vi.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_vi.c @@ -0,0 +1,1194 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
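NETFRONT_ACCEL_TSO_BUF_LENGTH above reserves room at the tail of each per-buffer slice of the page for the TSO metadata that tso_start_new_buffer() later writes at pkt_kva + NETFRONT_ACCEL_TSO_BUF_LENGTH. A small worked sketch of that layout, using hypothetical sizes and stand-in structs; the real NETFRONT_ACCEL_BUFS_PER_PAGE and structure sizes come from accel_bufs.h and accel_tso.h:

#include <stdio.h>
#include <stddef.h>

/* Illustration only: stand-ins for the driver's metadata structures. */
struct ex_tso_buffer        { void *next, *buf; unsigned length; };
struct ex_tso_output_packet { void *next, *tso_bufs; unsigned tso_bufs_len; };

#define EX_PAGE_SIZE      4096u
#define EX_BUFS_PER_PAGE  2u	/* hypothetical stand-in value */

/* Usable payload once the tail metadata has taken its place, mirroring
 * NETFRONT_ACCEL_TSO_BUF_LENGTH. */
#define EX_TSO_BUF_LENGTH \
	((EX_PAGE_SIZE / EX_BUFS_PER_PAGE)		\
	 - sizeof(struct ex_tso_buffer)			\
	 - sizeof(struct ex_tso_output_packet))

int main(void)
{
	/* Layout within one buffer slice:
	 *   [ payload: EX_TSO_BUF_LENGTH bytes ]
	 *   [ ex_tso_output_packet ]  (used only by the packet's first buffer)
	 *   [ ex_tso_buffer ]
	 */
	printf("payload bytes per buffer: %zu\n", (size_t)EX_TSO_BUF_LENGTH);
	printf("output_packet at offset:  %zu\n", (size_t)EX_TSO_BUF_LENGTH);
	printf("tso_buffer at offset:     %zu\n",
	       (size_t)EX_TSO_BUF_LENGTH + sizeof(struct ex_tso_output_packet));
	return 0;
}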
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include + +#include "accel.h" +#include "accel_util.h" +#include "accel_bufs.h" +#include "accel_tso.h" +#include "accel_ssr.h" +#include "netfront.h" + +#include "etherfabric/ef_vi.h" + +/* + * Max available space in a buffer for data once meta-data has taken + * its place + */ +#define NETFRONT_ACCEL_TX_BUF_LENGTH \ + ((PAGE_SIZE / NETFRONT_ACCEL_BUFS_PER_PAGE) \ + - sizeof(struct netfront_accel_tso_buffer)) + +#define ACCEL_TX_MAX_BUFFERS (6) +#define ACCEL_VI_POLL_EVENTS (8) + +static +int netfront_accel_vi_init_fini(netfront_accel_vnic *vnic, + struct net_accel_msg_hw *hw_msg) +{ + struct ef_vi_nic_type nic_type; + struct net_accel_hw_falcon_b *hw_info; + void *io_kva, *evq_base, *rx_dma_kva, *tx_dma_kva, *doorbell_kva; + u32 *evq_gnts; + u32 evq_order; + int vi_state_size; + u8 vi_data[VI_MAPPINGS_SIZE]; + + if (hw_msg == NULL) + goto fini; + + /* And create the local macs table lock */ + spin_lock_init(&vnic->table_lock); + + /* Create fastpath table, initial size 8, key length 8 */ + if (cuckoo_hash_init(&vnic->fastpath_table, 3, 8)) { + EPRINTK("failed to allocate fastpath table\n"); + goto fail_cuckoo; + } + + vnic->hw.falcon.type = hw_msg->type; + + switch (hw_msg->type) { + case NET_ACCEL_MSG_HWTYPE_FALCON_A: + hw_info = &hw_msg->resources.falcon_a.common; + /* Need the extra rptr register page on A1 */ + io_kva = net_accel_map_iomem_page + (vnic->dev, hw_msg->resources.falcon_a.evq_rptr_gnt, + &vnic->hw.falcon.evq_rptr_mapping); + if (io_kva == NULL) { + EPRINTK("%s: evq_rptr permission failed\n", __FUNCTION__); + goto evq_rptr_fail; + } + + vnic->hw.falcon.evq_rptr = io_kva + + (hw_info->evq_rptr & (PAGE_SIZE - 1)); + break; + case NET_ACCEL_MSG_HWTYPE_FALCON_B: + hw_info = &hw_msg->resources.falcon_b; + break; + default: + goto bad_type; + } + + /**** Event Queue ****/ + + /* Map the event queue pages */ + evq_gnts = hw_info->evq_mem_gnts; + evq_order = hw_info->evq_order; + + EPRINTK_ON(hw_info->evq_offs != 0); + + DPRINTK("Will map evq %d pages\n", 1 << evq_order); + + evq_base = + net_accel_map_grants_contig(vnic->dev, evq_gnts, 1 << evq_order, + &vnic->evq_mapping); + if (evq_base == NULL) { + EPRINTK("%s: evq_base failed\n", __FUNCTION__); + goto evq_fail; + } + + /**** Doorbells ****/ + /* Set up the doorbell mappings. */ + doorbell_kva = + net_accel_map_iomem_page(vnic->dev, hw_info->doorbell_gnt, + &vnic->hw.falcon.doorbell_mapping); + if (doorbell_kva == NULL) { + EPRINTK("%s: doorbell permission failed\n", __FUNCTION__); + goto doorbell_fail; + } + vnic->hw.falcon.doorbell = doorbell_kva; + + /* On Falcon_B we get the rptr from the doorbell page */ + if (hw_msg->type == NET_ACCEL_MSG_HWTYPE_FALCON_B) { + vnic->hw.falcon.evq_rptr = + (u32 *)((char *)vnic->hw.falcon.doorbell + + hw_info->evq_rptr); + } + + /**** DMA Queue ****/ + + /* Set up the DMA Queues from the message. 
*/ + tx_dma_kva = net_accel_map_grants_contig + (vnic->dev, &(hw_info->txdmaq_gnt), 1, + &vnic->hw.falcon.txdmaq_mapping); + if (tx_dma_kva == NULL) { + EPRINTK("%s: TX dma failed\n", __FUNCTION__); + goto tx_dma_fail; + } + + rx_dma_kva = net_accel_map_grants_contig + (vnic->dev, &(hw_info->rxdmaq_gnt), 1, + &vnic->hw.falcon.rxdmaq_mapping); + if (rx_dma_kva == NULL) { + EPRINTK("%s: RX dma failed\n", __FUNCTION__); + goto rx_dma_fail; + } + + /* Full confession */ + DPRINTK("Mapped H/W" + " Tx DMAQ grant %x -> %p\n" + " Rx DMAQ grant %x -> %p\n" + " EVQ grant %x -> %p\n", + hw_info->txdmaq_gnt, tx_dma_kva, + hw_info->rxdmaq_gnt, rx_dma_kva, + evq_gnts[0], evq_base + ); + + memset(vi_data, 0, sizeof(vi_data)); + + /* TODO BUG11305: convert efhw_arch to ef_vi_arch + * e.g. + * arch = ef_vi_arch_from_efhw_arch(hw_info->nic_arch); + * assert(arch >= 0); + * nic_type.arch = arch; + */ + nic_type.arch = (unsigned char)hw_info->nic_arch; + nic_type.variant = (char)hw_info->nic_variant; + nic_type.revision = (unsigned char)hw_info->nic_revision; + + ef_vi_init_mapping_evq(vi_data, nic_type, hw_info->instance, + 1 << (evq_order + PAGE_SHIFT), evq_base, + (void *)0xdeadbeef); + + ef_vi_init_mapping_vi(vi_data, nic_type, hw_info->rx_capacity, + hw_info->tx_capacity, hw_info->instance, + doorbell_kva, rx_dma_kva, tx_dma_kva, 0); + + vi_state_size = ef_vi_calc_state_bytes(hw_info->rx_capacity, + hw_info->tx_capacity); + vnic->vi_state = (ef_vi_state *)kmalloc(vi_state_size, GFP_KERNEL); + if (vnic->vi_state == NULL) { + EPRINTK("%s: kmalloc for VI state failed\n", __FUNCTION__); + goto vi_state_fail; + } + ef_vi_init(&vnic->vi, vi_data, vnic->vi_state, &vnic->evq_state, 0); + + ef_eventq_state_init(&vnic->vi); + + ef_vi_state_init(&vnic->vi); + + return 0; + +fini: + kfree(vnic->vi_state); + vnic->vi_state = NULL; +vi_state_fail: + net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.rxdmaq_mapping); +rx_dma_fail: + net_accel_unmap_grants_contig(vnic->dev, vnic->hw.falcon.txdmaq_mapping); +tx_dma_fail: + net_accel_unmap_iomem_page(vnic->dev, vnic->hw.falcon.doorbell_mapping); + vnic->hw.falcon.doorbell = NULL; +doorbell_fail: + net_accel_unmap_grants_contig(vnic->dev, vnic->evq_mapping); +evq_fail: + if (vnic->hw.falcon.type == NET_ACCEL_MSG_HWTYPE_FALCON_A) + net_accel_unmap_iomem_page(vnic->dev, + vnic->hw.falcon.evq_rptr_mapping); + vnic->hw.falcon.evq_rptr = NULL; +evq_rptr_fail: +bad_type: + cuckoo_hash_destroy(&vnic->fastpath_table); +fail_cuckoo: + return -EIO; +} + + +void netfront_accel_vi_ctor(netfront_accel_vnic *vnic) +{ + /* Just mark the VI as uninitialised. */ + vnic->vi_state = NULL; +} + + +int netfront_accel_vi_init(netfront_accel_vnic *vnic, struct net_accel_msg_hw *hw_msg) +{ + BUG_ON(hw_msg == NULL); + return netfront_accel_vi_init_fini(vnic, hw_msg); +} + + +void netfront_accel_vi_dtor(netfront_accel_vnic *vnic) +{ + if (vnic->vi_state != NULL) + netfront_accel_vi_init_fini(vnic, NULL); +} + + +static +void netfront_accel_vi_post_rx(netfront_accel_vnic *vnic, u16 id, + netfront_accel_pkt_desc *buf) +{ + + int idx = vnic->rx_dma_batched; + +#if 0 + VPRINTK("Posting buffer %d (0x%08x) for rx at index %d, space is %d\n", + id, buf->pkt_buff_addr, idx, ef_vi_receive_space(&vnic->vi)); +#endif + /* Set up a virtual buffer descriptor */ + ef_vi_receive_init(&vnic->vi, buf->pkt_buff_addr, id, + /*rx_bytes=max*/0); + + idx++; + + vnic->rx_dma_level++; + + /* + * Only push the descriptor to the card if we've reached the + * batch size. 
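netfront_accel_vi_init_fini() doubles as constructor and destructor: setup failures jump into a chain of unwind labels, and an explicit teardown call (hw_msg == NULL) enters the same chain at the top via the fini label, so resources are always released in reverse order of acquisition. A reduced sketch of the pattern with two hypothetical resources:

#include <stdlib.h>

struct ctx { void *a, *b; };

/* Same label chain serves error unwind and explicit teardown, so release
 * order always mirrors acquire order, as in netfront_accel_vi_init_fini(). */
static int setup_or_teardown(struct ctx *c, int do_setup)
{
	if (!do_setup)
		goto fini;

	c->a = malloc(64);
	if (c->a == NULL)
		goto fail_a;

	c->b = malloc(64);
	if (c->b == NULL)
		goto fail_b;

	return 0;

fini:			/* teardown enters at the top of the unwind chain */
	free(c->b);
	c->b = NULL;
fail_b:
	free(c->a);
	c->a = NULL;
fail_a:
	return do_setup ? -1 : 0;
}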
Otherwise, the descriptors can sit around for + * a while. There will be plenty available. + */ + if (idx >= NETFRONT_ACCEL_RX_DESC_BATCH || + vnic->rx_dma_level < NETFRONT_ACCEL_RX_DESC_BATCH) { +#if 0 + VPRINTK("Flushing %d rx descriptors.\n", idx); +#endif + + /* Push buffer to hardware */ + ef_vi_receive_push(&vnic->vi); + + idx = 0; + } + + vnic->rx_dma_batched = idx; +} + + +inline +void netfront_accel_vi_post_rx_or_free(netfront_accel_vnic *vnic, u16 id, + netfront_accel_pkt_desc *buf) +{ + + VPRINTK("%s: %d\n", __FUNCTION__, id); + + if (ef_vi_receive_space(&vnic->vi) <= vnic->rx_dma_batched) { + VPRINTK("RX space is full\n"); + netfront_accel_buf_put(vnic->rx_bufs, id); + return; + } + + VPRINTK("Completed buffer %d is reposted\n", id); + netfront_accel_vi_post_rx(vnic, id, buf); + + /* + * Let's see if there's any more to be pushed out to the NIC + * while we're here + */ + while (ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) { + /* Try to allocate a buffer. */ + buf = netfront_accel_buf_get(vnic->rx_bufs); + if (buf == NULL) + break; + + /* Add it to the rx dma queue. */ + netfront_accel_vi_post_rx(vnic, buf->buf_id, buf); + } +} + + +void netfront_accel_vi_add_bufs(netfront_accel_vnic *vnic, int is_rx) +{ + + while (is_rx && + ef_vi_receive_space(&vnic->vi) > vnic->rx_dma_batched) { + netfront_accel_pkt_desc *buf; + + VPRINTK("%s: %d\n", __FUNCTION__, vnic->rx_dma_level); + + /* Try to allocate a buffer. */ + buf = netfront_accel_buf_get(vnic->rx_bufs); + + if (buf == NULL) + break; + + /* Add it to the rx dma queue. */ + netfront_accel_vi_post_rx(vnic, buf->buf_id, buf); + } + + VPRINTK("%s: done\n", __FUNCTION__); +} + + +struct netfront_accel_multi_state { + unsigned remaining_len; + + unsigned buffers; + + struct netfront_accel_tso_buffer *output_buffers; + + /* Where we are in the current fragment of the SKB. 
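netfront_accel_vi_post_rx() writes one RX descriptor per buffer but only pushes them to the NIC once NETFRONT_ACCEL_RX_DESC_BATCH have accumulated, or when the ring is running low, which amortises the doorbell cost. A compact sketch of that batching, with ring_write_desc() and ring_push() as stand-ins for ef_vi_receive_init() and ef_vi_receive_push():

#define EX_RX_DESC_BATCH 4

struct ex_rx_ring {
	unsigned pending;	/* descriptors written but not yet pushed */
	unsigned level;		/* descriptors currently owned by hardware */
};

static void ring_write_desc(unsigned buf_id) { (void)buf_id; }	/* like ef_vi_receive_init() */
static void ring_push(void) { }					/* like ef_vi_receive_push() */

void ex_post_rx(struct ex_rx_ring *r, unsigned buf_id)
{
	ring_write_desc(buf_id);
	r->pending++;
	r->level++;

	/* Ring the doorbell when a full batch is ready, or when the ring is
	 * running low and holding descriptors back would risk starving it. */
	if (r->pending >= EX_RX_DESC_BATCH || r->level < EX_RX_DESC_BATCH) {
		ring_push();
		r->pending = 0;
	}
}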
*/ + struct { + /* address of current position */ + void *addr; + /* remaining length */ + unsigned int len; + } ifc; /* == Input Fragment Cursor */ +}; + + +static inline void multi_post_start(struct netfront_accel_multi_state *st, + struct sk_buff *skb) +{ + st->remaining_len = skb->len; + st->output_buffers = NULL; + st->buffers = 0; + st->ifc.len = skb_headlen(skb); + st->ifc.addr = skb->data; +} + +static int multi_post_start_new_buffer(netfront_accel_vnic *vnic, + struct netfront_accel_multi_state *st) +{ + struct netfront_accel_tso_buffer *tso_buf; + struct netfront_accel_pkt_desc *buf; + + /* Get a mapped packet buffer */ + buf = netfront_accel_buf_get(vnic->tx_bufs); + if (buf == NULL) { + DPRINTK("%s: No buffer for TX\n", __FUNCTION__); + return -1; + } + + /* Store a bit of meta-data at the end */ + tso_buf = (struct netfront_accel_tso_buffer *) + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH); + + tso_buf->buf = buf; + + tso_buf->length = 0; + + tso_buf->next = st->output_buffers; + st->output_buffers = tso_buf; + st->buffers++; + + BUG_ON(st->buffers >= ACCEL_TX_MAX_BUFFERS); + + /* + * Store the context, set to NULL, last packet buffer will get + * non-NULL later + */ + tso_buf->buf->skb = NULL; + + return 0; +} + + +static void +multi_post_fill_buffer_with_fragment(netfront_accel_vnic *vnic, + struct netfront_accel_multi_state *st) +{ + struct netfront_accel_tso_buffer *tso_buf; + unsigned n, space; + + BUG_ON(st->output_buffers == NULL); + tso_buf = st->output_buffers; + + if (st->ifc.len == 0) return; + if (tso_buf->length == NETFRONT_ACCEL_TX_BUF_LENGTH) return; + + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH); + + space = NETFRONT_ACCEL_TX_BUF_LENGTH - tso_buf->length; + n = min(st->ifc.len, space); + + memcpy(tso_buf->buf->pkt_kva + tso_buf->length, st->ifc.addr, n); + + st->remaining_len -= n; + st->ifc.len -= n; + tso_buf->length += n; + st->ifc.addr += n; + + BUG_ON(tso_buf->length > NETFRONT_ACCEL_TX_BUF_LENGTH); + + return; +} + + +static inline void multi_post_unwind(netfront_accel_vnic *vnic, + struct netfront_accel_multi_state *st) +{ + struct netfront_accel_tso_buffer *tso_buf; + + DPRINTK("%s\n", __FUNCTION__); + + while (st->output_buffers != NULL) { + tso_buf = st->output_buffers; + st->output_buffers = tso_buf->next; + st->buffers--; + netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id); + } + BUG_ON(st->buffers != 0); +} + + +static enum netfront_accel_post_status +netfront_accel_enqueue_skb_multi(netfront_accel_vnic *vnic, struct sk_buff *skb) +{ + struct netfront_accel_tso_buffer *tso_buf; + struct netfront_accel_multi_state state; + ef_iovec iovecs[ACCEL_TX_MAX_BUFFERS]; + skb_frag_t *f; + int frag_i, rc, dma_id; + + multi_post_start(&state, skb); + + frag_i = -1; + + if (skb->ip_summed == CHECKSUM_HW) { + /* Set to zero to encourage falcon to work it out for us */ + *(u16*)(skb->h.raw + skb->csum) = 0; + } + + if (multi_post_start_new_buffer(vnic, &state)) { + DPRINTK("%s: out of buffers\n", __FUNCTION__); + goto unwind; + } + + while (1) { + multi_post_fill_buffer_with_fragment(vnic, &state); + + /* Move onto the next fragment? */ + if (state.ifc.len == 0) { + if (++frag_i >= skb_shinfo(skb)->nr_frags) + /* End of payload reached. */ + break; + f = &skb_shinfo(skb)->frags[frag_i]; + state.ifc.len = f->size; + state.ifc.addr = page_address(f->page) + f->page_offset; + } + + /* Start a new buffer? 
*/ + if ((state.output_buffers->length == + NETFRONT_ACCEL_TX_BUF_LENGTH) && + multi_post_start_new_buffer(vnic, &state)) { + DPRINTK("%s: out of buffers\n", __FUNCTION__); + goto unwind; + } + } + + /* Check for space */ + if (ef_vi_transmit_space(&vnic->vi) < state.buffers) { + DPRINTK("%s: Not enough TX space (%d)\n", __FUNCTION__, state.buffers); + goto unwind; + } + + /* Store the skb in what will be the last buffer's context */ + state.output_buffers->buf->skb = skb; + /* Remember dma_id of what will be the last buffer */ + dma_id = state.output_buffers->buf->buf_id; + + /* + * Make an iovec of the buffers in the list, reversing the + * buffers as we go as they are constructed on a stack + */ + tso_buf = state.output_buffers; + for (frag_i = state.buffers-1; frag_i >= 0; frag_i--) { + iovecs[frag_i].iov_base = tso_buf->buf->pkt_buff_addr; + iovecs[frag_i].iov_len = tso_buf->length; + tso_buf = tso_buf->next; + } + + rc = ef_vi_transmitv(&vnic->vi, iovecs, state.buffers, dma_id); + + /* Track number of tx fastpath stats */ + vnic->netdev_stats.fastpath_tx_bytes += skb->len; + vnic->netdev_stats.fastpath_tx_pkts ++; +#if NETFRONT_ACCEL_STATS + { + u32 n; + n = vnic->netdev_stats.fastpath_tx_pkts - + (u32)vnic->stats.fastpath_tx_completions; + if (n > vnic->stats.fastpath_tx_pending_max) + vnic->stats.fastpath_tx_pending_max = n; + } +#endif + return NETFRONT_ACCEL_STATUS_GOOD; + +unwind: + multi_post_unwind(vnic, &state); + + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); + + return NETFRONT_ACCEL_STATUS_BUSY; +} + + +static enum netfront_accel_post_status +netfront_accel_enqueue_skb_single(netfront_accel_vnic *vnic, struct sk_buff *skb) +{ + struct netfront_accel_tso_buffer *tso_buf; + struct netfront_accel_pkt_desc *buf; + u8 *kva; + int rc; + + if (ef_vi_transmit_space(&vnic->vi) < 1) { + DPRINTK("%s: No TX space\n", __FUNCTION__); + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); + return NETFRONT_ACCEL_STATUS_BUSY; + } + + buf = netfront_accel_buf_get(vnic->tx_bufs); + if (buf == NULL) { + DPRINTK("%s: No buffer for TX\n", __FUNCTION__); + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_busy++); + return NETFRONT_ACCEL_STATUS_BUSY; + } + + /* Track number of tx fastpath stats */ + vnic->netdev_stats.fastpath_tx_pkts++; + vnic->netdev_stats.fastpath_tx_bytes += skb->len; + +#if NETFRONT_ACCEL_STATS + { + u32 n; + n = vnic->netdev_stats.fastpath_tx_pkts - + (u32)vnic->stats.fastpath_tx_completions; + if (n > vnic->stats.fastpath_tx_pending_max) + vnic->stats.fastpath_tx_pending_max = n; + } +#endif + + /* Store the context */ + buf->skb = skb; + + kva = buf->pkt_kva; + + if (skb->ip_summed == CHECKSUM_HW) { + /* Set to zero to encourage falcon to work it out for us */ + *(u16*)(skb->h.raw + skb->csum) = 0; + } + NETFRONT_ACCEL_PKTBUFF_FOR_EACH_FRAGMENT + (skb, idx, frag_data, frag_len, { + /* Copy in payload */ + VPRINTK("*** Copying %d bytes to %p\n", frag_len, kva); + memcpy(kva, frag_data, frag_len); + kva += frag_len; + }); + + VPRINTK("%s: id %d pkt %p kva %p buff_addr 0x%08x\n", __FUNCTION__, + buf->buf_id, buf, buf->pkt_kva, buf->pkt_buff_addr); + + + /* Set up the TSO meta-data for a single buffer/packet */ + tso_buf = (struct netfront_accel_tso_buffer *) + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH); + tso_buf->next = NULL; + tso_buf->buf = buf; + tso_buf->length = skb->len; + + rc = ef_vi_transmit(&vnic->vi, buf->pkt_buff_addr, skb->len, + buf->buf_id); + /* We checked for space already, so it really should succeed */ + BUG_ON(rc != 0); + + return 
NETFRONT_ACCEL_STATUS_GOOD; +} + + +enum netfront_accel_post_status +netfront_accel_vi_tx_post(netfront_accel_vnic *vnic, struct sk_buff *skb) +{ + struct ethhdr *pkt_eth_hdr; + struct iphdr *pkt_ipv4_hdr; + int value, try_fastpath; + + /* + * This assumes that the data field points to the dest mac + * address. + */ + cuckoo_hash_mac_key key = cuckoo_mac_to_key(skb->data); + + /* + * NB very important that all things that could return "CANT" + * are tested before things that return "BUSY" as if it it + * returns "BUSY" it is assumed that it won't return "CANT" + * next time it is tried + */ + + /* + * Do a fastpath send if fast path table lookup returns true. + * We do this without the table lock and so may get the wrong + * answer, but current opinion is that's not a big problem + */ + try_fastpath = cuckoo_hash_lookup(&vnic->fastpath_table, + (cuckoo_hash_key *)(&key), &value); + + if (!try_fastpath) { + VPRINTK("try fast path false for mac: " MAC_FMT "\n", + MAC_ARG(skb->data)); + + return NETFRONT_ACCEL_STATUS_CANT; + } + + /* Check to see if the packet can be sent. */ + if (skb_headlen(skb) < sizeof(*pkt_eth_hdr) + sizeof(*pkt_ipv4_hdr)) { + EPRINTK("%s: Packet header is too small\n", __FUNCTION__); + return NETFRONT_ACCEL_STATUS_CANT; + } + + pkt_eth_hdr = (void*)skb->data; + pkt_ipv4_hdr = (void*)(pkt_eth_hdr+1); + + if (be16_to_cpu(pkt_eth_hdr->h_proto) != ETH_P_IP) { + DPRINTK("%s: Packet is not IPV4 (ether_type=0x%04x)\n", __FUNCTION__, + be16_to_cpu(pkt_eth_hdr->h_proto)); + return NETFRONT_ACCEL_STATUS_CANT; + } + + if (pkt_ipv4_hdr->protocol != IPPROTO_TCP && + pkt_ipv4_hdr->protocol != IPPROTO_UDP) { + DPRINTK("%s: Packet is not TCP/UDP (ip_protocol=0x%02x)\n", + __FUNCTION__, pkt_ipv4_hdr->protocol); + return NETFRONT_ACCEL_STATUS_CANT; + } + + VPRINTK("%s: %d bytes, gso %d\n", __FUNCTION__, skb->len, + skb_shinfo(skb)->gso_size); + + if (skb_shinfo(skb)->gso_size) { + return netfront_accel_enqueue_skb_tso(vnic, skb); + } + + if (skb->len <= NETFRONT_ACCEL_TX_BUF_LENGTH) { + return netfront_accel_enqueue_skb_single(vnic, skb); + } + + return netfront_accel_enqueue_skb_multi(vnic, skb); +} + + +/* + * Copy the data to required end destination. NB. 
len is the total new + * length of the socket buffer, not the amount of data to copy + */ +inline +int ef_vnic_copy_to_skb(netfront_accel_vnic *vnic, struct sk_buff *skb, + struct netfront_accel_pkt_desc *buf, int len) +{ + int i, extra = len - skb->len; + char c; + int pkt_stride = vnic->rx_pkt_stride; + int skb_stride = vnic->rx_skb_stride; + char *skb_start; + + /* + * This pulls stuff into the cache - have seen performance + * benefit in this, but disabled by default + */ + skb_start = skb->data; + if (pkt_stride) { + for (i = 0; i < len; i += pkt_stride) { + c += ((volatile char*)(buf->pkt_kva))[i]; + } + } + if (skb_stride) { + for (i = skb->len; i < len ; i += skb_stride) { + c += ((volatile char*)(skb_start))[i]; + } + } + + if (skb_tailroom(skb) >= extra) { + memcpy(skb_put(skb, extra), buf->pkt_kva, extra); + return 0; + } + + return -ENOSPC; +} + + +static void discard_jumbo_state(netfront_accel_vnic *vnic) +{ + + if (vnic->jumbo_state.skb != NULL) { + dev_kfree_skb_any(vnic->jumbo_state.skb); + + vnic->jumbo_state.skb = NULL; + } + vnic->jumbo_state.in_progress = 0; +} + + +static void netfront_accel_vi_rx_complete(netfront_accel_vnic *vnic, + struct sk_buff *skb) +{ + cuckoo_hash_mac_key key; + unsigned long flags; + int value; + struct net_device *net_dev; + + + key = cuckoo_mac_to_key(skb->data + ETH_ALEN); + + /* + * If this is a MAC address that we want to do fast path TX + * to, and we don't already, add it to the fastpath table. + * The initial lookup is done without the table lock and so + * may get the wrong answer, but current opinion is that's not + * a big problem + */ + if (is_valid_ether_addr(skb->data + ETH_ALEN) && + !cuckoo_hash_lookup(&vnic->fastpath_table, (cuckoo_hash_key *)&key, + &value)) { + spin_lock_irqsave(&vnic->table_lock, flags); + + cuckoo_hash_add_check(&vnic->fastpath_table, + (cuckoo_hash_key *)&key, + 1, 1); + + spin_unlock_irqrestore(&vnic->table_lock, flags); + } + + if (compare_ether_addr(skb->data, vnic->mac)) { + struct iphdr *ip = (struct iphdr *)(skb->data + ETH_HLEN); + u16 port; + + DPRINTK("%s: saw wrong MAC address " MAC_FMT "\n", + __FUNCTION__, MAC_ARG(skb->data)); + + if (ip->protocol == IPPROTO_TCP) { + struct tcphdr *tcp = (struct tcphdr *) + ((char *)ip + 4 * ip->ihl); + port = tcp->dest; + } else { + struct udphdr *udp = (struct udphdr *) + ((char *)ip + 4 * ip->ihl); + EPRINTK_ON(ip->protocol != IPPROTO_UDP); + port = udp->dest; + } + + netfront_accel_msg_tx_fastpath(vnic, skb->data, + ip->daddr, port, + ip->protocol); + } + + net_dev = vnic->net_dev; + skb->dev = net_dev; + skb->protocol = eth_type_trans(skb, net_dev); + /* CHECKSUM_UNNECESSARY as hardware has done it already */ + skb->ip_summed = CHECKSUM_UNNECESSARY; + + if (!netfront_accel_ssr_skb(vnic, &vnic->ssr_state, skb)) + netif_receive_skb(skb); +} + + +static int netfront_accel_vi_poll_process_rx(netfront_accel_vnic *vnic, + ef_event *ev) +{ + struct netfront_accel_bufinfo *bufinfo = vnic->rx_bufs; + struct netfront_accel_pkt_desc *buf = NULL; + struct sk_buff *skb; + int id, len, sop = 0, cont = 0; + + VPRINTK("Rx event.\n"); + /* + * Complete the receive operation, and get the request id of + * the buffer + */ + id = ef_vi_receive_done(&vnic->vi, ev); + + if (id < 0 || id >= bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) { + EPRINTK("Rx packet %d is invalid\n", id); + /* Carry on round the loop if more events */ + goto bad_packet; + } + /* Get our buffer descriptor */ + buf = netfront_accel_buf_find(bufinfo, id); + + len = EF_EVENT_RX_BYTES(*ev); + + /* An RX 
buffer has been removed from the DMA ring. */ + vnic->rx_dma_level--; + + if (EF_EVENT_TYPE(*ev) == EF_EVENT_TYPE_RX) { + sop = EF_EVENT_RX_SOP(*ev); + cont = EF_EVENT_RX_CONT(*ev); + + skb = vnic->jumbo_state.skb; + + VPRINTK("Rx packet %d: %d bytes so far; sop %d; cont %d\n", + id, len, sop, cont); + + if (sop) { + if (!vnic->jumbo_state.in_progress) { + vnic->jumbo_state.in_progress = 1; + BUG_ON(vnic->jumbo_state.skb != NULL); + } else { + /* + * This fragment shows a missing tail in + * previous one, but is itself possibly OK + */ + DPRINTK("sop and in_progress => no tail\n"); + + /* Release the socket buffer we already had */ + discard_jumbo_state(vnic); + + /* Now start processing this fragment */ + vnic->jumbo_state.in_progress = 1; + skb = NULL; + } + } else if (!vnic->jumbo_state.in_progress) { + DPRINTK("!sop and !in_progress => missing head\n"); + goto missing_head; + } + + if (!cont) { + /* Update state for next time */ + vnic->jumbo_state.in_progress = 0; + vnic->jumbo_state.skb = NULL; + } else if (!vnic->jumbo_state.in_progress) { + DPRINTK("cont and !in_progress => missing head\n"); + goto missing_head; + } + + if (skb == NULL) { + BUG_ON(!sop); + + if (!cont) + skb = alloc_skb(len+NET_IP_ALIGN, GFP_ATOMIC); + else + skb = alloc_skb(vnic->net_dev->mtu+NET_IP_ALIGN, + GFP_ATOMIC); + + if (skb == NULL) { + DPRINTK("%s: Couldn't get an rx skb.\n", + __FUNCTION__); + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); + /* + * Dropping this fragment means we + * should discard the rest too + */ + discard_jumbo_state(vnic); + + /* Carry on round the loop if more events */ + return 0; + } + + } + + /* Copy the data to required end destination */ + if (ef_vnic_copy_to_skb(vnic, skb, buf, len) != 0) { + /* + * No space in the skb - suggests > MTU packet + * received + */ + EPRINTK("%s: Rx packet too large (%d)\n", + __FUNCTION__, len); + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); + discard_jumbo_state(vnic); + return 0; + } + + /* Put the buffer back in the DMA queue. 
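The receive path above reassembles multi-buffer frames from the SOP (start of packet) and CONT (continues) flags carried by each RX event, keeping at most one partially built skb in vnic->jumbo_state. A small sketch of just the flag handling, with simplified state and return codes standing in for the driver's bookkeeping:

enum jumbo_verdict {
	JUMBO_ACCEPT,		/* fragment belongs to the current frame */
	JUMBO_RESTART,		/* previous frame lost its tail; start over */
	JUMBO_MISSING_HEAD	/* fragment with no frame in progress: drop */
};

struct jumbo_tracker {
	int in_progress;
};

enum jumbo_verdict jumbo_track(struct jumbo_tracker *j, int sop, int cont)
{
	enum jumbo_verdict v = JUMBO_ACCEPT;

	if (sop) {
		/* A new frame starting while another is open means the old
		 * one never saw its final (cont == 0) fragment. */
		v = j->in_progress ? JUMBO_RESTART : JUMBO_ACCEPT;
		j->in_progress = 1;
	} else if (!j->in_progress) {
		return JUMBO_MISSING_HEAD;
	}

	if (!cont)
		j->in_progress = 0;	/* frame complete after this fragment */

	return v;
}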
*/ + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); + + if (cont) { + vnic->jumbo_state.skb = skb; + + return 0; + } else { + /* Track number of rx fastpath packets */ + vnic->netdev_stats.fastpath_rx_pkts++; + vnic->netdev_stats.fastpath_rx_bytes += len; + + netfront_accel_vi_rx_complete(vnic, skb); + + return 1; + } + } else { + BUG_ON(EF_EVENT_TYPE(*ev) != EF_EVENT_TYPE_RX_DISCARD); + + if (EF_EVENT_RX_DISCARD_TYPE(*ev) + == EF_EVENT_RX_DISCARD_TRUNC) { + DPRINTK("%s: " EF_EVENT_FMT + " buffer %d FRM_TRUNC q_id %d\n", + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id, + EF_EVENT_RX_DISCARD_Q_ID(*ev) ); + NETFRONT_ACCEL_STATS_OP(++vnic->stats.fastpath_frm_trunc); + } else if (EF_EVENT_RX_DISCARD_TYPE(*ev) + == EF_EVENT_RX_DISCARD_OTHER) { + DPRINTK("%s: " EF_EVENT_FMT + " buffer %d RX_DISCARD_OTHER q_id %d\n", + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id, + EF_EVENT_RX_DISCARD_Q_ID(*ev) ); + /* + * Probably tail of packet for which error has + * already been logged, so don't count in + * stats + */ + } else { + EPRINTK("%s: " EF_EVENT_FMT + " buffer %d rx discard type %d q_id %d\n", + __FUNCTION__, EF_EVENT_PRI_ARG(*ev), id, + EF_EVENT_RX_DISCARD_TYPE(*ev), + EF_EVENT_RX_DISCARD_Q_ID(*ev) ); + NETFRONT_ACCEL_STATS_OP(++vnic->stats.bad_event_count); + } + } + + /* discard type drops through here */ + +bad_packet: + /* Release the socket buffer we already had */ + discard_jumbo_state(vnic); + +missing_head: + BUG_ON(vnic->jumbo_state.in_progress != 0); + BUG_ON(vnic->jumbo_state.skb != NULL); + + if (id >= 0 && id < bufinfo->npages*NETFRONT_ACCEL_BUFS_PER_PAGE) + /* Put the buffer back in the DMA queue. */ + netfront_accel_vi_post_rx_or_free(vnic, (u16)id, buf); + + vnic->netdev_stats.fastpath_rx_errors++; + + DPRINTK("%s experienced bad packet/missing fragment error: %d \n", + __FUNCTION__, ev->rx.flags); + + return 0; +} + + +static void netfront_accel_vi_not_busy(netfront_accel_vnic *vnic) +{ + struct netfront_info *np = ((struct netfront_info *) + netdev_priv(vnic->net_dev)); + struct sk_buff *skb; + int handled; + unsigned long flags; + + /* + * TODO if we could safely check tx_skb == NULL and return + * early without taking the lock, that would obviously help + * performance + */ + + /* Take the netfront lock which protects tx_skb. 
*/ + spin_lock_irqsave(&np->tx_lock, flags); + if (vnic->tx_skb != NULL) { + DPRINTK("%s trying to send spare buffer\n", __FUNCTION__); + + skb = vnic->tx_skb; + vnic->tx_skb = NULL; + + spin_unlock_irqrestore(&np->tx_lock, flags); + + handled = netfront_accel_vi_tx_post(vnic, skb); + + spin_lock_irqsave(&np->tx_lock, flags); + + if (handled != NETFRONT_ACCEL_STATUS_BUSY) { + DPRINTK("%s restarting tx\n", __FUNCTION__); + if (netfront_check_queue_ready(vnic->net_dev)) { + netif_wake_queue(vnic->net_dev); + NETFRONT_ACCEL_STATS_OP + (vnic->stats.queue_wakes++); + } + } else { + vnic->tx_skb = skb; + } + + /* + * Should never get a CANT, as it checks that before + * deciding it was BUSY first time round + */ + BUG_ON(handled == NETFRONT_ACCEL_STATUS_CANT); + } + spin_unlock_irqrestore(&np->tx_lock, flags); +} + + +static void netfront_accel_vi_tx_complete(netfront_accel_vnic *vnic, + struct netfront_accel_tso_buffer *tso_buf, + int is_last) +{ + struct netfront_accel_tso_buffer *next; + + /* + * We get a single completion for every call to + * ef_vi_transmitv so handle any other buffers which are part + * of the same packet + */ + while (tso_buf != NULL) { + if (tso_buf->buf->skb != NULL) { + dev_kfree_skb_any(tso_buf->buf->skb); + tso_buf->buf->skb = NULL; + } + + next = tso_buf->next; + + netfront_accel_buf_put(vnic->tx_bufs, tso_buf->buf->buf_id); + + tso_buf = next; + } + + /* + * If this was the last one in the batch, we try and send any + * pending tx_skb. There should now be buffers and + * descriptors + */ + if (is_last) + netfront_accel_vi_not_busy(vnic); +} + + +static void netfront_accel_vi_poll_process_tx(netfront_accel_vnic *vnic, + ef_event *ev) +{ + struct netfront_accel_pkt_desc *buf; + struct netfront_accel_tso_buffer *tso_buf; + ef_request_id ids[EF_VI_TRANSMIT_BATCH]; + int i, n_ids; + unsigned long flags; + + /* Get the request ids for this tx completion event. 
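A single TX event completes a whole batch of sends, so ef_vi_transmit_unbundle() expands it into the individual request ids (at most EF_VI_TRANSMIT_BATCH of them) that were handed to ef_vi_transmitv().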
*/ + n_ids = ef_vi_transmit_unbundle(&vnic->vi, ev, ids); + + /* Take the tx buffer spin lock and hold for the duration */ + spin_lock_irqsave(&vnic->tx_lock, flags); + + for (i = 0; i < n_ids; ++i) { + VPRINTK("Tx packet %d complete\n", ids[i]); + buf = netfront_accel_buf_find(vnic->tx_bufs, ids[i]); + NETFRONT_ACCEL_STATS_OP(vnic->stats.fastpath_tx_completions++); + + tso_buf = (struct netfront_accel_tso_buffer *) + (buf->pkt_kva + NETFRONT_ACCEL_TX_BUF_LENGTH); + BUG_ON(tso_buf->buf != buf); + + netfront_accel_vi_tx_complete(vnic, tso_buf, i == (n_ids-1)); + } + + spin_unlock_irqrestore(&vnic->tx_lock, flags); +} + + +int netfront_accel_vi_poll(netfront_accel_vnic *vnic, int rx_packets) +{ + ef_event ev[ACCEL_VI_POLL_EVENTS]; + int rx_remain = rx_packets, rc, events, i; +#if NETFRONT_ACCEL_STATS + int n_evs_polled = 0, rx_evs_polled = 0, tx_evs_polled = 0; +#endif + BUG_ON(rx_packets <= 0); + + events = ef_eventq_poll(&vnic->vi, ev, + min(rx_remain, ACCEL_VI_POLL_EVENTS)); + i = 0; + NETFRONT_ACCEL_STATS_OP(n_evs_polled += events); + + VPRINTK("%s: %d events\n", __FUNCTION__, events); + + /* Loop over each event */ + while (events) { + VPRINTK("%s: Event "EF_EVENT_FMT", index %lu\n", __FUNCTION__, + EF_EVENT_PRI_ARG(ev[i]), + (unsigned long)(vnic->vi.evq_state->evq_ptr)); + + if ((EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX) || + (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_RX_DISCARD)) { + rc = netfront_accel_vi_poll_process_rx(vnic, &ev[i]); + rx_remain -= rc; + BUG_ON(rx_remain < 0); + NETFRONT_ACCEL_STATS_OP(rx_evs_polled++); + } else if (EF_EVENT_TYPE(ev[i]) == EF_EVENT_TYPE_TX) { + netfront_accel_vi_poll_process_tx(vnic, &ev[i]); + NETFRONT_ACCEL_STATS_OP(tx_evs_polled++); + } else if (EF_EVENT_TYPE(ev[i]) == + EF_EVENT_TYPE_RX_NO_DESC_TRUNC) { + DPRINTK("%s: RX_NO_DESC_TRUNC " EF_EVENT_FMT "\n", + __FUNCTION__, EF_EVENT_PRI_ARG(ev[i])); + discard_jumbo_state(vnic); + NETFRONT_ACCEL_STATS_OP(vnic->stats.rx_no_desc_trunc++); + } else { + EPRINTK("Unexpected event " EF_EVENT_FMT "\n", + EF_EVENT_PRI_ARG(ev[i])); + NETFRONT_ACCEL_STATS_OP(vnic->stats.bad_event_count++); + } + + i++; + + /* Carry on round the loop if more events and more space */ + if (i == events) { + if (rx_remain == 0) + break; + + events = ef_eventq_poll(&vnic->vi, ev, + min(rx_remain, + ACCEL_VI_POLL_EVENTS)); + i = 0; + NETFRONT_ACCEL_STATS_OP(n_evs_polled += events); + } + } + +#if NETFRONT_ACCEL_STATS + vnic->stats.event_count += n_evs_polled; + vnic->stats.event_count_since_irq += n_evs_polled; + if (n_evs_polled > vnic->stats.events_per_poll_max) + vnic->stats.events_per_poll_max = n_evs_polled; + if (rx_evs_polled > vnic->stats.events_per_poll_rx_max) + vnic->stats.events_per_poll_rx_max = rx_evs_polled; + if (tx_evs_polled > vnic->stats.events_per_poll_tx_max) + vnic->stats.events_per_poll_tx_max = tx_evs_polled; +#endif + + return rx_packets - rx_remain; +} + + +int netfront_accel_vi_enable_interrupts(netfront_accel_vnic *vnic) +{ + u32 sw_evq_ptr; + + VPRINTK("%s: checking for event on %p\n", __FUNCTION__, &vnic->vi.evq_state); + + BUG_ON(vnic == NULL); + BUG_ON(vnic->vi.evq_state == NULL); + + /* Do a quick check for an event. */ + if (ef_eventq_has_event(&vnic->vi)) { + VPRINTK("%s: found event\n", __FUNCTION__); + return 0; + } + + VPRINTK("evq_ptr=0x%08x evq_mask=0x%08x\n", + vnic->evq_state.evq_ptr, vnic->vi.evq_mask); + + /* Request a wakeup from the hardware. 
*/ + sw_evq_ptr = vnic->evq_state.evq_ptr & vnic->vi.evq_mask; + + BUG_ON(vnic->hw.falcon.evq_rptr == NULL); + + VPRINTK("Requesting wakeup at 0x%08x, rptr %p\n", sw_evq_ptr, + vnic->hw.falcon.evq_rptr); + *(volatile u32 *)(vnic->hw.falcon.evq_rptr) = (sw_evq_ptr >> 3); + + return 1; +} diff -r dd748ded9ba8 drivers/xen/sfc_netfront/accel_xenbus.c --- /dev/null +++ b/drivers/xen/sfc_netfront/accel_xenbus.c @@ -0,0 +1,776 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include + +#include +#include +#include + +#include "accel.h" +#include "accel_util.h" +#include "accel_msg_iface.h" +#include "accel_bufs.h" +#include "accel_ssr.h" +/* drivers/xen/netfront/netfront.h */ +#include "netfront.h" + +void netfront_accel_set_closing(netfront_accel_vnic *vnic) +{ + + vnic->frontend_state = XenbusStateClosing; + net_accel_update_state(vnic->dev, XenbusStateClosing); +} + + +static void mac_address_change(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + netfront_accel_vnic *vnic; + struct xenbus_device *dev; + int rc; + + DPRINTK("%s\n", __FUNCTION__); + + vnic = container_of(watch, netfront_accel_vnic, + mac_address_watch); + dev = vnic->dev; + + rc = net_accel_xen_net_read_mac(dev, vnic->mac); + + if (rc != 0) + EPRINTK("%s: failed to read mac (%d)\n", __FUNCTION__, rc); +} + + +static int setup_mac_address_watch(struct xenbus_device *dev, + netfront_accel_vnic *vnic) +{ + int err; + + DPRINTK("Setting watch on %s/%s\n", dev->nodename, "mac"); + + err = xenbus_watch_path2(dev, dev->nodename, "mac", + &vnic->mac_address_watch, + mac_address_change); + if (err) { + EPRINTK("%s: Failed to register xenbus watch: %d\n", + __FUNCTION__, err); + goto fail; + } + + return 0; + fail: + vnic->mac_address_watch.node = NULL; + return err; +} + + +/* Grant access to some pages and publish through xenbus */ +static int make_named_grant(struct xenbus_device *dev, void *page, + const char *name, grant_ref_t *gnt_ref) +{ + struct xenbus_transaction tr; + int err; + grant_ref_t gnt; + + gnt = net_accel_grant_page(dev, virt_to_mfn(page), 0); + if (gnt < 0) + return gnt; + + do { + err = xenbus_transaction_start(&tr); + if (err != 0) { + EPRINTK("%s: transaction start failed %d\n", + __FUNCTION__, err); + return err; + } + err = xenbus_printf(tr, dev->nodename, name, "%d", gnt); + if (err != 0) { + EPRINTK("%s: xenbus_printf failed %d\n", __FUNCTION__, + err); + xenbus_transaction_end(tr, 1); + return err; + } + err = xenbus_transaction_end(tr, 0); + } while (err == 
-EAGAIN); + + if (err != 0) { + EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err); + return err; + } + + *gnt_ref = gnt; + + return 0; +} + + +static int remove_named_grant(struct xenbus_device *dev, + const char *name, grant_ref_t gnt_ref) +{ + struct xenbus_transaction tr; + int err; + + net_accel_ungrant_page(gnt_ref); + + do { + err = xenbus_transaction_start(&tr); + if (err != 0) { + EPRINTK("%s: transaction start failed %d\n", + __FUNCTION__, err); + return err; + } + err = xenbus_rm(tr, dev->nodename, name); + if (err != 0) { + EPRINTK("%s: xenbus_rm failed %d\n", __FUNCTION__, + err); + xenbus_transaction_end(tr, 1); + return err; + } + err = xenbus_transaction_end(tr, 0); + } while (err == -EAGAIN); + + if (err != 0) { + EPRINTK("%s: transaction end failed %d\n", __FUNCTION__, err); + return err; + } + + return 0; +} + + +static +netfront_accel_vnic *netfront_accel_vnic_ctor(struct net_device *net_dev, + struct xenbus_device *dev) +{ + struct netfront_info *np = + (struct netfront_info *)netdev_priv(net_dev); + netfront_accel_vnic *vnic; + int err; + + /* + * A bug in earlier versions of the Xen accel plugin system meant + * you could be probed twice for the same device on suspend + * cancel. Be tolerant of that. + */ + if (np->accel_priv != NULL) + return ERR_PTR(-EALREADY); + + /* Alloc mem for state */ + vnic = kzalloc(sizeof(netfront_accel_vnic), GFP_KERNEL); + if (vnic == NULL) { + EPRINTK("%s: no memory for vnic state\n", __FUNCTION__); + return ERR_PTR(-ENOMEM); + } + + spin_lock_init(&vnic->tx_lock); + + mutex_init(&vnic->vnic_mutex); + mutex_lock(&vnic->vnic_mutex); + + /* Store so state can be retrieved from device */ + BUG_ON(np->accel_priv != NULL); + np->accel_priv = vnic; + vnic->dev = dev; + vnic->net_dev = net_dev; + spin_lock_init(&vnic->irq_enabled_lock); + netfront_accel_ssr_init(&vnic->ssr_state); + + init_waitqueue_head(&vnic->state_wait_queue); + vnic->backend_state = XenbusStateUnknown; + vnic->frontend_state = XenbusStateClosed; + vnic->removing = 0; + vnic->domU_state_is_setup = 0; + vnic->dom0_state_is_setup = 0; + vnic->poll_enabled = 0; + vnic->tx_enabled = 0; + vnic->tx_skb = NULL; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,20) + INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend); +#else + INIT_WORK(&vnic->msg_from_bend, netfront_accel_msg_from_bend, vnic); +#endif + + netfront_accel_debugfs_create(vnic); + + mutex_unlock(&vnic->vnic_mutex); + + err = net_accel_xen_net_read_mac(dev, vnic->mac); + if (err) + goto fail_mac; + + /* Setup a watch on the frontend's MAC address */ + err = setup_mac_address_watch(dev, vnic); + if (err) + goto fail_mac; + + return vnic; + +fail_mac: + + mutex_lock(&vnic->vnic_mutex); + + netfront_accel_debugfs_remove(vnic); + + netfront_accel_ssr_fini(vnic, &vnic->ssr_state); + + EPRINTK_ON(vnic->tx_skb != NULL); + + vnic->frontend_state = XenbusStateUnknown; + net_accel_update_state(dev, XenbusStateUnknown); + + mutex_unlock(&vnic->vnic_mutex); + + np->accel_priv = NULL; + kfree(vnic); + + return ERR_PTR(err); +} + + +static void netfront_accel_vnic_dtor(netfront_accel_vnic *vnic) +{ + struct net_device *net_dev = vnic->net_dev; + struct netfront_info *np = + (struct netfront_info *)netdev_priv(net_dev); + + /* + * Now we don't hold the lock any more, it is safe to remove + * this watch and synchronise with the completion of + * watches + */ + DPRINTK("%s: unregistering xenbus mac watch\n", __FUNCTION__); + unregister_xenbus_watch(&vnic->mac_address_watch); + kfree(vnic->mac_address_watch.node); + 
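/* Flush the accel workqueue so that any message-handling work
+	 * queued for this vnic has finished before its state is freed
+	 * below. */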
flush_workqueue(netfront_accel_workqueue); + + mutex_lock(&vnic->vnic_mutex); + + netfront_accel_debugfs_remove(vnic); + + netfront_accel_ssr_fini(vnic, &vnic->ssr_state); + + EPRINTK_ON(vnic->tx_skb != NULL); + + vnic->frontend_state = XenbusStateUnknown; + net_accel_update_state(vnic->dev, XenbusStateUnknown); + + mutex_unlock(&vnic->vnic_mutex); + + np->accel_priv = NULL; + kfree(vnic); +} + + +static int vnic_setup_domU_shared_state(struct xenbus_device *dev, + netfront_accel_vnic *vnic) +{ + struct xenbus_transaction tr; + int err; + int msgs_per_queue; + + + DPRINTK("Setting up domU shared state.\n"); + + msgs_per_queue = (PAGE_SIZE/2) / sizeof(struct net_accel_msg); + + /* Allocate buffer state */ + vnic->tx_bufs = netfront_accel_init_bufs(&vnic->tx_lock); + if (vnic->tx_bufs == NULL) { + err = -ENOMEM; + EPRINTK("%s: Failed to allocate tx buffers\n", __FUNCTION__); + goto fail_tx_bufs; + } + + vnic->rx_bufs = netfront_accel_init_bufs(NULL); + if (vnic->rx_bufs == NULL) { + err = -ENOMEM; + EPRINTK("%s: Failed to allocate rx buffers\n", __FUNCTION__); + goto fail_rx_bufs; + } + + /* + * This allocates two pages, one for the shared page and one + * for the message queue. + */ + vnic->shared_page = (struct net_accel_shared_page *) + __get_free_pages(GFP_KERNEL, 1); + if (vnic->shared_page == NULL) { + EPRINTK("%s: no memory for shared pages\n", __FUNCTION__); + err = -ENOMEM; + goto fail_shared_page; + } + + net_accel_msg_init_queue + (&vnic->from_dom0, &vnic->shared_page->queue0, + (struct net_accel_msg *)((u8*)vnic->shared_page + PAGE_SIZE), + msgs_per_queue); + + net_accel_msg_init_queue + (&vnic->to_dom0, &vnic->shared_page->queue1, + (struct net_accel_msg *)((u8*)vnic->shared_page + + (3 * PAGE_SIZE / 2)), + msgs_per_queue); + + vnic->msg_state = NETFRONT_ACCEL_MSG_NONE; + + err = make_named_grant(dev, vnic->shared_page, "accel-ctrl-page", + &vnic->ctrl_page_gnt); + if (err) { + EPRINTK("couldn't make ctrl-page named grant\n"); + goto fail_ctrl_page_grant; + } + + err = make_named_grant(dev, (u8*)vnic->shared_page + PAGE_SIZE, + "accel-msg-page", &vnic->msg_page_gnt); + if (err) { + EPRINTK("couldn't make msg-page named grant\n"); + goto fail_msg_page_grant; + } + + /* Create xenbus msg event channel */ + err = bind_listening_port_to_irqhandler + (dev->otherend_id, netfront_accel_msg_channel_irq_from_bend, + SA_SAMPLE_RANDOM, "vnicctrl", vnic); + if (err < 0) { + EPRINTK("Couldn't bind msg event channel\n"); + goto fail_msg_irq; + } + vnic->msg_channel_irq = err; + vnic->msg_channel = irq_to_evtchn_port(vnic->msg_channel_irq); + + /* Create xenbus net event channel */ + err = bind_listening_port_to_irqhandler + (dev->otherend_id, netfront_accel_net_channel_irq_from_bend, + SA_SAMPLE_RANDOM, "vnicfront", vnic); + if (err < 0) { + EPRINTK("Couldn't bind net event channel\n"); + goto fail_net_irq; + } + vnic->net_channel_irq = err; + vnic->net_channel = irq_to_evtchn_port(vnic->net_channel_irq); + /* Want to ensure we don't get interrupts before we're ready */ + netfront_accel_disable_net_interrupts(vnic); + + DPRINTK("otherend %d has msg ch %u (%u) and net ch %u (%u)\n", + dev->otherend_id, vnic->msg_channel, vnic->msg_channel_irq, + vnic->net_channel, vnic->net_channel_irq); + + do { + err = xenbus_transaction_start(&tr); + if (err != 0) { + EPRINTK("%s: Transaction start failed %d\n", + __FUNCTION__, err); + goto fail_transaction; + } + + err = xenbus_printf(tr, dev->nodename, "accel-msg-channel", + "%u", vnic->msg_channel); + if (err != 0) { + EPRINTK("%s: event channel xenbus 
write failed %d\n", + __FUNCTION__, err); + xenbus_transaction_end(tr, 1); + goto fail_transaction; + } + + err = xenbus_printf(tr, dev->nodename, "accel-net-channel", + "%u", vnic->net_channel); + if (err != 0) { + EPRINTK("%s: net channel xenbus write failed %d\n", + __FUNCTION__, err); + xenbus_transaction_end(tr, 1); + goto fail_transaction; + } + + err = xenbus_transaction_end(tr, 0); + } while (err == -EAGAIN); + + if (err != 0) { + EPRINTK("%s: Transaction end failed %d\n", __FUNCTION__, err); + goto fail_transaction; + } + + DPRINTK("Completed setting up domU shared state\n"); + + return 0; + +fail_transaction: + + unbind_from_irqhandler(vnic->net_channel_irq, vnic); +fail_net_irq: + + unbind_from_irqhandler(vnic->msg_channel_irq, vnic); +fail_msg_irq: + + remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt); +fail_msg_page_grant: + + remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt); +fail_ctrl_page_grant: + + free_pages((unsigned long)vnic->shared_page, 1); + vnic->shared_page = NULL; +fail_shared_page: + + netfront_accel_fini_bufs(vnic->rx_bufs); +fail_rx_bufs: + + netfront_accel_fini_bufs(vnic->tx_bufs); +fail_tx_bufs: + + /* Undo the memory allocation created when we got the HELLO */ + netfront_accel_free_buffer_mem(&vnic->bufpages, + vnic->rx_bufs, + vnic->tx_bufs); + + DPRINTK("Failed to setup domU shared state with code %d\n", err); + + return err; +} + + +static void vnic_remove_domU_shared_state(struct xenbus_device *dev, + netfront_accel_vnic *vnic) +{ + struct xenbus_transaction tr; + + /* + * Don't remove any watches because we currently hold the + * mutex and the watches take the mutex. + */ + + DPRINTK("%s: removing event channel irq handlers %d %d\n", + __FUNCTION__, vnic->net_channel_irq, vnic->msg_channel_irq); + do { + if (xenbus_transaction_start(&tr) != 0) + break; + xenbus_rm(tr, dev->nodename, "accel-msg-channel"); + xenbus_rm(tr, dev->nodename, "accel-net-channel"); + } while (xenbus_transaction_end(tr, 0) == -EAGAIN); + + unbind_from_irqhandler(vnic->net_channel_irq, vnic); + unbind_from_irqhandler(vnic->msg_channel_irq, vnic); + + /* ungrant pages for msg channel */ + remove_named_grant(dev, "accel-ctrl-page", vnic->ctrl_page_gnt); + remove_named_grant(dev, "accel-msg-page", vnic->msg_page_gnt); + free_pages((unsigned long)vnic->shared_page, 1); + vnic->shared_page = NULL; + + /* ungrant pages for buffers, and free buffer memory */ + netfront_accel_free_buffer_mem(&vnic->bufpages, + vnic->rx_bufs, + vnic->tx_bufs); + netfront_accel_fini_bufs(vnic->rx_bufs); + netfront_accel_fini_bufs(vnic->tx_bufs); +} + + +static void vnic_setup_dom0_shared_state(struct xenbus_device *dev, + netfront_accel_vnic *vnic) +{ + DPRINTK("Setting up dom0 shared state\n"); + + netfront_accel_vi_ctor(vnic); + + /* + * Message processing will be enabled when this function + * returns, but we might have missed an interrupt. Schedule a + * check just in case. + */ + queue_work(netfront_accel_workqueue, &vnic->msg_from_bend); +} + + +static void vnic_remove_dom0_shared_state(struct xenbus_device *dev, + netfront_accel_vnic *vnic) +{ + DPRINTK("Removing dom0 shared state\n"); + + vnic_stop_fastpath(vnic); + + netfront_accel_vi_dtor(vnic); +} + + +/*************************************************************************/ + +/* + * The following code handles accelstate changes between the frontend + * and the backend. 
In response to transitions, calls the following + * functions in matching pairs: + * + * vnic_setup_domU_shared_state + * vnic_remove_domU_shared_state + * + * vnic_setup_dom0_shared_state + * vnic_remove_dom0_shared_state + * + * Valid state transitions for DomU are as follows: + * + * Closed->Init on probe or in response to Init from dom0 + * + * Init->Connected in response to Init from dom0 + * Init->Closing on error providing dom0 is in Init + * Init->Closed on remove or in response to Closing from dom0 + * + * Connected->Closing on error/remove + * Connected->Closed in response to Closing from dom0 + * + * Closing->Closed in response to Closing from dom0 + * + */ + + +/* Function to deal with Xenbus accel state change in backend */ +static void netfront_accel_backend_accel_changed(netfront_accel_vnic *vnic, + XenbusState backend_state) +{ + struct xenbus_device *dev = vnic->dev; + XenbusState frontend_state; + int state; + + DPRINTK("%s: changing from %s to %s. nodename %s, otherend %s\n", + __FUNCTION__, xenbus_strstate(vnic->backend_state), + xenbus_strstate(backend_state), dev->nodename, dev->otherend); + + /* + * Ignore duplicate state changes. This can happen if the + * backend changes state twice in quick succession and the + * first watch fires in the frontend after the second + * transition has completed. + */ + if (vnic->backend_state == backend_state) + return; + + vnic->backend_state = backend_state; + frontend_state = vnic->frontend_state; + + switch (backend_state) { + case XenbusStateInitialising: + /* + * It's possible for us to miss the closed state from + * dom0, so do the work here. + */ + if (vnic->domU_state_is_setup) { + vnic_remove_domU_shared_state(dev, vnic); + vnic->domU_state_is_setup = 0; + } + + if (frontend_state != XenbusStateInitialising) { + /* Make sure the backend doesn't go away. */ + frontend_state = XenbusStateInitialising; + net_accel_update_state(dev, frontend_state); + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state); + backend_state = (XenbusState)state; + if (backend_state != XenbusStateInitialising) + break; + } + + /* Start the new connection. 
*/ + if (!vnic->removing) { + BUG_ON(vnic->domU_state_is_setup); + if (vnic_setup_domU_shared_state(dev, vnic) == 0) { + vnic->domU_state_is_setup = 1; + frontend_state = XenbusStateConnected; + } else + frontend_state = XenbusStateClosing; + } + break; + case XenbusStateConnected: + if (vnic->domU_state_is_setup && + !vnic->dom0_state_is_setup) { + vnic_setup_dom0_shared_state(dev, vnic); + vnic->dom0_state_is_setup = 1; + } + break; + default: + case XenbusStateClosing: + if (vnic->dom0_state_is_setup) { + vnic_remove_dom0_shared_state(dev, vnic); + vnic->dom0_state_is_setup = 0; + } + frontend_state = XenbusStateClosed; + break; + case XenbusStateUnknown: + case XenbusStateClosed: + if (vnic->domU_state_is_setup) { + vnic_remove_domU_shared_state(dev, vnic); + vnic->domU_state_is_setup = 0; + } + break; + } + + if (frontend_state != vnic->frontend_state) { + DPRINTK("Switching from state %s (%d) to %s (%d)\n", + xenbus_strstate(vnic->frontend_state), + vnic->frontend_state, + xenbus_strstate(frontend_state), frontend_state); + vnic->frontend_state = frontend_state; + net_accel_update_state(dev, frontend_state); + } + + wake_up(&vnic->state_wait_queue); +} + + +static void backend_accel_state_change(struct xenbus_watch *watch, + const char **vec, unsigned int len) +{ + int state; + netfront_accel_vnic *vnic; + struct xenbus_device *dev; + + DPRINTK("%s\n", __FUNCTION__); + + vnic = container_of(watch, struct netfront_accel_vnic, + backend_accel_watch); + + mutex_lock(&vnic->vnic_mutex); + + dev = vnic->dev; + + state = (int)XenbusStateUnknown; + xenbus_scanf(XBT_NIL, dev->otherend, "accelstate", "%d", &state); + netfront_accel_backend_accel_changed(vnic, state); + + mutex_unlock(&vnic->vnic_mutex); +} + + +static int setup_dom0_accel_watch(struct xenbus_device *dev, + netfront_accel_vnic *vnic) +{ + int err; + + DPRINTK("Setting watch on %s/%s\n", dev->otherend, "accelstate"); + + err = xenbus_watch_path2(dev, dev->otherend, "accelstate", + &vnic->backend_accel_watch, + backend_accel_state_change); + if (err) { + EPRINTK("%s: Failed to register xenbus watch: %d\n", + __FUNCTION__, err); + goto fail; + } + return 0; + fail: + vnic->backend_accel_watch.node = NULL; + return err; +} + + +int netfront_accel_probe(struct net_device *net_dev, struct xenbus_device *dev) +{ + netfront_accel_vnic *vnic; + int err; + + DPRINTK("Probe passed device %s\n", dev->nodename); + + vnic = netfront_accel_vnic_ctor(net_dev, dev); + if (IS_ERR(vnic)) + return PTR_ERR(vnic); + + /* + * Setup a watch on the backend accel state. This sets things + * going. + */ + err = setup_dom0_accel_watch(dev, vnic); + if (err) { + netfront_accel_vnic_dtor(vnic); + EPRINTK("%s: probe failed with code %d\n", __FUNCTION__, err); + return err; + } + + /* + * Indicate to the other end that we're ready to start unless + * the watch has already fired. 
+ */ + mutex_lock(&vnic->vnic_mutex); + VPRINTK("setup success, updating accelstate\n"); + if (vnic->frontend_state == XenbusStateClosed) { + vnic->frontend_state = XenbusStateInitialising; + net_accel_update_state(dev, XenbusStateInitialising); + } + mutex_unlock(&vnic->vnic_mutex); + + DPRINTK("Probe done device %s\n", dev->nodename); + + return 0; +} + + +int netfront_accel_remove(struct xenbus_device *dev) +{ + struct netfront_info *np = + (struct netfront_info *)dev->dev.driver_data; + netfront_accel_vnic *vnic = (netfront_accel_vnic *)np->accel_priv; + + DPRINTK("%s %s\n", __FUNCTION__, dev->nodename); + + BUG_ON(vnic == NULL); + + mutex_lock(&vnic->vnic_mutex); + + /* Reject any attempts to connect. */ + vnic->removing = 1; + + /* Close any existing connection. */ + if (vnic->frontend_state == XenbusStateConnected) { + vnic->frontend_state = XenbusStateClosing; + net_accel_update_state(dev, XenbusStateClosing); + } + + mutex_unlock(&vnic->vnic_mutex); + + DPRINTK("%s waiting for release of %s\n", __FUNCTION__, dev->nodename); + + /* + * Wait for the xenbus watch to release the shared resources. + * This indicates that dom0 has made the transition + * Closing->Closed or that dom0 was in Closed or Init and no + * resources were mapped. + */ + wait_event(vnic->state_wait_queue, + !vnic->domU_state_is_setup); + + /* + * Now we don't need this watch anymore it is safe to remove + * it (and so synchronise with it completing if outstanding) + */ + DPRINTK("%s: unregistering xenbus accel watch\n", + __FUNCTION__); + unregister_xenbus_watch(&vnic->backend_accel_watch); + kfree(vnic->backend_accel_watch.node); + + netfront_accel_vnic_dtor(vnic); + + DPRINTK("%s done %s\n", __FUNCTION__, dev->nodename); + + return 0; +} diff -r dd748ded9ba8 drivers/xen/sfc_netfront/ef_vi_falcon.h --- /dev/null +++ b/drivers/xen/sfc_netfront/ef_vi_falcon.h @@ -0,0 +1,172 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author slp + * \brief Falcon specific definitions + * \date 2004/08 + */ + +#ifndef __EF_VI_FALCON_H__ +#define __EF_VI_FALCON_H__ + +#define EFHW_4K 0x00001000u +#define EFHW_8K 0x00002000u + +/* include the autogenerated register definitions */ + +#include "ef_vi_falcon_core.h" +#include "ef_vi_falcon_desc.h" +#include "ef_vi_falcon_event.h" + + +/*---------------------------------------------------------------------------- + * + * Helpers to turn bit shifts into dword shifts and check that the bit fields + * haven't overflown the dword etc. 
Aim is to preserve consistency with the + autogenerated headers - once stable we could hard code. + * + *---------------------------------------------------------------------------*/ + +/* mask constructors */ +#define __FALCON_MASK(WIDTH,T) ((((T)1) << (WIDTH)) - 1) +#define __EFVI_MASK32(WIDTH) __FALCON_MASK((WIDTH),uint32_t) +#define __EFVI_MASK64(WIDTH) __FALCON_MASK((WIDTH),uint64_t) + +#define __EFVI_FALCON_MASKFIELD32(LBN, WIDTH) ((uint32_t) \ + (__EFVI_MASK32(WIDTH) << (LBN))) + +/* constructors for fields which span the first and second dwords */ +#define __LW(LBN) (32 - LBN) +#define LOW(v, LBN, WIDTH) ((uint32_t) \ + (((v) & __EFVI_MASK64(__LW((LBN)))) << (LBN))) +#define HIGH(v, LBN, WIDTH) ((uint32_t)(((v) >> __LW((LBN))) & \ + __EFVI_MASK64((WIDTH - __LW((LBN)))))) +/* constructors for fields within the second dword */ +#define __DW2(LBN) ((LBN) - 32) + +/* constructors for fields which span the second and third dwords */ +#define __LW2(LBN) (64 - LBN) +#define LOW2(v, LBN, WIDTH) ((uint32_t) \ + (((v) & __EFVI_MASK64(__LW2((LBN)))) << ((LBN) - 32))) +#define HIGH2(v, LBN, WIDTH) ((uint32_t) \ + (((v) >> __LW2((LBN))) & __EFVI_MASK64((WIDTH - __LW2((LBN)))))) + +/* constructors for fields within the third dword */ +#define __DW3(LBN) ((LBN) - 64) + + +/* constructors for fields which span the third and fourth dwords */ +#define __LW3(LBN) (96 - LBN) +#define LOW3(v, LBN, WIDTH) ((uint32_t) \ + (((v) & __EFVI_MASK64(__LW3((LBN)))) << ((LBN) - 64))) +#define HIGH3(v, LBN, WIDTH) ((uint32_t) \ + (((v) >> __LW3((LBN))) & __EFVI_MASK64((WIDTH - __LW3((LBN)))))) + +/* constructors for fields within the fourth dword */ +#define __DW4(LBN) ((LBN) - 96) + +/* checks that the autogenerated headers are consistent with our model */ +#define WIDTHCHCK(a, b) ef_assert((a) == (b)) +#define RANGECHCK(v, WIDTH) \ + ef_assert(((uint64_t)(v) & ~(__EFVI_MASK64((WIDTH)))) == 0) + +/* fields within the first dword */ +#define DWCHCK(LBN, WIDTH) ef_assert(((LBN) >= 0) &&(((LBN)+(WIDTH)) <= 32)) + +/* fields which span the first and second dwords */ +#define LWCHK(LBN, WIDTH) ef_assert(WIDTH >= __LW(LBN)) + +/*---------------------------------------------------------------------------- + * + * Buffer virtual addresses (4K buffers) + * + *---------------------------------------------------------------------------*/ + +/* Form a buffer virtual address from buffer ID and offset. If the offset +** is larger than the buffer size, then the buffer indexed will be +** calculated appropriately. It is the responsibility of the caller to +** ensure that they have valid buffers programmed at that address. 
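+**
+** Illustrative example (values chosen for this comment only): with
+** EFVI_FALCON_VADDR_4K_S = 12, buffer ID 5 and offset 0x123 give
+** EFVI_FALCON_BUFFER_4K_ADDR(5, 0x123) = (5 << 12) + 0x123 = 0x5123,
+** and EFVI_FALCON_BUFFER_4K_PAGE() and EFVI_FALCON_BUFFER_4K_OFF()
+** recover 5 and 0x123 from that address.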
+*/ +#define EFVI_FALCON_VADDR_4K_S (12) +#define EFVI_FALCON_VADDR_M 0xfffff /* post shift mask */ + + +#define EFVI_FALCON_BUFFER_4K_ADDR(id,off) \ + (((id) << EFVI_FALCON_VADDR_4K_S) + (off)) + +#define EFVI_FALCON_BUFFER_4K_PAGE(vaddr) \ + (((vaddr) >> EFVI_FALCON_VADDR_4K_S) & EFVI_FALCON_VADDR_M) + +#define EFVI_FALCON_BUFFER_4K_OFF(vaddr) \ + ((vaddr) & __EFVI_MASK32(EFVI_FALCON_VADDR_4K_S)) + + +/*---------------------------------------------------------------------------- + * + * Masks + * + *---------------------------------------------------------------------------*/ + +#define EFVI_FALCON_CLOCK_ASIC_HZ (125000) +#define EFVI_FALCON_CLOCK_FPGA_HZ (62500) +#define EFVI_FALCON_CLOCK_HZ EFVI_FALCON_CLOCK_ASIC_HZ + + +/*---------------------------------------------------------------------------- + * + * Timers + * + *---------------------------------------------------------------------------*/ + +/* Event-Queue Timer granularity - measured in us + Given by: 4096 * 3 cycle * clock period */ + +#define EFVI_FALCON_EVQTIMER_PERIOD_US ((4096 * 3 * 1000) / EFVI_FALCON_CLOCK_HZ) + +/* mode bits */ +#define EFVI_FALCON_TIMER_MODE_DIS 0 /* disabled */ +#define EFVI_FALCON_TIMER_MODE_RUN 1 /* started counting right away */ +#define EFVI_FALCON_TIMER_MODE_HOLD 2 /* trigger mode (user queues) */ + +#define EFVI_FALCON_EVQTIMER_HOLD (EFVI_FALCON_TIMER_MODE_HOLD << TIMER_MODE_LBN) +#define EFVI_FALCON_EVQTIMER_RUN (EFVI_FALCON_TIMER_MODE_RUN << TIMER_MODE_LBN) +#define EFVI_FALCON_EVQTIMER_DISABLE (EFVI_FALCON_TIMER_MODE_DIS << TIMER_MODE_LBN) + + +/* ---- efhw_event_t helpers --- */ + +#define EFVI_FALCON_EVENT_CODE(evp) \ + ((evp)->u64 & EFVI_FALCON_EVENT_CODE_MASK) + +#define EFVI_FALCON_EVENT_SW_DATA_MASK 0x0000ffff + +#define __EFVI_FALCON_OPEN_MASK(WIDTH) ((((uint64_t)1) << (WIDTH)) - 1) + +#define EFVI_FALCON_EVENT_CODE_MASK \ + (__EFVI_FALCON_OPEN_MASK(EV_CODE_WIDTH) << EV_CODE_LBN) + + +#endif /* __EF_VI_FALCON_H__ */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/ef_vi_falcon_core.h --- /dev/null +++ b/drivers/xen/sfc_netfront/ef_vi_falcon_core.h @@ -0,0 +1,1075 @@ + +#define EFVI_FALCON_EXTENDED_P_BAR 1 + +//////////////---- Bus Interface Unit Registers C Header ----////////////// +#define IOM_IND_ADR_REG_OFST 0x0 // IO-mapped indirect access address register + #define IOM_AUTO_ADR_INC_EN_LBN 16 + #define IOM_AUTO_ADR_INC_EN_WIDTH 1 + #define IOM_IND_ADR_LBN 0 + #define IOM_IND_ADR_WIDTH 16 +#define IOM_IND_DAT_REG_OFST 0x4 // IO-mapped indirect access data register + #define IOM_IND_DAT_LBN 0 + #define IOM_IND_DAT_WIDTH 32 +#define ADR_REGION_REG_KER_OFST 0x0 // Address region register +#define ADR_REGION_REG_OFST 0x0 // Address region register + #define ADR_REGION3_LBN 96 + #define ADR_REGION3_WIDTH 18 + #define ADR_REGION2_LBN 64 + #define ADR_REGION2_WIDTH 18 + #define ADR_REGION1_LBN 32 + #define ADR_REGION1_WIDTH 18 + #define ADR_REGION0_LBN 0 + #define ADR_REGION0_WIDTH 18 +#define INT_EN_REG_KER_OFST 0x10 // Kernel driver Interrupt enable register + #define KER_INT_CHAR_LBN 4 + #define KER_INT_CHAR_WIDTH 1 + #define KER_INT_KER_LBN 3 + #define KER_INT_KER_WIDTH 1 + #define ILL_ADR_ERR_INT_EN_KER_LBN 2 + #define ILL_ADR_ERR_INT_EN_KER_WIDTH 1 + #define SRM_PERR_INT_EN_KER_LBN 1 + #define SRM_PERR_INT_EN_KER_WIDTH 1 + #define DRV_INT_EN_KER_LBN 0 + #define DRV_INT_EN_KER_WIDTH 1 +#define INT_EN_REG_CHAR_OFST 0x20 // Char Driver interrupt enable register + #define CHAR_INT_CHAR_LBN 4 + #define CHAR_INT_CHAR_WIDTH 1 + #define CHAR_INT_KER_LBN 3 + #define CHAR_INT_KER_WIDTH 1 + 
#define ILL_ADR_ERR_INT_EN_CHAR_LBN 2 + #define ILL_ADR_ERR_INT_EN_CHAR_WIDTH 1 + #define SRM_PERR_INT_EN_CHAR_LBN 1 + #define SRM_PERR_INT_EN_CHAR_WIDTH 1 + #define DRV_INT_EN_CHAR_LBN 0 + #define DRV_INT_EN_CHAR_WIDTH 1 +#define INT_ADR_REG_KER_OFST 0x30 // Interrupt host address for Kernel driver + #define INT_ADR_KER_LBN 0 + #define INT_ADR_KER_WIDTH 64 + #define DRV_INT_KER_LBN 32 + #define DRV_INT_KER_WIDTH 1 + #define EV_FF_HALF_INT_KER_LBN 3 + #define EV_FF_HALF_INT_KER_WIDTH 1 + #define EV_FF_FULL_INT_KER_LBN 2 + #define EV_FF_FULL_INT_KER_WIDTH 1 + #define ILL_ADR_ERR_INT_KER_LBN 1 + #define ILL_ADR_ERR_INT_KER_WIDTH 1 + #define SRAM_PERR_INT_KER_LBN 0 + #define SRAM_PERR_INT_KER_WIDTH 1 +#define INT_ADR_REG_CHAR_OFST 0x40 // Interrupt host address for Char driver + #define INT_ADR_CHAR_LBN 0 + #define INT_ADR_CHAR_WIDTH 64 + #define DRV_INT_CHAR_LBN 32 + #define DRV_INT_CHAR_WIDTH 1 + #define EV_FF_HALF_INT_CHAR_LBN 3 + #define EV_FF_HALF_INT_CHAR_WIDTH 1 + #define EV_FF_FULL_INT_CHAR_LBN 2 + #define EV_FF_FULL_INT_CHAR_WIDTH 1 + #define ILL_ADR_ERR_INT_CHAR_LBN 1 + #define ILL_ADR_ERR_INT_CHAR_WIDTH 1 + #define SRAM_PERR_INT_CHAR_LBN 0 + #define SRAM_PERR_INT_CHAR_WIDTH 1 +#define INT_ISR0_B0_OFST 0x90 // B0 only +#define INT_ISR1_B0_OFST 0xA0 +#define INT_ACK_REG_KER_A1_OFST 0x50 // Kernel interrupt acknowledge register + #define RESERVED_LBN 0 + #define RESERVED_WIDTH 32 +#define INT_ACK_REG_CHAR_A1_OFST 0x60 // CHAR interrupt acknowledge register + #define RESERVED_LBN 0 + #define RESERVED_WIDTH 32 +//////////////---- Global CSR Registers C Header ----////////////// +#define STRAP_REG_KER_OFST 0x200 // ASIC strap status register +#define STRAP_REG_OFST 0x200 // ASIC strap status register + #define ONCHIP_SRAM_LBN 16 + #define ONCHIP_SRAM_WIDTH 0 + #define STRAP_ISCSI_EN_LBN 3 + #define STRAP_ISCSI_EN_WIDTH 1 + #define STRAP_PINS_LBN 0 + #define STRAP_PINS_WIDTH 3 +#define GPIO_CTL_REG_KER_OFST 0x210 // GPIO control register +#define GPIO_CTL_REG_OFST 0x210 // GPIO control register + #define GPIO_OEN_LBN 24 + #define GPIO_OEN_WIDTH 4 + #define GPIO_OUT_LBN 16 + #define GPIO_OUT_WIDTH 4 + #define GPIO_IN_LBN 8 + #define GPIO_IN_WIDTH 4 + #define GPIO_PWRUP_VALUE_LBN 0 + #define GPIO_PWRUP_VALUE_WIDTH 4 +#define GLB_CTL_REG_KER_OFST 0x220 // Global control register +#define GLB_CTL_REG_OFST 0x220 // Global control register + #define SWRST_LBN 0 + #define SWRST_WIDTH 1 +#define FATAL_INTR_REG_KER_OFST 0x230 // Fatal interrupt register for Kernel + #define PCI_BUSERR_INT_KER_EN_LBN 43 + #define PCI_BUSERR_INT_KER_EN_WIDTH 1 + #define SRAM_OOB_INT_KER_EN_LBN 42 + #define SRAM_OOB_INT_KER_EN_WIDTH 1 + #define BUFID_OOB_INT_KER_EN_LBN 41 + #define BUFID_OOB_INT_KER_EN_WIDTH 1 + #define MEM_PERR_INT_KER_EN_LBN 40 + #define MEM_PERR_INT_KER_EN_WIDTH 1 + #define RBUF_OWN_INT_KER_EN_LBN 39 + #define RBUF_OWN_INT_KER_EN_WIDTH 1 + #define TBUF_OWN_INT_KER_EN_LBN 38 + #define TBUF_OWN_INT_KER_EN_WIDTH 1 + #define RDESCQ_OWN_INT_KER_EN_LBN 37 + #define RDESCQ_OWN_INT_KER_EN_WIDTH 1 + #define TDESCQ_OWN_INT_KER_EN_LBN 36 + #define TDESCQ_OWN_INT_KER_EN_WIDTH 1 + #define EVQ_OWN_INT_KER_EN_LBN 35 + #define EVQ_OWN_INT_KER_EN_WIDTH 1 + #define EVFF_OFLO_INT_KER_EN_LBN 34 + #define EVFF_OFLO_INT_KER_EN_WIDTH 1 + #define ILL_ADR_INT_KER_EN_LBN 33 + #define ILL_ADR_INT_KER_EN_WIDTH 1 + #define SRM_PERR_INT_KER_EN_LBN 32 + #define SRM_PERR_INT_KER_EN_WIDTH 1 + #define PCI_BUSERR_INT_KER_LBN 11 + #define PCI_BUSERR_INT_KER_WIDTH 1 + #define SRAM_OOB_INT_KER_LBN 10 + #define SRAM_OOB_INT_KER_WIDTH 1 
+ #define BUFID_OOB_INT_KER_LBN 9 + #define BUFID_OOB_INT_KER_WIDTH 1 + #define MEM_PERR_INT_KER_LBN 8 + #define MEM_PERR_INT_KER_WIDTH 1 + #define RBUF_OWN_INT_KER_LBN 7 + #define RBUF_OWN_INT_KER_WIDTH 1 + #define TBUF_OWN_INT_KER_LBN 6 + #define TBUF_OWN_INT_KER_WIDTH 1 + #define RDESCQ_OWN_INT_KER_LBN 5 + #define RDESCQ_OWN_INT_KER_WIDTH 1 + #define TDESCQ_OWN_INT_KER_LBN 4 + #define TDESCQ_OWN_INT_KER_WIDTH 1 + #define EVQ_OWN_INT_KER_LBN 3 + #define EVQ_OWN_INT_KER_WIDTH 1 + #define EVFF_OFLO_INT_KER_LBN 2 + #define EVFF_OFLO_INT_KER_WIDTH 1 + #define ILL_ADR_INT_KER_LBN 1 + #define ILL_ADR_INT_KER_WIDTH 1 + #define SRM_PERR_INT_KER_LBN 0 + #define SRM_PERR_INT_KER_WIDTH 1 +#define FATAL_INTR_REG_OFST 0x240 // Fatal interrupt register for Char + #define PCI_BUSERR_INT_CHAR_EN_LBN 43 + #define PCI_BUSERR_INT_CHAR_EN_WIDTH 1 + #define SRAM_OOB_INT_CHAR_EN_LBN 42 + #define SRAM_OOB_INT_CHAR_EN_WIDTH 1 + #define BUFID_OOB_INT_CHAR_EN_LBN 41 + #define BUFID_OOB_INT_CHAR_EN_WIDTH 1 + #define MEM_PERR_INT_CHAR_EN_LBN 40 + #define MEM_PERR_INT_CHAR_EN_WIDTH 1 + #define RBUF_OWN_INT_CHAR_EN_LBN 39 + #define RBUF_OWN_INT_CHAR_EN_WIDTH 1 + #define TBUF_OWN_INT_CHAR_EN_LBN 38 + #define TBUF_OWN_INT_CHAR_EN_WIDTH 1 + #define RDESCQ_OWN_INT_CHAR_EN_LBN 37 + #define RDESCQ_OWN_INT_CHAR_EN_WIDTH 1 + #define TDESCQ_OWN_INT_CHAR_EN_LBN 36 + #define TDESCQ_OWN_INT_CHAR_EN_WIDTH 1 + #define EVQ_OWN_INT_CHAR_EN_LBN 35 + #define EVQ_OWN_INT_CHAR_EN_WIDTH 1 + #define EVFF_OFLO_INT_CHAR_EN_LBN 34 + #define EVFF_OFLO_INT_CHAR_EN_WIDTH 1 + #define ILL_ADR_INT_CHAR_EN_LBN 33 + #define ILL_ADR_INT_CHAR_EN_WIDTH 1 + #define SRM_PERR_INT_CHAR_EN_LBN 32 + #define SRM_PERR_INT_CHAR_EN_WIDTH 1 + #define FATAL_INTR_REG_EN_BITS 0xffffffffffffffffULL + #define PCI_BUSERR_INT_CHAR_LBN 11 + #define PCI_BUSERR_INT_CHAR_WIDTH 1 + #define SRAM_OOB_INT_CHAR_LBN 10 + #define SRAM_OOB_INT_CHAR_WIDTH 1 + #define BUFID_OOB_INT_CHAR_LBN 9 + #define BUFID_OOB_INT_CHAR_WIDTH 1 + #define MEM_PERR_INT_CHAR_LBN 8 + #define MEM_PERR_INT_CHAR_WIDTH 1 + #define RBUF_OWN_INT_CHAR_LBN 7 + #define RBUF_OWN_INT_CHAR_WIDTH 1 + #define TBUF_OWN_INT_CHAR_LBN 6 + #define TBUF_OWN_INT_CHAR_WIDTH 1 + #define RDESCQ_OWN_INT_CHAR_LBN 5 + #define RDESCQ_OWN_INT_CHAR_WIDTH 1 + #define TDESCQ_OWN_INT_CHAR_LBN 4 + #define TDESCQ_OWN_INT_CHAR_WIDTH 1 + #define EVQ_OWN_INT_CHAR_LBN 3 + #define EVQ_OWN_INT_CHAR_WIDTH 1 + #define EVFF_OFLO_INT_CHAR_LBN 2 + #define EVFF_OFLO_INT_CHAR_WIDTH 1 + #define ILL_ADR_INT_CHAR_LBN 1 + #define ILL_ADR_INT_CHAR_WIDTH 1 + #define SRM_PERR_INT_CHAR_LBN 0 + #define SRM_PERR_INT_CHAR_WIDTH 1 +#define DP_CTRL_REG_OFST 0x250 // Datapath control register + #define FLS_EVQ_ID_LBN 0 + #define FLS_EVQ_ID_WIDTH 12 +#define MEM_STAT_REG_KER_OFST 0x260 // Memory status register +#define MEM_STAT_REG_OFST 0x260 // Memory status register + #define MEM_PERR_VEC_LBN 53 + #define MEM_PERR_VEC_WIDTH 38 + #define MBIST_CORR_LBN 38 + #define MBIST_CORR_WIDTH 15 + #define MBIST_ERR_LBN 0 + #define MBIST_ERR_WIDTH 38 +#define DEBUG_REG_KER_OFST 0x270 // Debug register +#define DEBUG_REG_OFST 0x270 // Debug register + #define DEBUG_BLK_SEL2_LBN 47 + #define DEBUG_BLK_SEL2_WIDTH 3 + #define DEBUG_BLK_SEL1_LBN 44 + #define DEBUG_BLK_SEL1_WIDTH 3 + #define DEBUG_BLK_SEL0_LBN 41 + #define DEBUG_BLK_SEL0_WIDTH 3 + #define MISC_DEBUG_ADDR_LBN 36 + #define MISC_DEBUG_ADDR_WIDTH 5 + #define SERDES_DEBUG_ADDR_LBN 31 + #define SERDES_DEBUG_ADDR_WIDTH 5 + #define EM_DEBUG_ADDR_LBN 26 + #define EM_DEBUG_ADDR_WIDTH 5 + #define SR_DEBUG_ADDR_LBN 21 + 
#define SR_DEBUG_ADDR_WIDTH 5 + #define EV_DEBUG_ADDR_LBN 16 + #define EV_DEBUG_ADDR_WIDTH 5 + #define RX_DEBUG_ADDR_LBN 11 + #define RX_DEBUG_ADDR_WIDTH 5 + #define TX_DEBUG_ADDR_LBN 6 + #define TX_DEBUG_ADDR_WIDTH 5 + #define BIU_DEBUG_ADDR_LBN 1 + #define BIU_DEBUG_ADDR_WIDTH 5 + #define DEBUG_EN_LBN 0 + #define DEBUG_EN_WIDTH 1 +#define DRIVER_REG0_KER_OFST 0x280 // Driver scratch register 0 +#define DRIVER_REG0_OFST 0x280 // Driver scratch register 0 + #define DRIVER_DW0_LBN 0 + #define DRIVER_DW0_WIDTH 32 +#define DRIVER_REG1_KER_OFST 0x290 // Driver scratch register 1 +#define DRIVER_REG1_OFST 0x290 // Driver scratch register 1 + #define DRIVER_DW1_LBN 0 + #define DRIVER_DW1_WIDTH 32 +#define DRIVER_REG2_KER_OFST 0x2A0 // Driver scratch register 2 +#define DRIVER_REG2_OFST 0x2A0 // Driver scratch register 2 + #define DRIVER_DW2_LBN 0 + #define DRIVER_DW2_WIDTH 32 +#define DRIVER_REG3_KER_OFST 0x2B0 // Driver scratch register 3 +#define DRIVER_REG3_OFST 0x2B0 // Driver scratch register 3 + #define DRIVER_DW3_LBN 0 + #define DRIVER_DW3_WIDTH 32 +#define DRIVER_REG4_KER_OFST 0x2C0 // Driver scratch register 4 +#define DRIVER_REG4_OFST 0x2C0 // Driver scratch register 4 + #define DRIVER_DW4_LBN 0 + #define DRIVER_DW4_WIDTH 32 +#define DRIVER_REG5_KER_OFST 0x2D0 // Driver scratch register 5 +#define DRIVER_REG5_OFST 0x2D0 // Driver scratch register 5 + #define DRIVER_DW5_LBN 0 + #define DRIVER_DW5_WIDTH 32 +#define DRIVER_REG6_KER_OFST 0x2E0 // Driver scratch register 6 +#define DRIVER_REG6_OFST 0x2E0 // Driver scratch register 6 + #define DRIVER_DW6_LBN 0 + #define DRIVER_DW6_WIDTH 32 +#define DRIVER_REG7_KER_OFST 0x2F0 // Driver scratch register 7 +#define DRIVER_REG7_OFST 0x2F0 // Driver scratch register 7 + #define DRIVER_DW7_LBN 0 + #define DRIVER_DW7_WIDTH 32 +#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register +#define ALTERA_BUILD_REG_OFST 0x300 // Altera build register + #define ALTERA_BUILD_VER_LBN 0 + #define ALTERA_BUILD_VER_WIDTH 32 + +/* so called CSR spare register + - contains separate parity enable bits for the various internal memory blocks */ +#define MEM_PARITY_ERR_EN_REG_KER 0x310 +#define MEM_PARITY_ALL_BLOCKS_EN_LBN 64 +#define MEM_PARITY_ALL_BLOCKS_EN_WIDTH 38 +#define MEM_PARITY_TX_DATA_EN_LBN 72 +#define MEM_PARITY_TX_DATA_EN_WIDTH 2 + +//////////////---- Event & Timer Module Registers C Header ----////////////// + +#if EFVI_FALCON_EXTENDED_P_BAR +#define EVQ_RPTR_REG_KER_OFST 0x11B00 // Event queue read pointer register +#else +#define EVQ_RPTR_REG_KER_OFST 0x1B00 // Event queue read pointer register +#endif + +#define EVQ_RPTR_REG_OFST 0xFA0000 // Event queue read pointer register array. + #define EVQ_RPTR_LBN 0 + #define EVQ_RPTR_WIDTH 15 + +#if EFVI_FALCON_EXTENDED_P_BAR +#define EVQ_PTR_TBL_KER_OFST 0x11A00 // Event queue pointer table for kernel access +#else +#define EVQ_PTR_TBL_KER_OFST 0x1A00 // Event queue pointer table for kernel access +#endif + +#define EVQ_PTR_TBL_CHAR_OFST 0xF60000 // Event queue pointer table for char direct access + #define EVQ_WKUP_OR_INT_EN_LBN 39 + #define EVQ_WKUP_OR_INT_EN_WIDTH 1 + #define EVQ_NXT_WPTR_LBN 24 + #define EVQ_NXT_WPTR_WIDTH 15 + #define EVQ_EN_LBN 23 + #define EVQ_EN_WIDTH 1 + #define EVQ_SIZE_LBN 20 + #define EVQ_SIZE_WIDTH 3 + #define EVQ_BUF_BASE_ID_LBN 0 + #define EVQ_BUF_BASE_ID_WIDTH 20 +#define TIMER_CMD_REG_KER_OFST 0x420 // Timer table for kernel access. Page-mapped +#define TIMER_CMD_REG_PAGE4_OFST 0x8420 // Timer table for user-level access. Page-mapped. For lowest 1K queues. 
+#define TIMER_CMD_REG_PAGE123K_OFST 0x1000420 // Timer table for user-level access. Page-mapped. For upper 3K queues. +#define TIMER_TBL_OFST 0xF70000 // Timer table for char driver direct access + #define TIMER_MODE_LBN 12 + #define TIMER_MODE_WIDTH 2 + #define TIMER_VAL_LBN 0 + #define TIMER_VAL_WIDTH 12 + #define TIMER_MODE_INT_HLDOFF 2 + #define EVQ_BUF_SIZE_LBN 0 + #define EVQ_BUF_SIZE_WIDTH 1 +#define DRV_EV_REG_KER_OFST 0x440 // Driver generated event register +#define DRV_EV_REG_OFST 0x440 // Driver generated event register + #define DRV_EV_QID_LBN 64 + #define DRV_EV_QID_WIDTH 12 + #define DRV_EV_DATA_LBN 0 + #define DRV_EV_DATA_WIDTH 64 +#define EVQ_CTL_REG_KER_OFST 0x450 // Event queue control register +#define EVQ_CTL_REG_OFST 0x450 // Event queue control register + #define RX_EVQ_WAKEUP_MASK_B0_LBN 15 + #define RX_EVQ_WAKEUP_MASK_B0_WIDTH 6 + #define EVQ_OWNERR_CTL_LBN 14 + #define EVQ_OWNERR_CTL_WIDTH 1 + #define EVQ_FIFO_AF_TH_LBN 8 + #define EVQ_FIFO_AF_TH_WIDTH 6 + #define EVQ_FIFO_NOTAF_TH_LBN 0 + #define EVQ_FIFO_NOTAF_TH_WIDTH 6 +//////////////---- SRAM Module Registers C Header ----////////////// +#define BUF_TBL_CFG_REG_KER_OFST 0x600 // Buffer table configuration register +#define BUF_TBL_CFG_REG_OFST 0x600 // Buffer table configuration register + #define BUF_TBL_MODE_LBN 3 + #define BUF_TBL_MODE_WIDTH 1 +#define SRM_RX_DC_CFG_REG_KER_OFST 0x610 // SRAM receive descriptor cache configuration register +#define SRM_RX_DC_CFG_REG_OFST 0x610 // SRAM receive descriptor cache configuration register + #define SRM_RX_DC_BASE_ADR_LBN 0 + #define SRM_RX_DC_BASE_ADR_WIDTH 21 +#define SRM_TX_DC_CFG_REG_KER_OFST 0x620 // SRAM transmit descriptor cache configuration register +#define SRM_TX_DC_CFG_REG_OFST 0x620 // SRAM transmit descriptor cache configuration register + #define SRM_TX_DC_BASE_ADR_LBN 0 + #define SRM_TX_DC_BASE_ADR_WIDTH 21 +#define SRM_CFG_REG_KER_OFST 0x630 // SRAM configuration register +#define SRM_CFG_REG_OFST 0x630 // SRAM configuration register + #define SRAM_OOB_ADR_INTEN_LBN 5 + #define SRAM_OOB_ADR_INTEN_WIDTH 1 + #define SRAM_OOB_BUF_INTEN_LBN 4 + #define SRAM_OOB_BUF_INTEN_WIDTH 1 + #define SRAM_BT_INIT_EN_LBN 3 + #define SRAM_BT_INIT_EN_WIDTH 1 + #define SRM_NUM_BANK_LBN 2 + #define SRM_NUM_BANK_WIDTH 1 + #define SRM_BANK_SIZE_LBN 0 + #define SRM_BANK_SIZE_WIDTH 2 +#define BUF_TBL_UPD_REG_KER_OFST 0x650 // Buffer table update register +#define BUF_TBL_UPD_REG_OFST 0x650 // Buffer table update register + #define BUF_UPD_CMD_LBN 63 + #define BUF_UPD_CMD_WIDTH 1 + #define BUF_CLR_CMD_LBN 62 + #define BUF_CLR_CMD_WIDTH 1 + #define BUF_CLR_END_ID_LBN 32 + #define BUF_CLR_END_ID_WIDTH 20 + #define BUF_CLR_START_ID_LBN 0 + #define BUF_CLR_START_ID_WIDTH 20 +#define SRM_UPD_EVQ_REG_KER_OFST 0x660 // Buffer table update register +#define SRM_UPD_EVQ_REG_OFST 0x660 // Buffer table update register + #define SRM_UPD_EVQ_ID_LBN 0 + #define SRM_UPD_EVQ_ID_WIDTH 12 +#define SRAM_PARITY_REG_KER_OFST 0x670 // SRAM parity register. +#define SRAM_PARITY_REG_OFST 0x670 // SRAM parity register. 
+ #define FORCE_SRAM_PERR_LBN 0 + #define FORCE_SRAM_PERR_WIDTH 1 + +#if EFVI_FALCON_EXTENDED_P_BAR +#define BUF_HALF_TBL_KER_OFST 0x18000 // Buffer table in half buffer table mode direct access by kernel driver +#else +#define BUF_HALF_TBL_KER_OFST 0x8000 // Buffer table in half buffer table mode direct access by kernel driver +#endif + + +#define BUF_HALF_TBL_OFST 0x800000 // Buffer table in half buffer table mode direct access by char driver + #define BUF_ADR_HBUF_ODD_LBN 44 + #define BUF_ADR_HBUF_ODD_WIDTH 20 + #define BUF_OWNER_ID_HBUF_ODD_LBN 32 + #define BUF_OWNER_ID_HBUF_ODD_WIDTH 12 + #define BUF_ADR_HBUF_EVEN_LBN 12 + #define BUF_ADR_HBUF_EVEN_WIDTH 20 + #define BUF_OWNER_ID_HBUF_EVEN_LBN 0 + #define BUF_OWNER_ID_HBUF_EVEN_WIDTH 12 + + +#if EFVI_FALCON_EXTENDED_P_BAR +#define BUF_FULL_TBL_KER_OFST 0x18000 // Buffer table in full buffer table mode direct access by kernel driver +#else +#define BUF_FULL_TBL_KER_OFST 0x8000 // Buffer table in full buffer table mode direct access by kernel driver +#endif + + + + +#define BUF_FULL_TBL_OFST 0x800000 // Buffer table in full buffer table mode direct access by char driver + #define IP_DAT_BUF_SIZE_LBN 50 + #define IP_DAT_BUF_SIZE_WIDTH 1 + #define BUF_ADR_REGION_LBN 48 + #define BUF_ADR_REGION_WIDTH 2 + #define BUF_ADR_FBUF_LBN 14 + #define BUF_ADR_FBUF_WIDTH 34 + #define BUF_OWNER_ID_FBUF_LBN 0 + #define BUF_OWNER_ID_FBUF_WIDTH 14 +#define SRM_DBG_REG_OFST 0x3000000 // SRAM debug access + #define SRM_DBG_LBN 0 + #define SRM_DBG_WIDTH 64 +//////////////---- RX Datapath Registers C Header ----////////////// + +#define RX_CFG_REG_KER_OFST 0x800 // Receive configuration register +#define RX_CFG_REG_OFST 0x800 // Receive configuration register + +#if !defined(FALCON_64K_RXFIFO) && !defined(FALCON_PRE_02020029) +# if !defined(FALCON_128K_RXFIFO) +# define FALCON_128K_RXFIFO +# endif +#endif + +#if defined(FALCON_128K_RXFIFO) + +/* new for B0 */ + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 48 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 + #define RX_INGR_EN_B0_LBN 47 + #define RX_INGR_EN_B0_WIDTH 1 + #define RX_TOEP_IPV4_B0_LBN 46 + #define RX_TOEP_IPV4_B0_WIDTH 1 + #define RX_HASH_ALG_B0_LBN 45 + #define RX_HASH_ALG_B0_WIDTH 1 + #define RX_HASH_INSERT_HDR_B0_LBN 44 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 +/* moved for B0 */ + #define RX_DESC_PUSH_EN_B0_LBN 43 + #define RX_DESC_PUSH_EN_B0_WIDTH 1 + #define RX_RDW_PATCH_EN_LBN 42 /* Non head of line blocking */ + #define RX_RDW_PATCH_EN_WIDTH 1 + #define RX_PCI_BURST_SIZE_B0_LBN 39 + #define RX_PCI_BURST_SIZE_B0_WIDTH 3 + #define RX_OWNERR_CTL_B0_LBN 38 + #define RX_OWNERR_CTL_B0_WIDTH 1 + #define RX_XON_TX_TH_B0_LBN 33 + #define RX_XON_TX_TH_B0_WIDTH 5 + #define RX_XOFF_TX_TH_B0_LBN 28 + #define RX_XOFF_TX_TH_B0_WIDTH 5 + #define RX_USR_BUF_SIZE_B0_LBN 19 + #define RX_USR_BUF_SIZE_B0_WIDTH 9 + #define RX_XON_MAC_TH_B0_LBN 10 + #define RX_XON_MAC_TH_B0_WIDTH 9 + #define RX_XOFF_MAC_TH_B0_LBN 1 + #define RX_XOFF_MAC_TH_B0_WIDTH 9 + #define RX_XOFF_MAC_EN_B0_LBN 0 + #define RX_XOFF_MAC_EN_B0_WIDTH 1 + +#elif !defined(FALCON_PRE_02020029) +/* new for B0 */ + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 46 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 + #define RX_INGR_EN_B0_LBN 45 + #define RX_INGR_EN_B0_WIDTH 1 + #define RX_TOEP_IPV4_B0_LBN 44 + #define RX_TOEP_IPV4_B0_WIDTH 1 + #define RX_HASH_ALG_B0_LBN 43 + #define RX_HASH_ALG_B0_WIDTH 41 + #define RX_HASH_INSERT_HDR_B0_LBN 42 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 +/* moved for B0 */ + #define RX_DESC_PUSH_EN_B0_LBN 41 + #define RX_DESC_PUSH_EN_B0_WIDTH 1 + #define 
RX_PCI_BURST_SIZE_B0_LBN 37 + #define RX_PCI_BURST_SIZE_B0_WIDTH 3 + #define RX_OWNERR_CTL_B0_LBN 36 + #define RX_OWNERR_CTL_B0_WIDTH 1 + #define RX_XON_TX_TH_B0_LBN 31 + #define RX_XON_TX_TH_B0_WIDTH 5 + #define RX_XOFF_TX_TH_B0_LBN 26 + #define RX_XOFF_TX_TH_B0_WIDTH 5 + #define RX_USR_BUF_SIZE_B0_LBN 17 + #define RX_USR_BUF_SIZE_B0_WIDTH 9 + #define RX_XON_MAC_TH_B0_LBN 9 + #define RX_XON_MAC_TH_B0_WIDTH 8 + #define RX_XOFF_MAC_TH_B0_LBN 1 + #define RX_XOFF_MAC_TH_B0_WIDTH 8 + #define RX_XOFF_MAC_EN_B0_LBN 0 + #define RX_XOFF_MAC_EN_B0_WIDTH 1 + +#else +/* new for B0 */ + #define RX_TOEP_TCP_SUPPRESS_B0_LBN 44 + #define RX_TOEP_TCP_SUPPRESS_B0_WIDTH 1 + #define RX_INGR_EN_B0_LBN 43 + #define RX_INGR_EN_B0_WIDTH 1 + #define RX_TOEP_IPV4_B0_LBN 42 + #define RX_TOEP_IPV4_B0_WIDTH 1 + #define RX_HASH_ALG_B0_LBN 41 + #define RX_HASH_ALG_B0_WIDTH 41 + #define RX_HASH_INSERT_HDR_B0_LBN 40 + #define RX_HASH_INSERT_HDR_B0_WIDTH 1 +/* moved for B0 */ + #define RX_DESC_PUSH_EN_B0_LBN 35 + #define RX_DESC_PUSH_EN_B0_WIDTH 1 + #define RX_PCI_BURST_SIZE_B0_LBN 35 + #define RX_PCI_BURST_SIZE_B0_WIDTH 2 + #define RX_OWNERR_CTL_B0_LBN 34 + #define RX_OWNERR_CTL_B0_WIDTH 1 + #define RX_XON_TX_TH_B0_LBN 29 + #define RX_XON_TX_TH_B0_WIDTH 5 + #define RX_XOFF_TX_TH_B0_LBN 24 + #define RX_XOFF_TX_TH_B0_WIDTH 5 + #define RX_USR_BUF_SIZE_B0_LBN 15 + #define RX_USR_BUF_SIZE_B0_WIDTH 9 + #define RX_XON_MAC_TH_B0_LBN 8 + #define RX_XON_MAC_TH_B0_WIDTH 7 + #define RX_XOFF_MAC_TH_B0_LBN 1 + #define RX_XOFF_MAC_TH_B0_WIDTH 7 + #define RX_XOFF_MAC_EN_B0_LBN 0 + #define RX_XOFF_MAC_EN_B0_WIDTH 1 + +#endif + +/* A0/A1 */ + #define RX_PUSH_EN_A1_LBN 35 + #define RX_PUSH_EN_A1_WIDTH 1 + #define RX_PCI_BURST_SIZE_A1_LBN 31 + #define RX_PCI_BURST_SIZE_A1_WIDTH 3 + #define RX_OWNERR_CTL_A1_LBN 30 + #define RX_OWNERR_CTL_A1_WIDTH 1 + #define RX_XON_TX_TH_A1_LBN 25 + #define RX_XON_TX_TH_A1_WIDTH 5 + #define RX_XOFF_TX_TH_A1_LBN 20 + #define RX_XOFF_TX_TH_A1_WIDTH 5 + #define RX_USR_BUF_SIZE_A1_LBN 11 + #define RX_USR_BUF_SIZE_A1_WIDTH 9 + #define RX_XON_MAC_TH_A1_LBN 6 + #define RX_XON_MAC_TH_A1_WIDTH 5 + #define RX_XOFF_MAC_TH_A1_LBN 1 + #define RX_XOFF_MAC_TH_A1_WIDTH 5 + #define RX_XOFF_MAC_EN_A1_LBN 0 + #define RX_XOFF_MAC_EN_A1_WIDTH 1 + +#define RX_FILTER_CTL_REG_OFST 0x810 // Receive filter control registers + #define SCATTER_ENBL_NO_MATCH_Q_B0_LBN 40 + #define SCATTER_ENBL_NO_MATCH_Q_B0_WIDTH 1 + #define UDP_FULL_SRCH_LIMIT_LBN 32 + #define UDP_FULL_SRCH_LIMIT_WIDTH 8 + #define NUM_KER_LBN 24 + #define NUM_KER_WIDTH 2 + #define UDP_WILD_SRCH_LIMIT_LBN 16 + #define UDP_WILD_SRCH_LIMIT_WIDTH 8 + #define TCP_WILD_SRCH_LIMIT_LBN 8 + #define TCP_WILD_SRCH_LIMIT_WIDTH 8 + #define TCP_FULL_SRCH_LIMIT_LBN 0 + #define TCP_FULL_SRCH_LIMIT_WIDTH 8 +#define RX_FLUSH_DESCQ_REG_KER_OFST 0x820 // Receive flush descriptor queue register +#define RX_FLUSH_DESCQ_REG_OFST 0x820 // Receive flush descriptor queue register + #define RX_FLUSH_DESCQ_CMD_LBN 24 + #define RX_FLUSH_DESCQ_CMD_WIDTH 1 + #define RX_FLUSH_EVQ_ID_LBN 12 + #define RX_FLUSH_EVQ_ID_WIDTH 12 + #define RX_FLUSH_DESCQ_LBN 0 + #define RX_FLUSH_DESCQ_WIDTH 12 +#define RX_DESC_UPD_REG_KER_OFST 0x830 // Kernel receive descriptor update register. Page-mapped +#define RX_DESC_UPD_REG_PAGE4_OFST 0x8830 // Char & user receive descriptor update register. Page-mapped. For lowest 1K queues. +#define RX_DESC_UPD_REG_PAGE123K_OFST 0x1000830 // Char & user receive descriptor update register. Page-mapped. For upper 3K queues. 
+ #define RX_DESC_WPTR_LBN 96 + #define RX_DESC_WPTR_WIDTH 12 + #define RX_DESC_PUSH_CMD_LBN 95 + #define RX_DESC_PUSH_CMD_WIDTH 1 + #define RX_DESC_LBN 0 + #define RX_DESC_WIDTH 64 + #define RX_KER_DESC_LBN 0 + #define RX_KER_DESC_WIDTH 64 + #define RX_USR_DESC_LBN 0 + #define RX_USR_DESC_WIDTH 32 +#define RX_DC_CFG_REG_KER_OFST 0x840 // Receive descriptor cache configuration register +#define RX_DC_CFG_REG_OFST 0x840 // Receive descriptor cache configuration register + #define RX_DC_SIZE_LBN 0 + #define RX_DC_SIZE_WIDTH 2 +#define RX_DC_PF_WM_REG_KER_OFST 0x850 // Receive descriptor cache pre-fetch watermark register +#define RX_DC_PF_WM_REG_OFST 0x850 // Receive descriptor cache pre-fetch watermark register + #define RX_DC_PF_LWM_LO_LBN 0 + #define RX_DC_PF_LWM_LO_WIDTH 6 + +#define RX_RSS_TKEY_B0_OFST 0x860 // RSS Toeplitz hash key (B0 only) + +#define RX_NODESC_DROP_REG 0x880 + #define RX_NODESC_DROP_CNT_LBN 0 + #define RX_NODESC_DROP_CNT_WIDTH 16 + +#define XM_TX_CFG_REG_OFST 0x1230 + #define XM_AUTO_PAD_LBN 5 + #define XM_AUTO_PAD_WIDTH 1 + +#define RX_FILTER_TBL0_OFST 0xF00000 // Receive filter table - even entries + #define RSS_EN_0_B0_LBN 110 + #define RSS_EN_0_B0_WIDTH 1 + #define SCATTER_EN_0_B0_LBN 109 + #define SCATTER_EN_0_B0_WIDTH 1 + #define TCP_UDP_0_LBN 108 + #define TCP_UDP_0_WIDTH 1 + #define RXQ_ID_0_LBN 96 + #define RXQ_ID_0_WIDTH 12 + #define DEST_IP_0_LBN 64 + #define DEST_IP_0_WIDTH 32 + #define DEST_PORT_TCP_0_LBN 48 + #define DEST_PORT_TCP_0_WIDTH 16 + #define SRC_IP_0_LBN 16 + #define SRC_IP_0_WIDTH 32 + #define SRC_TCP_DEST_UDP_0_LBN 0 + #define SRC_TCP_DEST_UDP_0_WIDTH 16 +#define RX_FILTER_TBL1_OFST 0xF00010 // Receive filter table - odd entries + #define RSS_EN_1_B0_LBN 110 + #define RSS_EN_1_B0_WIDTH 1 + #define SCATTER_EN_1_B0_LBN 109 + #define SCATTER_EN_1_B0_WIDTH 1 + #define TCP_UDP_1_LBN 108 + #define TCP_UDP_1_WIDTH 1 + #define RXQ_ID_1_LBN 96 + #define RXQ_ID_1_WIDTH 12 + #define DEST_IP_1_LBN 64 + #define DEST_IP_1_WIDTH 32 + #define DEST_PORT_TCP_1_LBN 48 + #define DEST_PORT_TCP_1_WIDTH 16 + #define SRC_IP_1_LBN 16 + #define SRC_IP_1_WIDTH 32 + #define SRC_TCP_DEST_UDP_1_LBN 0 + #define SRC_TCP_DEST_UDP_1_WIDTH 16 + +#if EFVI_FALCON_EXTENDED_P_BAR +#define RX_DESC_PTR_TBL_KER_OFST 0x11800 // Receive descriptor pointer kernel access +#else +#define RX_DESC_PTR_TBL_KER_OFST 0x1800 // Receive descriptor pointer kernel access +#endif + + +#define RX_DESC_PTR_TBL_OFST 0xF40000 // Receive descriptor pointer table + #define RX_ISCSI_DDIG_EN_LBN 88 + #define RX_ISCSI_DDIG_EN_WIDTH 1 + #define RX_ISCSI_HDIG_EN_LBN 87 + #define RX_ISCSI_HDIG_EN_WIDTH 1 + #define RX_DESC_PREF_ACT_LBN 86 + #define RX_DESC_PREF_ACT_WIDTH 1 + #define RX_DC_HW_RPTR_LBN 80 + #define RX_DC_HW_RPTR_WIDTH 6 + #define RX_DESCQ_HW_RPTR_LBN 68 + #define RX_DESCQ_HW_RPTR_WIDTH 12 + #define RX_DESCQ_SW_WPTR_LBN 56 + #define RX_DESCQ_SW_WPTR_WIDTH 12 + #define RX_DESCQ_BUF_BASE_ID_LBN 36 + #define RX_DESCQ_BUF_BASE_ID_WIDTH 20 + #define RX_DESCQ_EVQ_ID_LBN 24 + #define RX_DESCQ_EVQ_ID_WIDTH 12 + #define RX_DESCQ_OWNER_ID_LBN 10 + #define RX_DESCQ_OWNER_ID_WIDTH 14 + #define RX_DESCQ_LABEL_LBN 5 + #define RX_DESCQ_LABEL_WIDTH 5 + #define RX_DESCQ_SIZE_LBN 3 + #define RX_DESCQ_SIZE_WIDTH 2 + #define RX_DESCQ_TYPE_LBN 2 + #define RX_DESCQ_TYPE_WIDTH 1 + #define RX_DESCQ_JUMBO_LBN 1 + #define RX_DESCQ_JUMBO_WIDTH 1 + #define RX_DESCQ_EN_LBN 0 + #define RX_DESCQ_EN_WIDTH 1 + + +#define RX_RSS_INDIR_TBL_B0_OFST 0xFB0000 // RSS indirection table (B0 only) + #define RX_RSS_INDIR_ENT_B0_LBN 0 + 
#define RX_RSS_INDIR_ENT_B0_WIDTH 6 + +//////////////---- TX Datapath Registers C Header ----////////////// +#define TX_FLUSH_DESCQ_REG_KER_OFST 0xA00 // Transmit flush descriptor queue register +#define TX_FLUSH_DESCQ_REG_OFST 0xA00 // Transmit flush descriptor queue register + #define TX_FLUSH_DESCQ_CMD_LBN 12 + #define TX_FLUSH_DESCQ_CMD_WIDTH 1 + #define TX_FLUSH_DESCQ_LBN 0 + #define TX_FLUSH_DESCQ_WIDTH 12 +#define TX_DESC_UPD_REG_KER_OFST 0xA10 // Kernel transmit descriptor update register. Page-mapped +#define TX_DESC_UPD_REG_PAGE4_OFST 0x8A10 // Char & user transmit descriptor update register. Page-mapped +#define TX_DESC_UPD_REG_PAGE123K_OFST 0x1000A10 // Char & user transmit descriptor update register. Page-mapped + #define TX_DESC_WPTR_LBN 96 + #define TX_DESC_WPTR_WIDTH 12 + #define TX_DESC_PUSH_CMD_LBN 95 + #define TX_DESC_PUSH_CMD_WIDTH 1 + #define TX_DESC_LBN 0 + #define TX_DESC_WIDTH 95 + #define TX_KER_DESC_LBN 0 + #define TX_KER_DESC_WIDTH 64 + #define TX_USR_DESC_LBN 0 + #define TX_USR_DESC_WIDTH 64 +#define TX_DC_CFG_REG_KER_OFST 0xA20 // Transmit descriptor cache configuration register +#define TX_DC_CFG_REG_OFST 0xA20 // Transmit descriptor cache configuration register + #define TX_DC_SIZE_LBN 0 + #define TX_DC_SIZE_WIDTH 2 + +#if EFVI_FALCON_EXTENDED_P_BAR +#define TX_DESC_PTR_TBL_KER_OFST 0x11900 // Transmit descriptor pointer. +#else +#define TX_DESC_PTR_TBL_KER_OFST 0x1900 // Transmit descriptor pointer. +#endif + + +#define TX_DESC_PTR_TBL_OFST 0xF50000 // Transmit descriptor pointer + #define TX_NON_IP_DROP_DIS_B0_LBN 91 + #define TX_NON_IP_DROP_DIS_B0_WIDTH 1 + #define TX_IP_CHKSM_DIS_B0_LBN 90 + #define TX_IP_CHKSM_DIS_B0_WIDTH 1 + #define TX_TCP_CHKSM_DIS_B0_LBN 89 + #define TX_TCP_CHKSM_DIS_B0_WIDTH 1 + #define TX_DESCQ_EN_LBN 88 + #define TX_DESCQ_EN_WIDTH 1 + #define TX_ISCSI_DDIG_EN_LBN 87 + #define TX_ISCSI_DDIG_EN_WIDTH 1 + #define TX_ISCSI_HDIG_EN_LBN 86 + #define TX_ISCSI_HDIG_EN_WIDTH 1 + #define TX_DC_HW_RPTR_LBN 80 + #define TX_DC_HW_RPTR_WIDTH 6 + #define TX_DESCQ_HW_RPTR_LBN 68 + #define TX_DESCQ_HW_RPTR_WIDTH 12 + #define TX_DESCQ_SW_WPTR_LBN 56 + #define TX_DESCQ_SW_WPTR_WIDTH 12 + #define TX_DESCQ_BUF_BASE_ID_LBN 36 + #define TX_DESCQ_BUF_BASE_ID_WIDTH 20 + #define TX_DESCQ_EVQ_ID_LBN 24 + #define TX_DESCQ_EVQ_ID_WIDTH 12 + #define TX_DESCQ_OWNER_ID_LBN 10 + #define TX_DESCQ_OWNER_ID_WIDTH 14 + #define TX_DESCQ_LABEL_LBN 5 + #define TX_DESCQ_LABEL_WIDTH 5 + #define TX_DESCQ_SIZE_LBN 3 + #define TX_DESCQ_SIZE_WIDTH 2 + #define TX_DESCQ_TYPE_LBN 1 + #define TX_DESCQ_TYPE_WIDTH 2 + #define TX_DESCQ_FLUSH_LBN 0 + #define TX_DESCQ_FLUSH_WIDTH 1 +#define TX_CFG_REG_KER_OFST 0xA50 // Transmit configuration register +#define TX_CFG_REG_OFST 0xA50 // Transmit configuration register + #define TX_IP_ID_P1_OFS_LBN 32 + #define TX_IP_ID_P1_OFS_WIDTH 15 + #define TX_IP_ID_P0_OFS_LBN 16 + #define TX_IP_ID_P0_OFS_WIDTH 15 + #define TX_TURBO_EN_LBN 3 + #define TX_TURBO_EN_WIDTH 1 + #define TX_OWNERR_CTL_LBN 2 + #define TX_OWNERR_CTL_WIDTH 2 + #define TX_NON_IP_DROP_DIS_LBN 1 + #define TX_NON_IP_DROP_DIS_WIDTH 1 + #define TX_IP_ID_REP_EN_LBN 0 + #define TX_IP_ID_REP_EN_WIDTH 1 +#define TX_RESERVED_REG_KER_OFST 0xA80 // Transmit configuration register +#define TX_RESERVED_REG_OFST 0xA80 // Transmit configuration register + #define TX_CSR_PUSH_EN_LBN 89 + #define TX_CSR_PUSH_EN_WIDTH 1 + #define TX_RX_SPACER_LBN 64 + #define TX_RX_SPACER_WIDTH 8 + #define TX_SW_EV_EN_LBN 59 + #define TX_SW_EV_EN_WIDTH 1 + #define TX_RX_SPACER_EN_LBN 57 + #define 
TX_RX_SPACER_EN_WIDTH 1 + #define TX_CSR_PREF_WD_TMR_LBN 24 + #define TX_CSR_PREF_WD_TMR_WIDTH 16 + #define TX_CSR_ONLY1TAG_LBN 21 + #define TX_CSR_ONLY1TAG_WIDTH 1 + #define TX_PREF_THRESHOLD_LBN 19 + #define TX_PREF_THRESHOLD_WIDTH 2 + #define TX_ONE_PKT_PER_Q_LBN 18 + #define TX_ONE_PKT_PER_Q_WIDTH 1 + #define TX_DIS_NON_IP_EV_LBN 17 + #define TX_DIS_NON_IP_EV_WIDTH 1 + #define TX_DMA_SPACER_LBN 8 + #define TX_DMA_SPACER_WIDTH 8 + #define TX_FLUSH_MIN_LEN_EN_B0_LBN 7 + #define TX_FLUSH_MIN_LEN_EN_B0_WIDTH 1 + #define TX_TCP_DIS_A1_LBN 7 + #define TX_TCP_DIS_A1_WIDTH 1 + #define TX_IP_DIS_A1_LBN 6 + #define TX_IP_DIS_A1_WIDTH 1 + #define TX_MAX_CPL_LBN 2 + #define TX_MAX_CPL_WIDTH 2 + #define TX_MAX_PREF_LBN 0 + #define TX_MAX_PREF_WIDTH 2 +#define TX_VLAN_REG_OFST 0xAE0 // Transmit VLAN tag register + #define TX_VLAN_EN_LBN 127 + #define TX_VLAN_EN_WIDTH 1 + #define TX_VLAN7_PORT1_EN_LBN 125 + #define TX_VLAN7_PORT1_EN_WIDTH 1 + #define TX_VLAN7_PORT0_EN_LBN 124 + #define TX_VLAN7_PORT0_EN_WIDTH 1 + #define TX_VLAN7_LBN 112 + #define TX_VLAN7_WIDTH 12 + #define TX_VLAN6_PORT1_EN_LBN 109 + #define TX_VLAN6_PORT1_EN_WIDTH 1 + #define TX_VLAN6_PORT0_EN_LBN 108 + #define TX_VLAN6_PORT0_EN_WIDTH 1 + #define TX_VLAN6_LBN 96 + #define TX_VLAN6_WIDTH 12 + #define TX_VLAN5_PORT1_EN_LBN 93 + #define TX_VLAN5_PORT1_EN_WIDTH 1 + #define TX_VLAN5_PORT0_EN_LBN 92 + #define TX_VLAN5_PORT0_EN_WIDTH 1 + #define TX_VLAN5_LBN 80 + #define TX_VLAN5_WIDTH 12 + #define TX_VLAN4_PORT1_EN_LBN 77 + #define TX_VLAN4_PORT1_EN_WIDTH 1 + #define TX_VLAN4_PORT0_EN_LBN 76 + #define TX_VLAN4_PORT0_EN_WIDTH 1 + #define TX_VLAN4_LBN 64 + #define TX_VLAN4_WIDTH 12 + #define TX_VLAN3_PORT1_EN_LBN 61 + #define TX_VLAN3_PORT1_EN_WIDTH 1 + #define TX_VLAN3_PORT0_EN_LBN 60 + #define TX_VLAN3_PORT0_EN_WIDTH 1 + #define TX_VLAN3_LBN 48 + #define TX_VLAN3_WIDTH 12 + #define TX_VLAN2_PORT1_EN_LBN 45 + #define TX_VLAN2_PORT1_EN_WIDTH 1 + #define TX_VLAN2_PORT0_EN_LBN 44 + #define TX_VLAN2_PORT0_EN_WIDTH 1 + #define TX_VLAN2_LBN 32 + #define TX_VLAN2_WIDTH 12 + #define TX_VLAN1_PORT1_EN_LBN 29 + #define TX_VLAN1_PORT1_EN_WIDTH 1 + #define TX_VLAN1_PORT0_EN_LBN 28 + #define TX_VLAN1_PORT0_EN_WIDTH 1 + #define TX_VLAN1_LBN 16 + #define TX_VLAN1_WIDTH 12 + #define TX_VLAN0_PORT1_EN_LBN 13 + #define TX_VLAN0_PORT1_EN_WIDTH 1 + #define TX_VLAN0_PORT0_EN_LBN 12 + #define TX_VLAN0_PORT0_EN_WIDTH 1 + #define TX_VLAN0_LBN 0 + #define TX_VLAN0_WIDTH 12 +#define TX_FIL_CTL_REG_OFST 0xAF0 // Transmit filter control register + #define TX_MADR1_FIL_EN_LBN 65 + #define TX_MADR1_FIL_EN_WIDTH 1 + #define TX_MADR0_FIL_EN_LBN 64 + #define TX_MADR0_FIL_EN_WIDTH 1 + #define TX_IPFIL31_PORT1_EN_LBN 63 + #define TX_IPFIL31_PORT1_EN_WIDTH 1 + #define TX_IPFIL31_PORT0_EN_LBN 62 + #define TX_IPFIL31_PORT0_EN_WIDTH 1 + #define TX_IPFIL30_PORT1_EN_LBN 61 + #define TX_IPFIL30_PORT1_EN_WIDTH 1 + #define TX_IPFIL30_PORT0_EN_LBN 60 + #define TX_IPFIL30_PORT0_EN_WIDTH 1 + #define TX_IPFIL29_PORT1_EN_LBN 59 + #define TX_IPFIL29_PORT1_EN_WIDTH 1 + #define TX_IPFIL29_PORT0_EN_LBN 58 + #define TX_IPFIL29_PORT0_EN_WIDTH 1 + #define TX_IPFIL28_PORT1_EN_LBN 57 + #define TX_IPFIL28_PORT1_EN_WIDTH 1 + #define TX_IPFIL28_PORT0_EN_LBN 56 + #define TX_IPFIL28_PORT0_EN_WIDTH 1 + #define TX_IPFIL27_PORT1_EN_LBN 55 + #define TX_IPFIL27_PORT1_EN_WIDTH 1 + #define TX_IPFIL27_PORT0_EN_LBN 54 + #define TX_IPFIL27_PORT0_EN_WIDTH 1 + #define TX_IPFIL26_PORT1_EN_LBN 53 + #define TX_IPFIL26_PORT1_EN_WIDTH 1 + #define TX_IPFIL26_PORT0_EN_LBN 52 + #define TX_IPFIL26_PORT0_EN_WIDTH 1 + 
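The TX_IPFILn_PORTp_EN enables listed here (and continuing below) follow a regular stride, bit 2n + p for filter n and port p, so they can also be addressed generically. A hypothetical helper, not part of the patch:

#define EXAMPLE_TX_IPFIL_EN_LBN(n, p)	(2 * (n) + (p))	/* n = 0..31, p = 0..1 */
#define EXAMPLE_TX_IPFIL_EN_WIDTH	1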
#define TX_IPFIL25_PORT1_EN_LBN 51 + #define TX_IPFIL25_PORT1_EN_WIDTH 1 + #define TX_IPFIL25_PORT0_EN_LBN 50 + #define TX_IPFIL25_PORT0_EN_WIDTH 1 + #define TX_IPFIL24_PORT1_EN_LBN 49 + #define TX_IPFIL24_PORT1_EN_WIDTH 1 + #define TX_IPFIL24_PORT0_EN_LBN 48 + #define TX_IPFIL24_PORT0_EN_WIDTH 1 + #define TX_IPFIL23_PORT1_EN_LBN 47 + #define TX_IPFIL23_PORT1_EN_WIDTH 1 + #define TX_IPFIL23_PORT0_EN_LBN 46 + #define TX_IPFIL23_PORT0_EN_WIDTH 1 + #define TX_IPFIL22_PORT1_EN_LBN 45 + #define TX_IPFIL22_PORT1_EN_WIDTH 1 + #define TX_IPFIL22_PORT0_EN_LBN 44 + #define TX_IPFIL22_PORT0_EN_WIDTH 1 + #define TX_IPFIL21_PORT1_EN_LBN 43 + #define TX_IPFIL21_PORT1_EN_WIDTH 1 + #define TX_IPFIL21_PORT0_EN_LBN 42 + #define TX_IPFIL21_PORT0_EN_WIDTH 1 + #define TX_IPFIL20_PORT1_EN_LBN 41 + #define TX_IPFIL20_PORT1_EN_WIDTH 1 + #define TX_IPFIL20_PORT0_EN_LBN 40 + #define TX_IPFIL20_PORT0_EN_WIDTH 1 + #define TX_IPFIL19_PORT1_EN_LBN 39 + #define TX_IPFIL19_PORT1_EN_WIDTH 1 + #define TX_IPFIL19_PORT0_EN_LBN 38 + #define TX_IPFIL19_PORT0_EN_WIDTH 1 + #define TX_IPFIL18_PORT1_EN_LBN 37 + #define TX_IPFIL18_PORT1_EN_WIDTH 1 + #define TX_IPFIL18_PORT0_EN_LBN 36 + #define TX_IPFIL18_PORT0_EN_WIDTH 1 + #define TX_IPFIL17_PORT1_EN_LBN 35 + #define TX_IPFIL17_PORT1_EN_WIDTH 1 + #define TX_IPFIL17_PORT0_EN_LBN 34 + #define TX_IPFIL17_PORT0_EN_WIDTH 1 + #define TX_IPFIL16_PORT1_EN_LBN 33 + #define TX_IPFIL16_PORT1_EN_WIDTH 1 + #define TX_IPFIL16_PORT0_EN_LBN 32 + #define TX_IPFIL16_PORT0_EN_WIDTH 1 + #define TX_IPFIL15_PORT1_EN_LBN 31 + #define TX_IPFIL15_PORT1_EN_WIDTH 1 + #define TX_IPFIL15_PORT0_EN_LBN 30 + #define TX_IPFIL15_PORT0_EN_WIDTH 1 + #define TX_IPFIL14_PORT1_EN_LBN 29 + #define TX_IPFIL14_PORT1_EN_WIDTH 1 + #define TX_IPFIL14_PORT0_EN_LBN 28 + #define TX_IPFIL14_PORT0_EN_WIDTH 1 + #define TX_IPFIL13_PORT1_EN_LBN 27 + #define TX_IPFIL13_PORT1_EN_WIDTH 1 + #define TX_IPFIL13_PORT0_EN_LBN 26 + #define TX_IPFIL13_PORT0_EN_WIDTH 1 + #define TX_IPFIL12_PORT1_EN_LBN 25 + #define TX_IPFIL12_PORT1_EN_WIDTH 1 + #define TX_IPFIL12_PORT0_EN_LBN 24 + #define TX_IPFIL12_PORT0_EN_WIDTH 1 + #define TX_IPFIL11_PORT1_EN_LBN 23 + #define TX_IPFIL11_PORT1_EN_WIDTH 1 + #define TX_IPFIL11_PORT0_EN_LBN 22 + #define TX_IPFIL11_PORT0_EN_WIDTH 1 + #define TX_IPFIL10_PORT1_EN_LBN 21 + #define TX_IPFIL10_PORT1_EN_WIDTH 1 + #define TX_IPFIL10_PORT0_EN_LBN 20 + #define TX_IPFIL10_PORT0_EN_WIDTH 1 + #define TX_IPFIL9_PORT1_EN_LBN 19 + #define TX_IPFIL9_PORT1_EN_WIDTH 1 + #define TX_IPFIL9_PORT0_EN_LBN 18 + #define TX_IPFIL9_PORT0_EN_WIDTH 1 + #define TX_IPFIL8_PORT1_EN_LBN 17 + #define TX_IPFIL8_PORT1_EN_WIDTH 1 + #define TX_IPFIL8_PORT0_EN_LBN 16 + #define TX_IPFIL8_PORT0_EN_WIDTH 1 + #define TX_IPFIL7_PORT1_EN_LBN 15 + #define TX_IPFIL7_PORT1_EN_WIDTH 1 + #define TX_IPFIL7_PORT0_EN_LBN 14 + #define TX_IPFIL7_PORT0_EN_WIDTH 1 + #define TX_IPFIL6_PORT1_EN_LBN 13 + #define TX_IPFIL6_PORT1_EN_WIDTH 1 + #define TX_IPFIL6_PORT0_EN_LBN 12 + #define TX_IPFIL6_PORT0_EN_WIDTH 1 + #define TX_IPFIL5_PORT1_EN_LBN 11 + #define TX_IPFIL5_PORT1_EN_WIDTH 1 + #define TX_IPFIL5_PORT0_EN_LBN 10 + #define TX_IPFIL5_PORT0_EN_WIDTH 1 + #define TX_IPFIL4_PORT1_EN_LBN 9 + #define TX_IPFIL4_PORT1_EN_WIDTH 1 + #define TX_IPFIL4_PORT0_EN_LBN 8 + #define TX_IPFIL4_PORT0_EN_WIDTH 1 + #define TX_IPFIL3_PORT1_EN_LBN 7 + #define TX_IPFIL3_PORT1_EN_WIDTH 1 + #define TX_IPFIL3_PORT0_EN_LBN 6 + #define TX_IPFIL3_PORT0_EN_WIDTH 1 + #define TX_IPFIL2_PORT1_EN_LBN 5 + #define TX_IPFIL2_PORT1_EN_WIDTH 1 + #define TX_IPFIL2_PORT0_EN_LBN 4 + #define 
TX_IPFIL2_PORT0_EN_WIDTH 1 + #define TX_IPFIL1_PORT1_EN_LBN 3 + #define TX_IPFIL1_PORT1_EN_WIDTH 1 + #define TX_IPFIL1_PORT0_EN_LBN 2 + #define TX_IPFIL1_PORT0_EN_WIDTH 1 + #define TX_IPFIL0_PORT1_EN_LBN 1 + #define TX_IPFIL0_PORT1_EN_WIDTH 1 + #define TX_IPFIL0_PORT0_EN_LBN 0 + #define TX_IPFIL0_PORT0_EN_WIDTH 1 +#define TX_IPFIL_TBL_OFST 0xB00 // Transmit IP source address filter table + #define TX_IPFIL_MASK_LBN 32 + #define TX_IPFIL_MASK_WIDTH 32 + #define TX_IP_SRC_ADR_LBN 0 + #define TX_IP_SRC_ADR_WIDTH 32 +#define TX_PACE_REG_A1_OFST 0xF80000 // Transmit pace control register +#define TX_PACE_REG_B0_OFST 0xA90 // Transmit pace control register + #define TX_PACE_SB_AF_LBN 19 + #define TX_PACE_SB_AF_WIDTH 10 + #define TX_PACE_SB_NOTAF_LBN 9 + #define TX_PACE_SB_NOTAF_WIDTH 10 + #define TX_PACE_FB_BASE_LBN 5 + #define TX_PACE_FB_BASE_WIDTH 4 + #define TX_PACE_BIN_TH_LBN 0 + #define TX_PACE_BIN_TH_WIDTH 5 +#define TX_PACE_TBL_A1_OFST 0xF80040 // Transmit pacing table +#define TX_PACE_TBL_FIRST_QUEUE_A1 4 +#define TX_PACE_TBL_B0_OFST 0xF80000 // Transmit pacing table +#define TX_PACE_TBL_FIRST_QUEUE_B0 0 + #define TX_PACE_LBN 0 + #define TX_PACE_WIDTH 5 + +//////////////---- EE/Flash Registers C Header ----////////////// +#define EE_SPI_HCMD_REG_KER_OFST 0x100 // SPI host command register +#define EE_SPI_HCMD_REG_OFST 0x100 // SPI host command register + #define EE_SPI_HCMD_CMD_EN_LBN 31 + #define EE_SPI_HCMD_CMD_EN_WIDTH 1 + #define EE_WR_TIMER_ACTIVE_LBN 28 + #define EE_WR_TIMER_ACTIVE_WIDTH 1 + #define EE_SPI_HCMD_SF_SEL_LBN 24 + #define EE_SPI_HCMD_SF_SEL_WIDTH 1 + #define EE_SPI_HCMD_DABCNT_LBN 16 + #define EE_SPI_HCMD_DABCNT_WIDTH 5 + #define EE_SPI_HCMD_READ_LBN 15 + #define EE_SPI_HCMD_READ_WIDTH 1 + #define EE_SPI_HCMD_DUBCNT_LBN 12 + #define EE_SPI_HCMD_DUBCNT_WIDTH 2 + #define EE_SPI_HCMD_ADBCNT_LBN 8 + #define EE_SPI_HCMD_ADBCNT_WIDTH 2 + #define EE_SPI_HCMD_ENC_LBN 0 + #define EE_SPI_HCMD_ENC_WIDTH 8 +#define EE_SPI_HADR_REG_KER_OFST 0X110 // SPI host address register +#define EE_SPI_HADR_REG_OFST 0X110 // SPI host address register + #define EE_SPI_HADR_DUBYTE_LBN 24 + #define EE_SPI_HADR_DUBYTE_WIDTH 8 + #define EE_SPI_HADR_ADR_LBN 0 + #define EE_SPI_HADR_ADR_WIDTH 24 +#define EE_SPI_HDATA_REG_KER_OFST 0x120 // SPI host data register +#define EE_SPI_HDATA_REG_OFST 0x120 // SPI host data register + #define EE_SPI_HDATA3_LBN 96 + #define EE_SPI_HDATA3_WIDTH 32 + #define EE_SPI_HDATA2_LBN 64 + #define EE_SPI_HDATA2_WIDTH 32 + #define EE_SPI_HDATA1_LBN 32 + #define EE_SPI_HDATA1_WIDTH 32 + #define EE_SPI_HDATA0_LBN 0 + #define EE_SPI_HDATA0_WIDTH 32 +#define EE_BASE_PAGE_REG_KER_OFST 0x130 // Expansion ROM base mirror register +#define EE_BASE_PAGE_REG_OFST 0x130 // Expansion ROM base mirror register + #define EE_EXP_ROM_WINDOW_BASE_LBN 16 + #define EE_EXP_ROM_WINDOW_BASE_WIDTH 13 + #define EE_EXPROM_MASK_LBN 0 + #define EE_EXPROM_MASK_WIDTH 13 +#define EE_VPD_CFG0_REG_KER_OFST 0X140 // SPI/VPD configuration register +#define EE_VPD_CFG0_REG_OFST 0X140 // SPI/VPD configuration register + #define EE_SF_FASTRD_EN_LBN 127 + #define EE_SF_FASTRD_EN_WIDTH 1 + #define EE_SF_CLOCK_DIV_LBN 120 + #define EE_SF_CLOCK_DIV_WIDTH 7 + #define EE_VPD_WIP_POLL_LBN 119 + #define EE_VPD_WIP_POLL_WIDTH 1 + #define EE_VPDW_LENGTH_LBN 80 + #define EE_VPDW_LENGTH_WIDTH 15 + #define EE_VPDW_BASE_LBN 64 + #define EE_VPDW_BASE_WIDTH 15 + #define EE_VPD_WR_CMD_EN_LBN 56 + #define EE_VPD_WR_CMD_EN_WIDTH 8 + #define EE_VPD_BASE_LBN 32 + #define EE_VPD_BASE_WIDTH 24 + #define EE_VPD_LENGTH_LBN 16 + #define 
EE_VPD_LENGTH_WIDTH 13 + #define EE_VPD_AD_SIZE_LBN 8 + #define EE_VPD_AD_SIZE_WIDTH 5 + #define EE_VPD_ACCESS_ON_LBN 5 + #define EE_VPD_ACCESS_ON_WIDTH 1 +#define EE_VPD_SW_CNTL_REG_KER_OFST 0X150 // VPD access SW control register +#define EE_VPD_SW_CNTL_REG_OFST 0X150 // VPD access SW control register + #define EE_VPD_CYCLE_PENDING_LBN 31 + #define EE_VPD_CYCLE_PENDING_WIDTH 1 + #define EE_VPD_CYC_WRITE_LBN 28 + #define EE_VPD_CYC_WRITE_WIDTH 1 + #define EE_VPD_CYC_ADR_LBN 0 + #define EE_VPD_CYC_ADR_WIDTH 15 +#define EE_VPD_SW_DATA_REG_KER_OFST 0x160 // VPD access SW data register +#define EE_VPD_SW_DATA_REG_OFST 0x160 // VPD access SW data register + #define EE_VPD_CYC_DAT_LBN 0 + #define EE_VPD_CYC_DAT_WIDTH 32 diff -r dd748ded9ba8 drivers/xen/sfc_netfront/ef_vi_falcon_desc.h --- /dev/null +++ b/drivers/xen/sfc_netfront/ef_vi_falcon_desc.h @@ -0,0 +1,43 @@ +//////////////---- Descriptors C Headers ----////////////// +// Receive Kernel IP Descriptor + #define RX_KER_BUF_SIZE_LBN 48 + #define RX_KER_BUF_SIZE_WIDTH 14 + #define RX_KER_BUF_REGION_LBN 46 + #define RX_KER_BUF_REGION_WIDTH 2 + #define RX_KER_BUF_REGION0_DECODE 0 + #define RX_KER_BUF_REGION1_DECODE 1 + #define RX_KER_BUF_REGION2_DECODE 2 + #define RX_KER_BUF_REGION3_DECODE 3 + #define RX_KER_BUF_ADR_LBN 0 + #define RX_KER_BUF_ADR_WIDTH 46 +// Receive User IP Descriptor + #define RX_USR_2BYTE_OFS_LBN 20 + #define RX_USR_2BYTE_OFS_WIDTH 12 + #define RX_USR_BUF_ID_LBN 0 + #define RX_USR_BUF_ID_WIDTH 20 +// Transmit Kernel IP Descriptor + #define TX_KER_PORT_LBN 63 + #define TX_KER_PORT_WIDTH 1 + #define TX_KER_CONT_LBN 62 + #define TX_KER_CONT_WIDTH 1 + #define TX_KER_BYTE_CNT_LBN 48 + #define TX_KER_BYTE_CNT_WIDTH 14 + #define TX_KER_BUF_REGION_LBN 46 + #define TX_KER_BUF_REGION_WIDTH 2 + #define TX_KER_BUF_REGION0_DECODE 0 + #define TX_KER_BUF_REGION1_DECODE 1 + #define TX_KER_BUF_REGION2_DECODE 2 + #define TX_KER_BUF_REGION3_DECODE 3 + #define TX_KER_BUF_ADR_LBN 0 + #define TX_KER_BUF_ADR_WIDTH 46 +// Transmit User IP Descriptor + #define TX_USR_PORT_LBN 47 + #define TX_USR_PORT_WIDTH 1 + #define TX_USR_CONT_LBN 46 + #define TX_USR_CONT_WIDTH 1 + #define TX_USR_BYTE_CNT_LBN 33 + #define TX_USR_BYTE_CNT_WIDTH 13 + #define TX_USR_BUF_ID_LBN 13 + #define TX_USR_BUF_ID_WIDTH 20 + #define TX_USR_BYTE_OFS_LBN 0 + #define TX_USR_BYTE_OFS_WIDTH 13 diff -r dd748ded9ba8 drivers/xen/sfc_netfront/ef_vi_falcon_event.h --- /dev/null +++ b/drivers/xen/sfc_netfront/ef_vi_falcon_event.h @@ -0,0 +1,123 @@ +//////////////---- Events Format C Header ----////////////// +//////////////---- Event entry ----////////////// + #define EV_CODE_LBN 60 + #define EV_CODE_WIDTH 4 + #define RX_IP_EV_DECODE 0 + #define TX_IP_EV_DECODE 2 + #define DRIVER_EV_DECODE 5 + #define GLOBAL_EV_DECODE 6 + #define DRV_GEN_EV_DECODE 7 + #define EV_DATA_LBN 0 + #define EV_DATA_WIDTH 60 +//////////////---- Receive IP events for both Kernel & User event queues ----////////////// + #define RX_EV_PKT_OK_LBN 56 + #define RX_EV_PKT_OK_WIDTH 1 + #define RX_EV_BUF_OWNER_ID_ERR_LBN 54 + #define RX_EV_BUF_OWNER_ID_ERR_WIDTH 1 + #define RX_EV_IP_HDR_CHKSUM_ERR_LBN 52 + #define RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1 + #define RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51 + #define RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1 + #define RX_EV_ETH_CRC_ERR_LBN 50 + #define RX_EV_ETH_CRC_ERR_WIDTH 1 + #define RX_EV_FRM_TRUNC_LBN 49 + #define RX_EV_FRM_TRUNC_WIDTH 1 + #define RX_EV_DRIB_NIB_LBN 48 + #define RX_EV_DRIB_NIB_WIDTH 1 + #define RX_EV_TOBE_DISC_LBN 47 + #define RX_EV_TOBE_DISC_WIDTH 1 + #define 
RX_EV_PKT_TYPE_LBN 44 + #define RX_EV_PKT_TYPE_WIDTH 3 + #define RX_EV_PKT_TYPE_ETH_DECODE 0 + #define RX_EV_PKT_TYPE_LLC_DECODE 1 + #define RX_EV_PKT_TYPE_JUMBO_DECODE 2 + #define RX_EV_PKT_TYPE_VLAN_DECODE 3 + #define RX_EV_PKT_TYPE_VLAN_LLC_DECODE 4 + #define RX_EV_PKT_TYPE_VLAN_JUMBO_DECODE 5 + #define RX_EV_HDR_TYPE_LBN 42 + #define RX_EV_HDR_TYPE_WIDTH 2 + #define RX_EV_HDR_TYPE_TCP_IPV4_DECODE 0 + #define RX_EV_HDR_TYPE_UDP_IPV4_DECODE 1 + #define RX_EV_HDR_TYPE_OTHER_IP_DECODE 2 + #define RX_EV_HDR_TYPE_NON_IP_DECODE 3 + #define RX_EV_DESC_Q_EMPTY_LBN 41 + #define RX_EV_DESC_Q_EMPTY_WIDTH 1 + #define RX_EV_MCAST_HASH_MATCH_LBN 40 + #define RX_EV_MCAST_HASH_MATCH_WIDTH 1 + #define RX_EV_MCAST_PKT_LBN 39 + #define RX_EV_MCAST_PKT_WIDTH 1 + #define RX_EV_Q_LABEL_LBN 32 + #define RX_EV_Q_LABEL_WIDTH 5 + #define RX_JUMBO_CONT_LBN 31 + #define RX_JUMBO_CONT_WIDTH 1 + #define RX_SOP_LBN 15 + #define RX_SOP_WIDTH 1 + #define RX_PORT_LBN 30 + #define RX_PORT_WIDTH 1 + #define RX_EV_BYTE_CNT_LBN 16 + #define RX_EV_BYTE_CNT_WIDTH 14 + #define RX_iSCSI_PKT_OK_LBN 14 + #define RX_iSCSI_PKT_OK_WIDTH 1 + #define RX_ISCSI_DDIG_ERR_LBN 13 + #define RX_ISCSI_DDIG_ERR_WIDTH 1 + #define RX_ISCSI_HDIG_ERR_LBN 12 + #define RX_ISCSI_HDIG_ERR_WIDTH 1 + #define RX_EV_DESC_PTR_LBN 0 + #define RX_EV_DESC_PTR_WIDTH 12 +//////////////---- Transmit IP events for both Kernel & User event queues ----////////////// + #define TX_EV_PKT_ERR_LBN 38 + #define TX_EV_PKT_ERR_WIDTH 1 + #define TX_EV_PKT_TOO_BIG_LBN 37 + #define TX_EV_PKT_TOO_BIG_WIDTH 1 + #define TX_EV_Q_LABEL_LBN 32 + #define TX_EV_Q_LABEL_WIDTH 5 + #define TX_EV_PORT_LBN 16 + #define TX_EV_PORT_WIDTH 1 + #define TX_EV_WQ_FF_FULL_LBN 15 + #define TX_EV_WQ_FF_FULL_WIDTH 1 + #define TX_EV_BUF_OWNER_ID_ERR_LBN 14 + #define TX_EV_BUF_OWNER_ID_ERR_WIDTH 1 + #define TX_EV_COMP_LBN 12 + #define TX_EV_COMP_WIDTH 1 + #define TX_EV_DESC_PTR_LBN 0 + #define TX_EV_DESC_PTR_WIDTH 12 +//////////////---- Char or Kernel driver events ----////////////// + #define DRIVER_EV_SUB_CODE_LBN 56 + #define DRIVER_EV_SUB_CODE_WIDTH 4 + #define TX_DESCQ_FLS_DONE_EV_DECODE 0x0 + #define RX_DESCQ_FLS_DONE_EV_DECODE 0x1 + #define EVQ_INIT_DONE_EV_DECODE 0x2 + #define EVQ_NOT_EN_EV_DECODE 0x3 + #define RX_DESCQ_FLSFF_OVFL_EV_DECODE 0x4 + #define SRM_UPD_DONE_EV_DECODE 0x5 + #define WAKE_UP_EV_DECODE 0x6 + #define TX_PKT_NON_TCP_UDP_DECODE 0x9 + #define TIMER_EV_DECODE 0xA + #define RX_DSC_ERROR_EV_DECODE 0xE + #define DRIVER_EV_TX_DESCQ_ID_LBN 0 + #define DRIVER_EV_TX_DESCQ_ID_WIDTH 12 + #define DRIVER_EV_RX_DESCQ_ID_LBN 0 + #define DRIVER_EV_RX_DESCQ_ID_WIDTH 12 + #define DRIVER_EV_EVQ_ID_LBN 0 + #define DRIVER_EV_EVQ_ID_WIDTH 12 + #define DRIVER_TMR_ID_LBN 0 + #define DRIVER_TMR_ID_WIDTH 12 + #define DRIVER_EV_SRM_UPD_LBN 0 + #define DRIVER_EV_SRM_UPD_WIDTH 2 + #define SRM_CLR_EV_DECODE 0 + #define SRM_UPD_EV_DECODE 1 + #define SRM_ILLCLR_EV_DECODE 2 +//////////////---- Global events. Sent to both event queue 0 and 4. 
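Every event word carries a 4-bit type code in EV_CODE (bits 63:60); the remaining bits are interpreted per type using the fields above. A minimal decode sketch, illustrative only (the driver's own decoder is ef_eventq_poll_evs() in falcon_event.c below, built on the QWORD_* helpers):

#include <stdint.h>

static inline unsigned example_ev_field(uint64_t ev, unsigned lbn, unsigned width)
{
	return (unsigned)((ev >> lbn) & ((1ULL << width) - 1));
}

static void example_decode(uint64_t ev)
{
	unsigned code = example_ev_field(ev, EV_CODE_LBN, EV_CODE_WIDTH);

	if (code == RX_IP_EV_DECODE) {
		unsigned q   = example_ev_field(ev, RX_EV_Q_LABEL_LBN, RX_EV_Q_LABEL_WIDTH);
		unsigned len = example_ev_field(ev, RX_EV_BYTE_CNT_LBN, RX_EV_BYTE_CNT_WIDTH);
		unsigned ok  = example_ev_field(ev, RX_EV_PKT_OK_LBN, RX_EV_PKT_OK_WIDTH);
		/* q/len/ok: receive queue label, frame length, checksum/CRC ok */
		(void)q; (void)len; (void)ok;
	} else if (code == TX_IP_EV_DECODE) {
		/* TX completion: DESC_PTR is the last descriptor consumed */
		unsigned ptr = example_ev_field(ev, TX_EV_DESC_PTR_LBN, TX_EV_DESC_PTR_WIDTH);
		(void)ptr;
	}
}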
----////////////// + #define XFP_PHY_INTR_LBN 10 + #define XFP_PHY_INTR_WIDTH 1 + #define XG_PHY_INTR_LBN 9 + #define XG_PHY_INTR_WIDTH 1 + #define G_PHY1_INTR_LBN 8 + #define G_PHY1_INTR_WIDTH 1 + #define G_PHY0_INTR_LBN 7 + #define G_PHY0_INTR_WIDTH 1 +//////////////---- Driver generated events ----////////////// + #define DRV_GEN_EV_CODE_LBN 60 + #define DRV_GEN_EV_CODE_WIDTH 4 + #define DRV_GEN_EV_DATA_LBN 0 + #define DRV_GEN_EV_DATA_WIDTH 60 diff -r dd748ded9ba8 drivers/xen/sfc_netfront/ef_vi_internal.h --- /dev/null +++ b/drivers/xen/sfc_netfront/ef_vi_internal.h @@ -0,0 +1,256 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr + * \brief Really-and-truely-honestly internal stuff for libef. + * \date 2004/06/13 + */ + +/*! \cidoxg_include_ci_ul */ +#ifndef __CI_EF_VI_INTERNAL_H__ +#define __CI_EF_VI_INTERNAL_H__ + + +/* These flags share space with enum ef_vi_flags. */ +#define EF_VI_BUG5692_WORKAROUND 0x10000 + + +/* *********************************************************************** + * COMPILATION CONTROL FLAGS (see ef_vi.h for "workaround" controls) + */ + +#define EF_VI_DO_MAGIC_CHECKS 1 + + +/********************************************************************** + * Headers + */ + +#include +#include "sysdep.h" +#include "ef_vi_falcon.h" + + +/********************************************************************** + * Debugging. + */ + +#ifndef NDEBUG + +# define _ef_assert(exp, file, line) BUG_ON(!(exp)); + +# define _ef_assert2(exp, x, y, file, line) do { \ + if (unlikely(!(exp))) \ + BUG(); \ + } while (0) + +#else + +# define _ef_assert(exp, file, line) +# define _ef_assert2(e, x, y, file, line) + +#endif + +#define ef_assert(a) do{ _ef_assert((a),__FILE__,__LINE__); } while(0) +#define ef_assert_equal(a,b) _ef_assert2((a)==(b),(a),(b),__FILE__,__LINE__) +#define ef_assert_eq ef_assert_equal +#define ef_assert_lt(a,b) _ef_assert2((a)<(b),(a),(b),__FILE__,__LINE__) +#define ef_assert_le(a,b) _ef_assert2((a)<=(b),(a),(b),__FILE__,__LINE__) +#define ef_assert_nequal(a,b) _ef_assert2((a)!=(b),(a),(b),__FILE__,__LINE__) +#define ef_assert_ne ef_assert_nequal +#define ef_assert_ge(a,b) _ef_assert2((a)>=(b),(a),(b),__FILE__,__LINE__) +#define ef_assert_gt(a,b) _ef_assert2((a)>(b),(a),(b),__FILE__,__LINE__) + +/********************************************************************** + * Debug checks. 
****************************************************** + **********************************************************************/ + +#ifdef NDEBUG +# define EF_VI_MAGIC_SET(p, type) +# define EF_VI_CHECK_VI(p) +# define EF_VI_CHECK_EVENT_Q(p) +# define EF_VI_CHECK_IOBUFSET(p) +# define EF_VI_CHECK_FILTER(p) +# define EF_VI_CHECK_SHMBUF(p) +# define EF_VI_CHECK_PT_EP(p) +#else +# define EF_VI 0x3 +# define EF_EPLOCK 0x6 +# define EF_IOBUFSET 0x9 +# define EF_FILTER 0xa +# define EF_SHMBUF 0x11 + +# define EF_VI_MAGIC(p, type) \ + (((unsigned)(type) << 28) | \ + (((unsigned)(intptr_t)(p)) & 0x0fffffffu)) + +# if !EF_VI_DO_MAGIC_CHECKS +# define EF_VI_MAGIC_SET(p, type) +# define EF_VI_MAGIC_CHECK(p, type) +# else +# define EF_VI_MAGIC_SET(p, type) \ + do { \ + (p)->magic = EF_VI_MAGIC((p), (type)); \ + } while (0) + +# define EF_VI_MAGIC_OKAY(p, type) \ + ((p)->magic == EF_VI_MAGIC((p), (type))) + +# define EF_VI_MAGIC_CHECK(p, type) \ + ef_assert(EF_VI_MAGIC_OKAY((p), (type))) + +#endif /* EF_VI_DO_MAGIC_CHECKS */ + +# define EF_VI_CHECK_VI(p) \ + ef_assert(p); \ + EF_VI_MAGIC_CHECK((p), EF_VI); + +# define EF_VI_CHECK_EVENT_Q(p) \ + ef_assert(p); \ + EF_VI_MAGIC_CHECK((p), EF_VI); \ + ef_assert((p)->evq_base); \ + ef_assert((p)->evq_mask); + +# define EF_VI_CHECK_PT_EP(p) \ + ef_assert(p); \ + EF_VI_MAGIC_CHECK((p), EF_VI); \ + ef_assert((p)->ep_state); + +# define EF_VI_CHECK_IOBUFSET(p) \ + ef_assert(p); \ + EF_VI_MAGIC_CHECK((p), EF_IOBUFSET) + +# define EF_VI_CHECK_FILTER(p) \ + ef_assert(p); \ + EF_VI_MAGIC_CHECK((p), EF_FILTER); + +# define EF_VI_CHECK_SHMBUF(p) \ + ef_assert(p); \ + EF_VI_MAGIC_CHECK((p), EF_SHMBUF); + +#endif + +#ifndef NDEBUG +# define EF_DRIVER_MAGIC 0x00f00ba4 +# define EF_ASSERT_THIS_DRIVER_VALID(driver) \ + do{ ef_assert(driver); \ + EF_VI_MAGIC_CHECK((driver), EF_DRIVER_MAGIC); \ + ef_assert((driver)->init); }while(0) + +# define EF_ASSERT_DRIVER_VALID() EF_ASSERT_THIS_DRIVER_VALID(&ci_driver) +#else +# define EF_ASSERT_THIS_DRIVER_VALID(driver) +# define EF_ASSERT_DRIVER_VALID() +#endif + + +/* ************************************* + * Power of 2 FIFO + */ + +#define EF_VI_FIFO2_M(f, x) ((x) & ((f)->fifo_mask)) +#define ef_vi_fifo2_valid(f) ((f) && (f)->fifo && (f)->fifo_mask > 0 && \ + (f)->fifo_rd_i <= (f)->fifo_mask && \ + (f)->fifo_wr_i <= (f)->fifo_mask && \ + EF_VI_IS_POW2((f)->fifo_mask+1u)) + +#define ef_vi_fifo2_init(f, cap) \ + do{ ef_assert(EF_VI_IS_POW2((cap) + 1)); \ + (f)->fifo_rd_i = (f)->fifo_wr_i = 0u; \ + (f)->fifo_mask = (cap); \ + }while(0) + +#define ef_vi_fifo2_is_empty(f) ((f)->fifo_rd_i == (f)->fifo_wr_i) +#define ef_vi_fifo2_capacity(f) ((f)->fifo_mask) +#define ef_vi_fifo2_buf_size(f) ((f)->fifo_mask + 1u) +#define ef_vi_fifo2_end(f) ((f)->fifo + ef_vi_fifo2_buf_size(f)) +#define ef_vi_fifo2_peek(f) ((f)->fifo[(f)->fifo_rd_i]) +#define ef_vi_fifo2_poke(f) ((f)->fifo[(f)->fifo_wr_i]) +#define ef_vi_fifo2_num(f) EF_VI_FIFO2_M((f),(f)->fifo_wr_i-(f)->fifo_rd_i) + +#define ef_vi_fifo2_wr_prev(f) \ + do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i - 1u); }while(0) +#define ef_vi_fifo2_wr_next(f) \ + do{ (f)->fifo_wr_i = EF_VI_FIFO2_M((f), (f)->fifo_wr_i + 1u); }while(0) +#define ef_vi_fifo2_rd_adv(f, n) \ + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + (n)); }while(0) +#define ef_vi_fifo2_rd_prev(f) \ + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i - 1u); }while(0) +#define ef_vi_fifo2_rd_next(f) \ + do{ (f)->fifo_rd_i = EF_VI_FIFO2_M((f), (f)->fifo_rd_i + 1u); }while(0) + +#define ef_vi_fifo2_put(f, v) \ + do{ 
ef_vi_fifo2_poke(f) = (v); ef_vi_fifo2_wr_next(f); }while(0) +#define ef_vi_fifo2_get(f, pv) \ + do{ *(pv) = ef_vi_fifo2_peek(f); ef_vi_fifo2_rd_next(f); }while(0) + + +/* ********************************************************************* + * Eventq handling + */ + +typedef union { + uint64_t u64; + struct { + uint32_t a; + uint32_t b; + } opaque; +} ef_vi_event; + + +#define EF_VI_EVENT_OFFSET(q, i) \ + (((q)->evq_state->evq_ptr - (i) * sizeof(ef_vi_event)) & (q)->evq_mask) + +#define EF_VI_EVENT_PTR(q, i) \ + ((ef_vi_event*) ((q)->evq_base + EF_VI_EVENT_OFFSET((q), (i)))) + +/* ********************************************************************* + * Miscellaneous goodies + */ +#ifdef NDEBUG +# define EF_VI_DEBUG(x) +#else +# define EF_VI_DEBUG(x) x +#endif + +#define EF_VI_ROUND_UP(i, align) (((i)+(align)-1u) & ~((align)-1u)) +#define EF_VI_ALIGN_FWD(p, align) (((p)+(align)-1u) & ~((align)-1u)) +#define EF_VI_ALIGN_BACK(p, align) ((p) & ~((align)-1u)) +#define EF_VI_PTR_ALIGN_BACK(p, align) \ + ((char*)EF_VI_ALIGN_BACK(((intptr_t)(p)), ((intptr_t)(align)))) +#define EF_VI_IS_POW2(x) ((x) && ! ((x) & ((x) - 1))) + + +/* ******************************************************************** + */ + +extern void falcon_vi_init(ef_vi*, void* vvis ) EF_VI_HF; +extern void ef_eventq_state_init(ef_vi* evq) EF_VI_HF; +extern void __ef_init(void) EF_VI_HF; + + +#endif /* __CI_EF_VI_INTERNAL_H__ */ + diff -r dd748ded9ba8 drivers/xen/sfc_netfront/etherfabric/ef_vi.h --- /dev/null +++ b/drivers/xen/sfc_netfront/etherfabric/ef_vi.h @@ -0,0 +1,665 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \brief Virtual Interface + * \date 2007/05/16 + */ + +#ifndef __EFAB_EF_VI_H__ +#define __EFAB_EF_VI_H__ + + +/********************************************************************** + * Primitive types **************************************************** + **********************************************************************/ + +/* We standardise on the types from stdint.h and synthesise these types + * for compilers/platforms that don't provide them */ + +# include +# define EF_VI_ALIGN(x) __attribute__ ((aligned (x))) +# define ef_vi_inline static inline + + + +/********************************************************************** + * Types ************************************************************** + **********************************************************************/ + +typedef uint32_t ef_eventq_ptr; + +typedef uint64_t ef_addr; +typedef char* ef_vi_ioaddr_t; + +/********************************************************************** + * ef_event *********************************************************** + **********************************************************************/ + +/*! \i_ef_vi A DMA request identifier. +** +** This is an integer token specified by the transport and associated +** with a DMA request. It is returned to the VI user with DMA completion +** events. It is typically used to identify the buffer associated with +** the transfer. +*/ +typedef int ef_request_id; + +typedef union { + uint64_t u64[1]; + uint32_t u32[2]; +} ef_vi_qword; + +typedef ef_vi_qword ef_hw_event; + +#define EF_REQUEST_ID_BITS 16u +#define EF_REQUEST_ID_MASK ((1u << EF_REQUEST_ID_BITS) - 1u) + +/*! \i_ef_event An [ef_event] is a token that identifies something that +** has happened. Examples include packets received, packets transmitted +** and errors. +*/ +typedef union { + struct { + ef_hw_event ev; + unsigned type :16; + } generic; + struct { + ef_hw_event ev; + unsigned type :16; + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ + unsigned q_id :16; + unsigned len :16; + unsigned flags :16; + } rx; + struct { /* This *must* have same layout as [rx]. */ + ef_hw_event ev; + unsigned type :16; + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ + unsigned q_id :16; + unsigned len :16; + unsigned flags :16; + unsigned subtype :16; + } rx_discard; + struct { + ef_hw_event ev; + unsigned type :16; + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ + unsigned q_id :16; + } tx; + struct { + ef_hw_event ev; + unsigned type :16; + /*ef_request_id request_id :EF_REQUEST_ID_BITS;*/ + unsigned q_id :16; + unsigned subtype :16; + } tx_error; + struct { + ef_hw_event ev; + unsigned type :16; + unsigned q_id :16; + } rx_no_desc_trunc; + struct { + ef_hw_event ev; + unsigned type :16; + unsigned data; + } sw; +} ef_event; + + +#define EF_EVENT_TYPE(e) ((e).generic.type) +enum { + /** Good data was received. */ + EF_EVENT_TYPE_RX, + /** Packets have been sent. */ + EF_EVENT_TYPE_TX, + /** Data received and buffer consumed, but something is wrong. */ + EF_EVENT_TYPE_RX_DISCARD, + /** Transmit of packet failed. */ + EF_EVENT_TYPE_TX_ERROR, + /** Received packet was truncated due to lack of descriptors. */ + EF_EVENT_TYPE_RX_NO_DESC_TRUNC, + /** Software generated event. 
*/ + EF_EVENT_TYPE_SW, + /** Event queue overflow. */ + EF_EVENT_TYPE_OFLOW, +}; + +#define EF_EVENT_RX_BYTES(e) ((e).rx.len) +#define EF_EVENT_RX_Q_ID(e) ((e).rx.q_id) +#define EF_EVENT_RX_CONT(e) ((e).rx.flags & EF_EVENT_FLAG_CONT) +#define EF_EVENT_RX_SOP(e) ((e).rx.flags & EF_EVENT_FLAG_SOP) +#define EF_EVENT_RX_ISCSI_OKAY(e) ((e).rx.flags & EF_EVENT_FLAG_ISCSI_OK) +#define EF_EVENT_FLAG_SOP 0x1 +#define EF_EVENT_FLAG_CONT 0x2 +#define EF_EVENT_FLAG_ISCSI_OK 0x4 + +#define EF_EVENT_TX_Q_ID(e) ((e).tx.q_id) + +#define EF_EVENT_RX_DISCARD_Q_ID(e) ((e).rx_discard.q_id) +#define EF_EVENT_RX_DISCARD_LEN(e) ((e).rx_discard.len) +#define EF_EVENT_RX_DISCARD_TYPE(e) ((e).rx_discard.subtype) +enum { + EF_EVENT_RX_DISCARD_CSUM_BAD, + EF_EVENT_RX_DISCARD_CRC_BAD, + EF_EVENT_RX_DISCARD_TRUNC, + EF_EVENT_RX_DISCARD_RIGHTS, + EF_EVENT_RX_DISCARD_OTHER, +}; + +#define EF_EVENT_TX_ERROR_Q_ID(e) ((e).tx_error.q_id) +#define EF_EVENT_TX_ERROR_TYPE(e) ((e).tx_error.subtype) +enum { + EF_EVENT_TX_ERROR_RIGHTS, + EF_EVENT_TX_ERROR_OFLOW, + EF_EVENT_TX_ERROR_2BIG, + EF_EVENT_TX_ERROR_BUS, +}; + +#define EF_EVENT_RX_NO_DESC_TRUNC_Q_ID(e) ((e).rx_no_desc_trunc.q_id) + +#define EF_EVENT_SW_DATA_MASK 0xffff +#define EF_EVENT_SW_DATA(e) ((e).sw.data) + +#define EF_EVENT_FMT "[ev:%x:%08x:%08x]" +#define EF_EVENT_PRI_ARG(e) (unsigned) (e).generic.type, \ + (unsigned) (e).generic.ev.u32[1], \ + (unsigned) (e).generic.ev.u32[0] + +#define EF_GET_HW_EV(e) ((e).generic.ev) +#define EF_GET_HW_EV_PTR(e) (&(e).generic.ev) +#define EF_GET_HW_EV_U64(e) ((e).generic.ev.u64[0]) + + +/* ***************** */ + +/*! Used by netif shared state. Must use types of explicit size. */ +typedef struct { + uint16_t rx_last_desc_ptr; /* for RX duplicates */ + uint8_t bad_sop; /* bad SOP detected */ + uint8_t frag_num; /* next fragment #, 0=>SOP */ +} ef_rx_dup_state_t; + + +/* Max number of ports on any SF NIC. */ +#define EFAB_DMAQS_PER_EVQ_MAX 32 + +typedef struct { + ef_eventq_ptr evq_ptr; + int32_t trashed; + ef_rx_dup_state_t rx_dup_state[EFAB_DMAQS_PER_EVQ_MAX]; +} ef_eventq_state; + + +/*! \i_ef_base [ef_iovec] is similar the standard [struct iovec]. An +** array of these is used to designate a scatter/gather list of I/O +** buffers. +*/ +typedef struct { + ef_addr iov_base EF_VI_ALIGN(8); + unsigned iov_len; +} ef_iovec; + +/* Falcon constants */ +#define TX_EV_DESC_PTR_LBN 0 + +/********************************************************************** + * ef_iobufset ******************************************************** + **********************************************************************/ + +/*! \i_ef_bufs An [ef_iobufset] is a collection of buffers to be used +** with the NIC. +*/ +typedef struct ef_iobufset { + unsigned magic; + unsigned bufs_mmap_bytes; + unsigned bufs_handle; + int bufs_ptr_off; + ef_addr bufs_addr; + unsigned bufs_size; /* size rounded to pow2 */ + int bufs_num; + int faultonaccess; +} ef_iobufset; + + +/********************************************************************** + * ef_vi ************************************************************** + **********************************************************************/ + +enum ef_vi_flags { + EF_VI_RX_SCATTER = 0x1, + EF_VI_ISCSI_RX_HDIG = 0x2, + EF_VI_ISCSI_TX_HDIG = 0x4, + EF_VI_ISCSI_RX_DDIG = 0x8, + EF_VI_ISCSI_TX_DDIG = 0x10, + EF_VI_TX_PHYS_ADDR = 0x20, + EF_VI_RX_PHYS_ADDR = 0x40, + EF_VI_TX_IP_CSUM_DIS = 0x80, + EF_VI_TX_TCPUDP_CSUM_DIS= 0x100, + EF_VI_TX_TCPUDP_ONLY = 0x200, + /* Flags in range 0xXXXX0000 are for internal use. 
*/ +}; + +typedef struct { + uint32_t added; + uint32_t removed; +} ef_vi_txq_state; + +typedef struct { + uint32_t added; + uint32_t removed; +} ef_vi_rxq_state; + +typedef struct { + uint32_t mask; + void* doorbell; + void* descriptors; + uint16_t* ids; + unsigned misalign_mask; +} ef_vi_txq; + +typedef struct { + uint32_t mask; + void* doorbell; + void* descriptors; + uint16_t* ids; +} ef_vi_rxq; + +typedef struct { + ef_eventq_state evq; + ef_vi_txq_state txq; + ef_vi_rxq_state rxq; + /* Followed by request id fifos. */ +} ef_vi_state; + +/*! \i_ef_vi A virtual interface. +** +** An [ef_vi] represents a virtual interface on a specific NIC. A +** virtual interface is a collection of an event queue and two DMA queues +** used to pass Ethernet frames between the transport implementation and +** the network. +*/ +typedef struct ef_vi { + unsigned magic; + + unsigned vi_resource_id; + unsigned vi_resource_handle_hack; + unsigned vi_i; + + char* vi_mem_mmap_ptr; + int vi_mem_mmap_bytes; + char* vi_io_mmap_ptr; + int vi_io_mmap_bytes; + + ef_eventq_state* evq_state; + char* evq_base; + unsigned evq_mask; + ef_vi_ioaddr_t evq_timer_reg; + + ef_vi_txq vi_txq; + ef_vi_rxq vi_rxq; + ef_vi_state* ep_state; + enum ef_vi_flags vi_flags; +} ef_vi; + + +enum ef_vi_arch { + EF_VI_ARCH_FALCON, +}; + + +struct ef_vi_nic_type { + unsigned char arch; + char variant; + unsigned char revision; +}; + + +/* This structure is opaque to the client & used to pass mapping data + * from the resource manager to the ef_vi lib. for ef_vi_init(). + */ +struct vi_mappings { + uint32_t signature; +# define VI_MAPPING_VERSION 0x02 /*Byte: Increment me if struct altered*/ +# define VI_MAPPING_SIGNATURE (0xBA1150 + VI_MAPPING_VERSION) + + struct ef_vi_nic_type nic_type; + + int vi_instance; + + unsigned evq_bytes; + char* evq_base; + ef_vi_ioaddr_t evq_timer_reg; + + unsigned rx_queue_capacity; + ef_vi_ioaddr_t rx_dma_ef1; + char* rx_dma_falcon; + ef_vi_ioaddr_t rx_bell; + + unsigned tx_queue_capacity; + ef_vi_ioaddr_t tx_dma_ef1; + char* tx_dma_falcon; + ef_vi_ioaddr_t tx_bell; +}; +/* This is used by clients to allocate a suitably sized buffer for the + * resource manager to fill & ef_vi_init() to use. */ +#define VI_MAPPINGS_SIZE (sizeof(struct vi_mappings)) + + +/********************************************************************** + * ef_config ********************************************************** + **********************************************************************/ + +struct ef_config_t { + int log; /* debug logging level */ +}; + +extern struct ef_config_t ef_config; + + +/********************************************************************** + * ef_vi ************************************************************** + **********************************************************************/ + +/* Initialise [data_area] with information required to initialise an ef_vi. + * In the following, an unused param should be set to NULL. Note the case + * marked (*) of [iobuf_mmap] for falcon/driver; for normal driver this + * must be NULL. + * + * \param data_area [in,out] required, must ref at least VI_MAPPINGS_SIZE + * bytes + * \param evq_capacity [in] number of events in event queue. Specify 0 for + * no event queue. + * \param rxq_capacity [in] number of descriptors in RX DMA queue. Specify + * 0 for no RX queue. + * \param txq_capacity [in] number of descriptors in TX DMA queue. Specify + * 0 for no TX queue. 
+ * \param mmap_info [in] mem-map info for resource + * \param io_mmap [in] ef1, required + * falcon, required + * \param iobuf_mmap [in] ef1, UL: unused + * falcon, UL: required + */ +extern void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type, + unsigned rxq_capacity, + unsigned txq_capacity, int instance, + void* io_mmap, void* iobuf_mmap_rx, + void* iobuf_mmap_tx, enum ef_vi_flags); + + +extern void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type, + int instance, unsigned evq_bytes, + void* base, void* timer_reg); + +ef_vi_inline unsigned ef_vi_resource_id(ef_vi* vi) +{ + return vi->vi_resource_id; +} + +ef_vi_inline enum ef_vi_flags ef_vi_flags(ef_vi* vi) +{ + return vi->vi_flags; +} + + +/********************************************************************** + * Receive interface ************************************************** + **********************************************************************/ + +/*! \i_ef_vi Returns the amount of space in the RX descriptor ring. +** +** \return the amount of space in the queue. +*/ +ef_vi_inline int ef_vi_receive_space(ef_vi* vi) +{ + ef_vi_rxq_state* qs = &vi->ep_state->rxq; + return vi->vi_rxq.mask - (qs->added - qs->removed); +} + + +/*! \i_ef_vi Returns the fill level of the RX descriptor ring. +** +** \return the fill level of the queue. +*/ +ef_vi_inline int ef_vi_receive_fill_level(ef_vi* vi) +{ + ef_vi_rxq_state* qs = &vi->ep_state->rxq; + return qs->added - qs->removed; +} + + +ef_vi_inline int ef_vi_receive_capacity(ef_vi* vi) +{ + return vi->vi_rxq.mask; +} + +/*! \i_ef_vi Complete a receive operation. +** +** When a receive completion event is received, it should be passed to +** this function. The request-id for the buffer that the packet was +** delivered to is returned. +** +** After this function returns, more space may be available in the +** receive queue. +*/ +extern ef_request_id ef_vi_receive_done(const ef_vi*, const ef_event*); + +/*! \i_ef_vi Return request ID indicated by a receive event + */ +ef_vi_inline ef_request_id ef_vi_receive_request_id(const ef_vi* vi, + const ef_event* ef_ev) +{ + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev); + return ev->u32[0] & vi->vi_rxq.mask; +} + + +/*! \i_ef_vi Form a receive descriptor. +** +** If \c initial_rx_bytes is zero use a reception size at least as large +** as an MTU. +*/ +extern int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id, + int intial_rx_bytes); + +/*! \i_ef_vi Submit initialised receive descriptors to the NIC. */ +extern void ef_vi_receive_push(ef_vi* vi); + +/*! \i_ef_vi Post a buffer on the receive queue. +** +** \return 0 on success, or -EAGAIN if the receive queue is full +*/ +extern int ef_vi_receive_post(ef_vi*, ef_addr addr, + ef_request_id dma_id); + +/********************************************************************** + * Transmit interface ************************************************* + **********************************************************************/ + +/*! \i_ef_vi Return the amount of space (in descriptors) in the transmit +** queue. +** +** \return the amount of space in the queue (in descriptors) +*/ +ef_vi_inline int ef_vi_transmit_space(ef_vi* vi) +{ + ef_vi_txq_state* qs = &vi->ep_state->txq; + return vi->vi_txq.mask - (qs->added - qs->removed); +} + + +/*! \i_ef_vi Returns the fill level of the TX descriptor ring. +** +** \return the fill level of the queue. 
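A minimal refill sketch using the receive-interface calls declared above; next_buf() and buf_addr() are hypothetical stand-ins for the caller's buffer bookkeeping, and passing 0 as the initial RX byte count requests an MTU-sized reception as described above:

/* Sketch only: keep the RX descriptor ring topped up. */
static void example_rx_refill(ef_vi* vi)
{
	int posted = 0;

	while (ef_vi_receive_space(vi) > 0) {
		ef_request_id id = next_buf();      /* hypothetical */
		ef_addr addr     = buf_addr(id);    /* hypothetical */

		if (ef_vi_receive_init(vi, addr, id, 0) < 0)
			break;
		++posted;
	}
	if (posted)
		ef_vi_receive_push(vi);  /* submit the initialised descriptors */
}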
+*/ +ef_vi_inline int ef_vi_transmit_fill_level(ef_vi* vi) +{ + ef_vi_txq_state* qs = &vi->ep_state->txq; + return qs->added - qs->removed; +} + + +/*! \i_ef_vi Returns the total capacity of the TX descriptor ring. +** +** \return the capacity of the queue. +*/ +ef_vi_inline int ef_vi_transmit_capacity(ef_vi* vi) +{ + return vi->vi_txq.mask; +} + + +/*! \i_ef_vi Transmit a packet. +** +** \param bytes must be greater than ETH_ZLEN. +** \return -EAGAIN if the transmit queue is full, or 0 on success +*/ +extern int ef_vi_transmit(ef_vi*, ef_addr, int bytes, ef_request_id dma_id); + +/*! \i_ef_vi Transmit a packet using a gather list. +** +** \param iov_len must be greater than zero +** \param iov the first must be non-zero in length (but others need not) +** +** \return -EAGAIN if the queue is full, or 0 on success +*/ +extern int ef_vi_transmitv(ef_vi*, const ef_iovec* iov, int iov_len, + ef_request_id dma_id); + +/*! \i_ef_vi Initialise a DMA request. +** +** \return -EAGAIN if the queue is full, or 0 on success +*/ +extern int ef_vi_transmit_init(ef_vi*, ef_addr, int bytes, + ef_request_id dma_id); + +/*! \i_ef_vi Initialise a DMA request. +** +** \return -EAGAIN if the queue is full, or 0 on success +*/ +extern int ef_vi_transmitv_init(ef_vi*, const ef_iovec*, int iov_len, + ef_request_id dma_id); + +/*! \i_ef_vi Submit DMA requests to the NIC. +** +** The DMA requests must have been initialised using +** ef_vi_transmit_init() or ef_vi_transmitv_init(). +*/ +extern void ef_vi_transmit_push(ef_vi*); + + +/*! \i_ef_vi Maximum number of transmit completions per transmit event. */ +#define EF_VI_TRANSMIT_BATCH 64 + +/*! \i_ef_vi Determine the set of [ef_request_id]s for each DMA request +** which has been completed by a given transmit completion +** event. +** +** \param ids must point to an array of length EF_VI_TRANSMIT_BATCH +** \return the number of valid [ef_request_id]s (can be zero) +*/ +extern int ef_vi_transmit_unbundle(ef_vi* ep, const ef_event*, + ef_request_id* ids); + + +/*! \i_ef_event Returns true if ef_eventq_poll() will return event(s). */ +extern int ef_eventq_has_event(ef_vi* vi); + +/*! \i_ef_event Returns true if there are quite a few events in the event +** queue. +** +** This looks ahead in the event queue, so has the property that it will +** not ping-pong a cache-line when it is called concurrently with events +** being delivered. +*/ +extern int ef_eventq_has_many_events(ef_vi* evq, int look_ahead); + +/*! Type of function to handle unknown events arriving on event queue +** Return CI_TRUE iff the event has been handled. +*/ +typedef int/*bool*/ ef_event_handler_fn(void* priv, ef_vi* evq, ef_event* ev); + +/*! Standard poll exception routine */ +extern int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, + ef_event* ev); + +/*! \i_ef_event Retrieve events from the event queue, handle RX/TX events +** and pass any others to an exception handler function +** +** \return The number of events retrieved. +*/ +extern int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len, + ef_event_handler_fn *exception, void *expt_priv); + +/*! \i_ef_event Retrieve events from the event queue. +** +** \return The number of events retrieved. +*/ +ef_vi_inline int ef_eventq_poll(ef_vi* evq, ef_event* evs, int evs_len) +{ + return ef_eventq_poll_evs(evq, evs, evs_len, + &ef_eventq_poll_exception, (void*)0); +} + +/*! \i_ef_event Returns the capacity of an event queue. 
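A hedged sketch of the event loop these declarations imply: poll the queue, credit RX completions back with ef_vi_receive_done(), and unbundle batched TX completions; handle_rx() and free_tx_buf() are hypothetical application hooks:

/* Sketch only: one pass over the event queue. */
static void example_event_loop(ef_vi* vi)
{
	ef_event evs[16];
	ef_request_id ids[EF_VI_TRANSMIT_BATCH];
	int i, j, n = ef_eventq_poll(vi, evs, 16);

	for (i = 0; i < n; ++i) {
		switch (EF_EVENT_TYPE(evs[i])) {
		case EF_EVENT_TYPE_RX:
			/* which buffer completed, and how many bytes it holds */
			handle_rx(ef_vi_receive_done(vi, &evs[i]),   /* hypothetical */
				  EF_EVENT_RX_BYTES(evs[i]));
			break;
		case EF_EVENT_TYPE_TX: {
			int n_ids = ef_vi_transmit_unbundle(vi, &evs[i], ids);
			for (j = 0; j < n_ids; ++j)
				free_tx_buf(ids[j]);                 /* hypothetical */
			break;
		}
		default:
			/* discards, TX errors and overflow not shown here */
			break;
		}
	}
}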
*/ +ef_vi_inline int ef_eventq_capacity(ef_vi* vi) +{ + return (vi->evq_mask + 1u) / sizeof(ef_hw_event); +} + +/* Returns the instance ID of [vi] */ +ef_vi_inline unsigned ef_vi_instance(ef_vi* vi) +{ return vi->vi_i; } + + +/********************************************************************** + * Initialisation ***************************************************** + **********************************************************************/ + +/*! Return size of state buffer of an initialised VI. */ +extern int ef_vi_state_bytes(ef_vi*); + +/*! Return size of buffer needed for VI state given sizes of RX and TX +** DMA queues. Queue sizes must be legal sizes (power of 2), or 0 (no +** queue). +*/ +extern int ef_vi_calc_state_bytes(int rxq_size, int txq_size); + +/*! Initialise [ef_vi] from the provided resources. [vvis] must have been +** created by ef_make_vi_data() & remains owned by the caller. +*/ +extern void ef_vi_init(ef_vi*, void* vi_info, ef_vi_state* state, + ef_eventq_state* evq_state, enum ef_vi_flags); + +extern void ef_vi_state_init(ef_vi*); +extern void ef_eventq_state_init(ef_vi*); + +/*! Convert an efhw device arch to ef_vi_arch, or returns -1 if not +** recognised. +*/ +extern int ef_vi_arch_from_efhw_arch(int efhw_arch); + + +#endif /* __EFAB_EF_VI_H__ */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/falcon_event.c --- /dev/null +++ b/drivers/xen/sfc_netfront/falcon_event.c @@ -0,0 +1,346 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr + * \brief Routine to poll event queues. + * \date 2003/03/04 + */ + +/*! \cidoxg_lib_ef */ +#include "ef_vi_internal.h" + +/* Be worried about this on byteswapped machines */ +/* Due to crazy chipsets, we see the event words being written in +** arbitrary order (bug4539). So test for presence of event must ensure +** that both halves have changed from the null. +*/ +# define EF_VI_IS_EVENT(evp) \ + ( (((evp)->opaque.a != (uint32_t)-1) && \ + ((evp)->opaque.b != (uint32_t)-1)) ) + + +#ifdef NDEBUG +# define IS_DEBUG 0 +#else +# define IS_DEBUG 1 +#endif + + +/*! 
Check for RX events with inconsistent SOP/CONT +** +** Returns true if this event should be discarded +*/ +ef_vi_inline int ef_eventq_is_rx_sop_cont_bad_efab(ef_vi* vi, + const ef_vi_qword* ev) +{ + ef_rx_dup_state_t* rx_dup_state; + uint8_t* bad_sop; + + unsigned label = QWORD_GET_U(RX_EV_Q_LABEL, *ev); + unsigned sop = QWORD_TEST_BIT(RX_SOP, *ev); + + ef_assert(vi); + ef_assert_lt(label, EFAB_DMAQS_PER_EVQ_MAX); + + rx_dup_state = &vi->evq_state->rx_dup_state[label]; + bad_sop = &rx_dup_state->bad_sop; + + if( ! ((vi->vi_flags & EF_VI_BUG5692_WORKAROUND) || IS_DEBUG) ) { + *bad_sop = (*bad_sop && !sop); + } + else { + unsigned cont = QWORD_TEST_BIT(RX_JUMBO_CONT, *ev); + uint8_t *frag_num = &rx_dup_state->frag_num; + + /* bad_sop should latch till the next sop */ + *bad_sop = (*bad_sop && !sop) || ( !!sop != (*frag_num==0) ); + + /* we do not check the number of bytes relative to the + * fragment number and size of the user rx buffer here + * because we don't know the size of the user rx + * buffer - we probably should perform this check in + * the nearest code calling this though. + */ + *frag_num = cont ? (*frag_num + 1) : 0; + } + + return *bad_sop; +} + + +ef_vi_inline int falcon_rx_check_dup(ef_vi* evq, ef_event* ev_out, + const ef_vi_qword* ev) +{ + unsigned q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev); + unsigned desc_ptr = QWORD_GET_U(RX_EV_DESC_PTR, *ev); + ef_rx_dup_state_t* rx_dup_state = &evq->evq_state->rx_dup_state[q_id]; + + if(likely( desc_ptr != rx_dup_state->rx_last_desc_ptr )) { + rx_dup_state->rx_last_desc_ptr = desc_ptr; + return 0; + } + + rx_dup_state->rx_last_desc_ptr = desc_ptr; + rx_dup_state->bad_sop = 1; +#ifndef NDEBUG + rx_dup_state->frag_num = 0; +#endif + BUG_ON(!QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev)); + BUG_ON( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev)); + BUG_ON(!QWORD_GET_U(RX_EV_BYTE_CNT, *ev) == 0); + ev_out->rx_no_desc_trunc.type = EF_EVENT_TYPE_RX_NO_DESC_TRUNC; + ev_out->rx_no_desc_trunc.q_id = q_id; + return 1; +} + + +ef_vi_inline void falcon_rx_event(ef_event* ev_out, const ef_vi_qword* ev) +{ + if(likely( QWORD_TEST_BIT(RX_EV_PKT_OK, *ev) )) { + ev_out->rx.type = EF_EVENT_TYPE_RX; + ev_out->rx.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev); + ev_out->rx.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev); + if( QWORD_TEST_BIT(RX_SOP, *ev) ) + ev_out->rx.flags = EF_EVENT_FLAG_SOP; + else + ev_out->rx.flags = 0; + if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) ) + ev_out->rx.flags |= EF_EVENT_FLAG_CONT; + if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) ) + ev_out->rx.flags |= EF_EVENT_FLAG_ISCSI_OK; + } + else { + ev_out->rx_discard.type = EF_EVENT_TYPE_RX_DISCARD; + ev_out->rx_discard.q_id = QWORD_GET_U(RX_EV_Q_LABEL, *ev); + ev_out->rx_discard.len = QWORD_GET_U(RX_EV_BYTE_CNT, *ev); +#if 1 /* hack for ptloop compatability: ?? TODO purge */ + if( QWORD_TEST_BIT(RX_SOP, *ev) ) + ev_out->rx_discard.flags = EF_EVENT_FLAG_SOP; + else + ev_out->rx_discard.flags = 0; + if( QWORD_TEST_BIT(RX_JUMBO_CONT, *ev) ) + ev_out->rx_discard.flags |= EF_EVENT_FLAG_CONT; + if( QWORD_TEST_BIT(RX_iSCSI_PKT_OK, *ev) ) + ev_out->rx_discard.flags |= EF_EVENT_FLAG_ISCSI_OK; +#endif + /* Order matters here: more fundamental errors first. 
*/ + if( QWORD_TEST_BIT(RX_EV_BUF_OWNER_ID_ERR, *ev) ) + ev_out->rx_discard.subtype = + EF_EVENT_RX_DISCARD_RIGHTS; + else if( QWORD_TEST_BIT(RX_EV_FRM_TRUNC, *ev) ) + ev_out->rx_discard.subtype = + EF_EVENT_RX_DISCARD_TRUNC; + else if( QWORD_TEST_BIT(RX_EV_ETH_CRC_ERR, *ev) ) + ev_out->rx_discard.subtype = + EF_EVENT_RX_DISCARD_CRC_BAD; + else if( QWORD_TEST_BIT(RX_EV_IP_HDR_CHKSUM_ERR, *ev) ) + ev_out->rx_discard.subtype = + EF_EVENT_RX_DISCARD_CSUM_BAD; + else if( QWORD_TEST_BIT(RX_EV_TCP_UDP_CHKSUM_ERR, *ev) ) + ev_out->rx_discard.subtype = + EF_EVENT_RX_DISCARD_CSUM_BAD; + else + ev_out->rx_discard.subtype = + EF_EVENT_RX_DISCARD_OTHER; + } +} + + +ef_vi_inline void falcon_tx_event(ef_event* ev_out, const ef_vi_qword* ev) +{ + /* Danger danger! No matter what we ask for wrt batching, we + ** will get a batched event every 16 descriptors, and we also + ** get dma-queue-empty events. i.e. Duplicates are expected. + ** + ** In addition, if it's been requested in the descriptor, we + ** get an event per descriptor. (We don't currently request + ** this). + */ + if(likely( QWORD_TEST_BIT(TX_EV_COMP, *ev) )) { + ev_out->tx.type = EF_EVENT_TYPE_TX; + ev_out->tx.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev); + } + else { + ev_out->tx_error.type = EF_EVENT_TYPE_TX_ERROR; + ev_out->tx_error.q_id = QWORD_GET_U(TX_EV_Q_LABEL, *ev); + if(likely( QWORD_TEST_BIT(TX_EV_BUF_OWNER_ID_ERR, *ev) )) + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_RIGHTS; + else if(likely( QWORD_TEST_BIT(TX_EV_WQ_FF_FULL, *ev) )) + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_OFLOW; + else if(likely( QWORD_TEST_BIT(TX_EV_PKT_TOO_BIG, *ev) )) + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_2BIG; + else if(likely( QWORD_TEST_BIT(TX_EV_PKT_ERR, *ev) )) + ev_out->tx_error.subtype = EF_EVENT_TX_ERROR_BUS; + } +} + + +static void mark_bad(ef_event* ev) +{ + ev->generic.ev.u64[0] &=~ ((uint64_t) 1u << RX_EV_PKT_OK_LBN); +} + + +int ef_eventq_poll_evs(ef_vi* evq, ef_event* evs, int evs_len, + ef_event_handler_fn *exception, void *expt_priv) +{ + int evs_len_orig = evs_len; + + EF_VI_CHECK_EVENT_Q(evq); + ef_assert(evs); + ef_assert_gt(evs_len, 0); + + if(unlikely( EF_VI_IS_EVENT(EF_VI_EVENT_PTR(evq, 1)) )) + goto overflow; + + do { + { /* Read the event out of the ring, then fiddle with + * copied version. Reason is that the ring is + * likely to get pushed out of cache by another + * event being delivered by hardware. */ + ef_vi_event* ev = EF_VI_EVENT_PTR(evq, 0); + if( ! EF_VI_IS_EVENT(ev) ) + break; + evs->generic.ev.u64[0] = cpu_to_le64 (ev->u64); + evq->evq_state->evq_ptr += sizeof(ef_vi_event); + ev->u64 = (uint64_t)(int64_t) -1; + } + + /* Ugly: Exploit the fact that event code lies in top + * bits of event. */ + ef_assert_ge(EV_CODE_LBN, 32u); + switch( evs->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) { + case RX_IP_EV_DECODE: + /* Look for duplicate desc_ptr: it signals + * that a jumbo frame was truncated because we + * ran out of descriptors. 
*/ + if(unlikely( falcon_rx_check_dup + (evq, evs, &evs->generic.ev) )) { + --evs_len; + ++evs; + break; + } + else { + /* Cope with FalconA1 bugs where RX + * gives inconsistent RX events Mark + * events as bad until SOP becomes + * consistent again + * ef_eventq_is_rx_sop_cont_bad() has + * side effects - order is important + */ + if(unlikely + (ef_eventq_is_rx_sop_cont_bad_efab + (evq, &evs->generic.ev) )) { + mark_bad(evs); + } + } + falcon_rx_event(evs, &evs->generic.ev); + --evs_len; + ++evs; + break; + + case TX_IP_EV_DECODE: + falcon_tx_event(evs, &evs->generic.ev); + --evs_len; + ++evs; + break; + + default: + break; + } + } while( evs_len ); + + return evs_len_orig - evs_len; + + + overflow: + evs->generic.type = EF_EVENT_TYPE_OFLOW; + evs->generic.ev.u64[0] = (uint64_t)((int64_t)-1); + return 1; +} + + +int/*bool*/ ef_eventq_poll_exception(void* priv, ef_vi* evq, ef_event* ev) +{ + int /*bool*/ handled = 0; + + switch( ev->generic.ev.u32[1] >> (EV_CODE_LBN - 32u) ) { + case DRIVER_EV_DECODE: + if( QWORD_GET_U(DRIVER_EV_SUB_CODE, ev->generic.ev) == + EVQ_INIT_DONE_EV_DECODE ) + /* EVQ initialised event: ignore. */ + handled = 1; + break; + } + return handled; +} + + +void ef_eventq_iterate(ef_vi* vi, + void (*fn)(void* arg, ef_vi*, int rel_pos, + int abs_pos, void* event), + void* arg, int stop_at_end) +{ + int i, size_evs = (vi->evq_mask + 1) / sizeof(ef_vi_event); + + for( i = 0; i < size_evs; ++i ) { + ef_vi_event* e = EF_VI_EVENT_PTR(vi, -i); + if( EF_VI_IS_EVENT(e) ) + fn(arg, vi, i, + EF_VI_EVENT_OFFSET(vi, -i) / sizeof(ef_vi_event), + e); + else if( stop_at_end ) + break; + } +} + + +int ef_eventq_has_event(ef_vi* vi) +{ + return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, 0)); +} + + +int ef_eventq_has_many_events(ef_vi* vi, int look_ahead) +{ + ef_assert_ge(look_ahead, 0); + return EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, -look_ahead)); +} + + +int ef_eventq_has_rx_event(ef_vi* vi) +{ + ef_vi_event* ev; + int i, n_evs = 0; + + for( i = 0; EF_VI_IS_EVENT(EF_VI_EVENT_PTR(vi, i)); --i ) { + ev = EF_VI_EVENT_PTR(vi, i); + if( EFVI_FALCON_EVENT_CODE(ev) == EF_EVENT_TYPE_RX ) n_evs++; + } + return n_evs; +} + +/*! \cidoxg_end */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/falcon_vi.c --- /dev/null +++ b/drivers/xen/sfc_netfront/falcon_vi.c @@ -0,0 +1,465 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr, stg + * \brief Falcon-specific VI + * \date 2006/11/30 + */ + +#include "ef_vi_internal.h" + + +#define EFVI_FALCON_DMA_TX_FRAG 1 + + +/* TX descriptor for both physical and virtual packet transfers */ +typedef union { + uint32_t dword[2]; +} ef_vi_falcon_dma_tx_buf_desc; +typedef ef_vi_falcon_dma_tx_buf_desc ef_vi_falcon_dma_tx_phys_desc; + + +/* RX descriptor for physical addressed transfers */ +typedef union { + uint32_t dword[2]; +} ef_vi_falcon_dma_rx_phys_desc; + + +/* RX descriptor for virtual packet transfers */ +typedef struct { + uint32_t dword[1]; +} ef_vi_falcon_dma_rx_buf_desc; + +/* Buffer table index */ +typedef uint32_t ef_vi_buffer_addr_t; + +ef_vi_inline int64_t dma_addr_to_u46(int64_t src_dma_addr) +{ + return (src_dma_addr & __FALCON_MASK(46, int64_t)); +} + +/*! Setup a physical address based descriptor with a specified length */ +ef_vi_inline void +__falcon_dma_rx_calc_ip_phys(ef_vi_dma_addr_t dest_pa, + ef_vi_falcon_dma_rx_phys_desc *desc, + int bytes) +{ + int region = 0; /* TODO fixme */ + int64_t dest = dma_addr_to_u46(dest_pa); /* lower 46 bits */ + + DWCHCK(__DW2(RX_KER_BUF_SIZE_LBN), RX_KER_BUF_SIZE_WIDTH); + DWCHCK(__DW2(RX_KER_BUF_REGION_LBN),RX_KER_BUF_REGION_WIDTH); + + LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH); + + RANGECHCK(bytes, RX_KER_BUF_SIZE_WIDTH); + RANGECHCK(region, RX_KER_BUF_REGION_WIDTH); + + ef_assert(desc); + + desc->dword[1] = ((bytes << __DW2(RX_KER_BUF_SIZE_LBN)) | + (region << __DW2(RX_KER_BUF_REGION_LBN)) | + (HIGH(dest, + RX_KER_BUF_ADR_LBN, + RX_KER_BUF_ADR_WIDTH))); + + desc->dword[0] = LOW(dest, + RX_KER_BUF_ADR_LBN, + RX_KER_BUF_ADR_WIDTH); +} + +/*! Setup a virtual buffer descriptor for an IPMODE transfer */ +ef_vi_inline void +__falcon_dma_tx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, unsigned bytes, + int port, int frag, + ef_vi_falcon_dma_tx_buf_desc *desc) +{ + DWCHCK(__DW2(TX_USR_PORT_LBN), TX_USR_PORT_WIDTH); + DWCHCK(__DW2(TX_USR_CONT_LBN), TX_USR_CONT_WIDTH); + DWCHCK(__DW2(TX_USR_BYTE_CNT_LBN), TX_USR_BYTE_CNT_WIDTH); + LWCHK(RX_KER_BUF_ADR_LBN, RX_KER_BUF_ADR_WIDTH); + DWCHCK(TX_USR_BYTE_OFS_LBN, TX_USR_BYTE_OFS_WIDTH); + + RANGECHCK(bytes, TX_USR_BYTE_CNT_WIDTH); + RANGECHCK(port, TX_USR_PORT_WIDTH); + RANGECHCK(frag, TX_USR_CONT_WIDTH); + RANGECHCK(buf_id, TX_USR_BUF_ID_WIDTH); + RANGECHCK(buf_ofs, TX_USR_BYTE_OFS_WIDTH); + + ef_assert(desc); + + desc->dword[1] = ((port << __DW2(TX_USR_PORT_LBN)) | + (frag << __DW2(TX_USR_CONT_LBN)) | + (bytes << __DW2(TX_USR_BYTE_CNT_LBN)) | + (HIGH(buf_id, + TX_USR_BUF_ID_LBN, + TX_USR_BUF_ID_WIDTH))); + + desc->dword[0] = ((LOW(buf_id, + TX_USR_BUF_ID_LBN, + (TX_USR_BUF_ID_WIDTH))) | + (buf_ofs << TX_USR_BYTE_OFS_LBN)); +} + +ef_vi_inline void +falcon_dma_tx_calc_ip_buf_4k(unsigned buf_vaddr, unsigned bytes, + int port, int frag, + ef_vi_falcon_dma_tx_buf_desc *desc) +{ + /* TODO FIXME [buf_vaddr] consists of the buffer index in the + ** high bits, and an offset in the low bits. Assumptions + ** permate the code that these can be rolled into one 32bit + ** value, so this is currently preserved for Falcon. 
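The split being described can be pictured as follows; a minimal illustration assuming the usual 4 KiB layout (the real EFVI_FALCON_BUFFER_4K_* macros live in headers outside this patch, so the names here are stand-ins):

    /* High bits of the buffer virtual address select the buffer-table
     * entry; the low 12 bits are the byte offset within that page. */
    #define EXAMPLE_BUF_4K_PAGE(vaddr)  ((vaddr) >> 12)
    #define EXAMPLE_BUF_4K_OFF(vaddr)   ((vaddr) & 0xfffu)

    /* e.g. vaddr 0x0001a010 -> buffer id 0x1a, offset 0x010 */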
But we + ** should change to support 8K pages + */ + unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr); + unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr); + + __falcon_dma_tx_calc_ip_buf( buf_id, buf_ofs, bytes, port, frag, desc); +} + +ef_vi_inline void +falcon_dma_tx_calc_ip_buf(unsigned buf_vaddr, unsigned bytes, int port, + int frag, ef_vi_falcon_dma_tx_buf_desc *desc) +{ + falcon_dma_tx_calc_ip_buf_4k(buf_vaddr, bytes, port, frag, desc); +} + +/*! Setup a virtual buffer based descriptor */ +ef_vi_inline void +__falcon_dma_rx_calc_ip_buf(unsigned buf_id, unsigned buf_ofs, + ef_vi_falcon_dma_rx_buf_desc *desc) +{ + /* check alignment of buffer offset and pack */ + ef_assert((buf_ofs & 0x1) == 0); + + buf_ofs >>= 1; + + DWCHCK(RX_USR_2BYTE_OFS_LBN, RX_USR_2BYTE_OFS_WIDTH); + DWCHCK(RX_USR_BUF_ID_LBN, RX_USR_BUF_ID_WIDTH); + + RANGECHCK(buf_ofs, RX_USR_2BYTE_OFS_WIDTH); + RANGECHCK(buf_id, RX_USR_BUF_ID_WIDTH); + + ef_assert(desc); + + desc->dword[0] = ((buf_ofs << RX_USR_2BYTE_OFS_LBN) | + (buf_id << RX_USR_BUF_ID_LBN)); +} + +ef_vi_inline void +falcon_dma_rx_calc_ip_buf_4k(unsigned buf_vaddr, + ef_vi_falcon_dma_rx_buf_desc *desc) +{ + /* TODO FIXME [buf_vaddr] consists of the buffer index in the + ** high bits, and an offset in the low bits. Assumptions + ** permeate the code that these can be rolled into one 32bit + ** value, so this is currently preserved for Falcon. But we + ** should change to support 8K pages + */ + unsigned buf_id = EFVI_FALCON_BUFFER_4K_PAGE(buf_vaddr); + unsigned buf_ofs = EFVI_FALCON_BUFFER_4K_OFF(buf_vaddr); + + __falcon_dma_rx_calc_ip_buf(buf_id, buf_ofs, desc); +} + +ef_vi_inline void +falcon_dma_rx_calc_ip_buf(unsigned buf_vaddr, + ef_vi_falcon_dma_rx_buf_desc *desc) +{ + falcon_dma_rx_calc_ip_buf_4k(buf_vaddr, desc); +} + + +ef_vi_inline ef_vi_dma_addr_t ef_physaddr(ef_addr efaddr) +{ + return (ef_vi_dma_addr_t) efaddr; +} + + +/*! Convert between an ef_addr and a buffer table index +** Assert that this was not a physical address +*/ +ef_vi_inline ef_vi_buffer_addr_t ef_bufaddr(ef_addr efaddr) +{ + ef_assert(efaddr < ((uint64_t)1 << 32) ); + + return (ef_vi_buffer_addr_t) efaddr; +} + + +/*! 
Setup an physical address based descriptor for an IPMODE transfer */ +ef_vi_inline void +falcon_dma_tx_calc_ip_phys(ef_vi_dma_addr_t src_dma_addr, unsigned bytes, + int port, int frag, + ef_vi_falcon_dma_tx_phys_desc *desc) +{ + + int region = 0; /* FIXME */ + int64_t src = dma_addr_to_u46(src_dma_addr); /* lower 46 bits */ + + DWCHCK(__DW2(TX_KER_PORT_LBN), TX_KER_PORT_WIDTH); + DWCHCK(__DW2(TX_KER_CONT_LBN), TX_KER_CONT_WIDTH); + DWCHCK(__DW2(TX_KER_BYTE_CNT_LBN), TX_KER_BYTE_CNT_WIDTH); + DWCHCK(__DW2(TX_KER_BUF_REGION_LBN),TX_KER_BUF_REGION_WIDTH); + + LWCHK(TX_KER_BUF_ADR_LBN, TX_KER_BUF_ADR_WIDTH); + + RANGECHCK(port, TX_KER_PORT_WIDTH); + RANGECHCK(frag, TX_KER_CONT_WIDTH); + RANGECHCK(bytes, TX_KER_BYTE_CNT_WIDTH); + RANGECHCK(region, TX_KER_BUF_REGION_WIDTH); + + desc->dword[1] = ((port << __DW2(TX_KER_PORT_LBN)) | + (frag << __DW2(TX_KER_CONT_LBN)) | + (bytes << __DW2(TX_KER_BYTE_CNT_LBN)) | + (region << __DW2(TX_KER_BUF_REGION_LBN)) | + (HIGH(src, + TX_KER_BUF_ADR_LBN, + TX_KER_BUF_ADR_WIDTH))); + + ef_assert_equal(TX_KER_BUF_ADR_LBN, 0); + desc->dword[0] = (uint32_t) src_dma_addr; +} + + +void falcon_vi_init(ef_vi* vi, void* vvis) +{ + struct vi_mappings *vm = (struct vi_mappings*)vvis; + uint16_t* ids; + + ef_assert(vi); + ef_assert(vvis); + ef_assert_equal(vm->signature, VI_MAPPING_SIGNATURE); + ef_assert_equal(vm->nic_type.arch, EF_VI_ARCH_FALCON); + + /* Initialise masks to zero, so that ef_vi_state_init() will + ** not do any harm when we don't have DMA queues. */ + vi->vi_rxq.mask = vi->vi_txq.mask = 0; + + /* Used for BUG5391_WORKAROUND. */ + vi->vi_txq.misalign_mask = 0; + + /* Initialise doorbell addresses to a distinctive small value + ** which will cause a segfault, to trap doorbell pushes to VIs + ** without DMA queues. */ + vi->vi_rxq.doorbell = vi->vi_txq.doorbell = (ef_vi_ioaddr_t)0xdb; + + ids = (uint16_t*) (vi->ep_state + 1); + + if( vm->tx_queue_capacity ) { + vi->vi_txq.mask = vm->tx_queue_capacity - 1; + vi->vi_txq.doorbell = vm->tx_bell + 12; + vi->vi_txq.descriptors = vm->tx_dma_falcon; + vi->vi_txq.ids = ids; + ids += vi->vi_txq.mask + 1; + /* Check that the id fifo fits in the space allocated. */ + ef_assert_le((char*) (vi->vi_txq.ids + vm->tx_queue_capacity), + (char*) vi->ep_state + + ef_vi_calc_state_bytes(vm->rx_queue_capacity, + vm->tx_queue_capacity)); + } + if( vm->rx_queue_capacity ) { + vi->vi_rxq.mask = vm->rx_queue_capacity - 1; + vi->vi_rxq.doorbell = vm->rx_bell + 12; + vi->vi_rxq.descriptors = vm->rx_dma_falcon; + vi->vi_rxq.ids = ids; + /* Check that the id fifo fits in the space allocated. 
*/ + ef_assert_le((char*) (vi->vi_rxq.ids + vm->rx_queue_capacity), + (char*) vi->ep_state + + ef_vi_calc_state_bytes(vm->rx_queue_capacity, + vm->tx_queue_capacity)); + } + + if( vm->nic_type.variant == 'A' ) { + vi->vi_txq.misalign_mask = 15; /* BUG5391_WORKAROUND */ + vi->vi_flags |= EF_VI_BUG5692_WORKAROUND; + } +} + + +int ef_vi_transmitv_init(ef_vi* vi, const ef_iovec* iov, int iov_len, + ef_request_id dma_id) +{ + ef_vi_txq* q = &vi->vi_txq; + ef_vi_txq_state* qs = &vi->ep_state->txq; + ef_vi_falcon_dma_tx_buf_desc* dp; + unsigned len, dma_len, di; + unsigned added_save = qs->added; + ef_addr dma_addr; + unsigned last_len = 0; + + ef_assert(iov_len > 0); + ef_assert(iov); + ef_assert_equal((dma_id & EF_REQUEST_ID_MASK), dma_id); + ef_assert_nequal(dma_id, 0xffff); + + dma_addr = iov->iov_base; + len = iov->iov_len; + + if( vi->vi_flags & EF_VI_ISCSI_TX_DDIG ) { + /* Last 4 bytes of placeholder for digest must be + * removed for h/w */ + ef_assert(len > 4); + last_len = iov[iov_len - 1].iov_len; + if( last_len <= 4 ) { + ef_assert(iov_len > 1); + --iov_len; + last_len = iov[iov_len - 1].iov_len - (4 - last_len); + } + else { + last_len = iov[iov_len - 1].iov_len - 4; + } + if( iov_len == 1 ) + len = last_len; + } + + while( 1 ) { + if( qs->added - qs->removed >= q->mask ) { + qs->added = added_save; + return -EAGAIN; + } + + dma_len = (~((unsigned) dma_addr) & 0xfff) + 1; + if( dma_len > len ) dma_len = len; + { /* BUG5391_WORKAROUND */ + unsigned misalign = + (unsigned) dma_addr & q->misalign_mask; + if( misalign && dma_len + misalign > 512 ) + dma_len = 512 - misalign; + } + + di = qs->added++ & q->mask; + dp = (ef_vi_falcon_dma_tx_buf_desc*) q->descriptors + di; + if( vi->vi_flags & EF_VI_TX_PHYS_ADDR ) + falcon_dma_tx_calc_ip_phys + (ef_physaddr(dma_addr), dma_len, /*port*/ 0, + (iov_len == 1 && dma_len == len) ? 0 : + EFVI_FALCON_DMA_TX_FRAG, dp); + else + falcon_dma_tx_calc_ip_buf + (ef_bufaddr(dma_addr), dma_len, /*port*/ 0, + (iov_len == 1 && dma_len == len) ? 0 : + EFVI_FALCON_DMA_TX_FRAG, dp); + + dma_addr += dma_len; + len -= dma_len; + + if( len == 0 ) { + if( --iov_len == 0 ) break; + ++iov; + dma_addr = iov->iov_base; + len = iov->iov_len; + if( (vi->vi_flags & EF_VI_ISCSI_TX_DDIG) && + (iov_len == 1) ) + len = last_len; + } + } + + q->ids[di] = (uint16_t) dma_id; + return 0; +} + + +void ef_vi_transmit_push(ef_vi* vi) +{ + ef_vi_wiob(); + writel((vi->ep_state->txq.added & vi->vi_txq.mask) << + __DW4(TX_DESC_WPTR_LBN), + vi->vi_txq.doorbell); +} + + +/*! The value of initial_rx_bytes is used to set RX_KER_BUF_SIZE in an initial +** receive descriptor here if physical addressing is being used. A value of +** zero represents 16384 bytes. This is okay, because caller must provide a +** buffer than is > MTU, and mac should filter anything bigger than that. +*/ +int ef_vi_receive_init(ef_vi* vi, ef_addr addr, ef_request_id dma_id, + int initial_rx_bytes) +{ + ef_vi_rxq* q = &vi->vi_rxq; + ef_vi_rxq_state* qs = &vi->ep_state->rxq; + unsigned di; + + if( ef_vi_receive_space(vi) ) { + di = qs->added++ & q->mask; + ef_assert_equal(q->ids[di], 0xffff); + q->ids[di] = (uint16_t) dma_id; + + if( ! 
(vi->vi_flags & EF_VI_RX_PHYS_ADDR) ) { + ef_vi_falcon_dma_rx_buf_desc* dp; + dp = (ef_vi_falcon_dma_rx_buf_desc*) + q->descriptors + di; + falcon_dma_rx_calc_ip_buf(ef_bufaddr(addr), dp); + } + else { + ef_vi_falcon_dma_rx_phys_desc* dp; + dp = (ef_vi_falcon_dma_rx_phys_desc*) + q->descriptors + di; + __falcon_dma_rx_calc_ip_phys(addr, dp, + initial_rx_bytes); + } + + return 0; + } + + return -EAGAIN; +} + + +void ef_vi_receive_push(ef_vi* vi) +{ + ef_vi_wiob(); + writel ((vi->ep_state->rxq.added & vi->vi_rxq.mask) << + __DW4(RX_DESC_WPTR_LBN), + vi->vi_rxq.doorbell); +} + + +ef_request_id ef_vi_receive_done(const ef_vi* vi, const ef_event* ef_ev) +{ + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*ef_ev); + unsigned di = ev->u32[0] & vi->vi_rxq.mask; + ef_request_id rq_id; + + ef_assert(EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX || + EF_EVENT_TYPE(*ef_ev) == EF_EVENT_TYPE_RX_DISCARD); + + /* Detect spurious / duplicate RX events. We may need to modify this + ** code so that we are robust if they happen. */ + ef_assert_equal(di, vi->ep_state->rxq.removed & vi->vi_rxq.mask); + + /* We only support 1 port: so events should be in order. */ + ef_assert(vi->vi_rxq.ids[di] != 0xffff); + + rq_id = vi->vi_rxq.ids[di]; + vi->vi_rxq.ids[di] = 0xffff; + ++vi->ep_state->rxq.removed; + return rq_id; +} + +/*! \cidoxg_end */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/pt_tx.c --- /dev/null +++ b/drivers/xen/sfc_netfront/pt_tx.c @@ -0,0 +1,91 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr + * \brief Packet-mode transmit interface. + * \date 2003/04/02 + */ + +/*! 
\cidoxg_lib_ef */ +#include "ef_vi_internal.h" + + +int ef_vi_transmit_init(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id) +{ + ef_iovec iov = { base, len }; + return ef_vi_transmitv_init(vi, &iov, 1, dma_id); +} + + +int ef_vi_transmit(ef_vi* vi, ef_addr base, int len, ef_request_id dma_id) +{ + ef_iovec iov = { base, len }; + int rc = ef_vi_transmitv_init(vi, &iov, 1, dma_id); + if( rc == 0 ) ef_vi_transmit_push(vi); + return rc; +} + + +int ef_vi_transmitv(ef_vi* vi, const ef_iovec* iov, int iov_len, + ef_request_id dma_id) +{ + int rc = ef_vi_transmitv_init(vi, iov, iov_len, dma_id); + if( rc == 0 ) ef_vi_transmit_push(vi); + return rc; +} + + +int ef_vi_transmit_unbundle(ef_vi* vi, const ef_event* __ev, + ef_request_id* ids) +{ + ef_request_id* ids_in = ids; + ef_vi_txq* q = &vi->vi_txq; + ef_vi_txq_state* qs = &vi->ep_state->txq; + const ef_vi_qword* ev = EF_GET_HW_EV_PTR(*__ev); + unsigned i, stop = (ev->u32[0] + 1) & q->mask; + + ef_assert(EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX || + EF_EVENT_TYPE(*__ev) == EF_EVENT_TYPE_TX_ERROR); + + /* Shouldn't be batching more than 64 descriptors, and should not go + ** backwards. */ + ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), 64); + /* Should not complete more than we've posted. */ + ef_assert_le((((ev->u32[0] + 1) - qs->removed) & q->mask), + qs->added - qs->removed); + + for( i = qs->removed & q->mask; i != stop; i = ++qs->removed & q->mask ) + if( q->ids[i] != 0xffff ) { + *ids++ = q->ids[i]; + q->ids[i] = 0xffff; + } + + ef_assert_le(ids - ids_in, EF_VI_TRANSMIT_BATCH); + + return (int) (ids - ids_in); +} + +/*! \cidoxg_end */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/sysdep.h --- /dev/null +++ b/drivers/xen/sfc_netfront/sysdep.h @@ -0,0 +1,184 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author stg + * \brief System dependent support for ef vi lib + * \date 2007/05/10 + */ + +/*! \cidoxg_include_ci_ul */ +#ifndef __CI_CIUL_SYSDEP_LINUX_H__ +#define __CI_CIUL_SYSDEP_LINUX_H__ + +/********************************************************************** + * Kernel version compatability + */ + +#if defined(__GNUC__) + +/* Linux kernel doesn't have stdint.h or [u]intptr_t. 
*/ +# if !defined(LINUX_VERSION_CODE) +# include +# endif +# include + +/* In Linux 2.6.24, linux/types.h has uintptr_t */ +# if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) +# if BITS_PER_LONG == 32 + typedef __u32 uintptr_t; +# else + typedef __u64 uintptr_t; +# endif +# endif + +/* But even 2.6.24 doesn't define intptr_t */ +# if BITS_PER_LONG == 32 + typedef __s32 intptr_t; +# else + typedef __s64 intptr_t; +# endif + +# if defined(__ia64__) +# define EF_VI_PRIx64 "lx" +# else +# define EF_VI_PRIx64 "llx" +# endif + +# define EF_VI_HF __attribute__((visibility("hidden"))) +# define EF_VI_HV __attribute__((visibility("hidden"))) + +# if defined(__i386__) || defined(__x86_64__) /* GCC x86/x64 */ + typedef unsigned long long ef_vi_dma_addr_t; +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) +# define ef_vi_wiob() __asm__ __volatile__ ("sfence") +# else +# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8") +# endif + +# endif +#endif + +#ifdef EFX_NOT_UPSTREAM + +/* Stuff for architectures/compilers not officially supported */ + +#if !defined(__GNUC__) +# if defined(__PPC__) /* GCC, PPC */ + typedef unsigned long ef_vi_dma_addr_t; +# define ef_vi_wiob() wmb() + +# ifdef __powerpc64__ +# ifdef CONFIG_SMP +# define CI_SMP_SYNC "\n eieio \n" /* memory cache sync */ +# define CI_SMP_ISYNC "\n isync \n" /* instr cache sync */ +# else +# define CI_SMP_SYNC +# define CI_SMP_ISYNC +# endif +# else /* for ppc32 systems */ +# ifdef CONFIG_SMP +# define CI_SMP_SYNC "\n eieio \n" +# define CI_SMP_ISYNC "\n sync \n" +# else +# define CI_SMP_SYNC +# define CI_SMP_ISYNC +# endif +# endif + +# elif defined(__ia64__) /* GCC, IA64 */ + typedef unsigned long ef_vi_dma_addr_t; +# define ef_vi_wiob() __asm__ __volatile__("mf.a": : :"memory") + +# else +# error Unknown processor - GNU C +# endif + +#elif defined(__PGI) +# error PGI not supported + +#elif defined(__INTEL_COMPILER) + +/* Intel compilers v7 claim to be very gcc compatible. */ +# if __INTEL_COMPILER >= 700 +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ > 91) +# define EF_VI_LIKELY(t) __builtin_expect((t), 1) +# define EF_VI_UNLIKELY(t) __builtin_expect((t), 0) +# endif + +# if __GNUC__ >= 3 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96) +# define ef_vi_wiob() __asm__ __volatile__ ("sfence") +# else +# define ef_vi_wiob() __asm__ __volatile__ (".byte 0x0F, 0xAE, 0xF8") +# endif + +# else +# error Old Intel compiler not supported. +# endif + +#else +# error Unknown compiler. +#endif + +#endif + + +# include + + +/********************************************************************** + * Extracting bit fields. + */ + +#define _QWORD_GET_LOW(f, v) \ + (((v).u32[0] >> (f##_LBN)) & ((1u << f##_WIDTH) - 1u)) +#define _QWORD_GET_HIGH(f, v) \ + (((v).u32[1] >> (f##_LBN - 32u)) & ((1u << f##_WIDTH) - 1u)) +#define _QWORD_GET_ANY(f, v) \ + (((v).u64[0] >> f##_LBN) & (((uint64_t) 1u << f##_WIDTH) - 1u)) + +#define QWORD_GET(f, v) \ + ((f##_LBN + f##_WIDTH) <= 32u \ + ? _QWORD_GET_LOW(f, (v)) \ + : ((f##_LBN >= 32u) ? _QWORD_GET_HIGH(f, (v)) : _QWORD_GET_ANY(f, (v)))) + +#define QWORD_GET_U(f, v) ((unsigned) QWORD_GET(f, (v))) + +#define _QWORD_TEST_BIT_LOW(f, v) ((v).u32[0] & (1u << (f##_LBN))) +#define _QWORD_TEST_BIT_HIGH(f, v) ((v).u32[1] & (1u << (f##_LBN - 32u))) + +#define QWORD_TEST_BIT(f, v) \ + (f##_LBN < 32 ? 
_QWORD_TEST_BIT_LOW(f, (v)) : _QWORD_TEST_BIT_HIGH(f, (v))) + + + + +#ifndef DECLSPEC_NORETURN +/* normally defined on Windows to expand to a declaration that the + function will not return */ +# define DECLSPEC_NORETURN +#endif + +#endif /* __CI_CIUL_SYSDEP_LINUX_H__ */ diff -r dd748ded9ba8 drivers/xen/sfc_netfront/vi_init.c --- /dev/null +++ b/drivers/xen/sfc_netfront/vi_init.c @@ -0,0 +1,183 @@ +/**************************************************************************** + * Copyright 2002-2005: Level 5 Networks Inc. + * Copyright 2005-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * \author djr + * \brief Initialisation of VIs. + * \date 2007/06/08 + */ + +#include "ef_vi_internal.h" + +#define EF_VI_STATE_BYTES(rxq_sz, txq_sz) \ + (sizeof(ef_vi_state) + (rxq_sz) * sizeof(uint16_t) \ + + (txq_sz) * sizeof(uint16_t)) + +int ef_vi_calc_state_bytes(int rxq_sz, int txq_sz) +{ + ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz)); + ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz)); + + return EF_VI_STATE_BYTES(rxq_sz, txq_sz); +} + + +int ef_vi_state_bytes(ef_vi* vi) +{ + int rxq_sz = 0, txq_sz = 0; + if( ef_vi_receive_capacity(vi) ) + rxq_sz = ef_vi_receive_capacity(vi) + 1; + if( ef_vi_transmit_capacity(vi) ) + txq_sz = ef_vi_transmit_capacity(vi) + 1; + + ef_assert(rxq_sz == 0 || EF_VI_IS_POW2(rxq_sz)); + ef_assert(txq_sz == 0 || EF_VI_IS_POW2(txq_sz)); + + return EF_VI_STATE_BYTES(rxq_sz, txq_sz); +} + + +void ef_eventq_state_init(ef_vi* evq) +{ + int j; + + for (j = 0; jevq_state->rx_dup_state[j]; + rx_dup_state->bad_sop = 0; + rx_dup_state->rx_last_desc_ptr = -1; + rx_dup_state->frag_num = 0; + } + + evq->evq_state->evq_ptr = 0; +} + + +void ef_vi_state_init(ef_vi* vi) +{ + ef_vi_state* state = vi->ep_state; + unsigned i; + + state->txq.added = state->txq.removed = 0; + state->rxq.added = state->rxq.removed = 0; + + if( vi->vi_rxq.mask ) + for( i = 0; i <= vi->vi_rxq.mask; ++i ) + vi->vi_rxq.ids[i] = (uint16_t) -1; + if( vi->vi_txq.mask ) + for( i = 0; i <= vi->vi_txq.mask; ++i ) + vi->vi_txq.ids[i] = (uint16_t) -1; +} + + +void ef_vi_init_mapping_evq(void* data_area, struct ef_vi_nic_type nic_type, + int instance, unsigned evq_bytes, void* base, + void* timer_reg) +{ + struct vi_mappings* vm = (struct vi_mappings*) data_area; + + vm->signature = VI_MAPPING_SIGNATURE; + vm->vi_instance = instance; + vm->nic_type = nic_type; + vm->evq_bytes = evq_bytes; + vm->evq_base = base; + vm->evq_timer_reg = timer_reg; +} + + +void ef_vi_init(ef_vi* vi, void* vvis, ef_vi_state* state, + ef_eventq_state* evq_state, enum ef_vi_flags vi_flags) +{ + struct vi_mappings* vm = (struct vi_mappings*) vvis; + + vi->vi_i = 
vm->vi_instance; + vi->ep_state = state; + vi->vi_flags = vi_flags; + + switch( vm->nic_type.arch ) { + case EF_VI_ARCH_FALCON: + falcon_vi_init(vi, vvis); + break; + default: + /* ?? TODO: We should return an error code. */ + ef_assert(0); + break; + } + + if( vm->evq_bytes ) { + vi->evq_state = evq_state; + vi->evq_mask = vm->evq_bytes - 1u; + vi->evq_base = vm->evq_base; + vi->evq_timer_reg = vm->evq_timer_reg; + } + + EF_VI_MAGIC_SET(vi, EF_VI); +} + + +/* Initialise [data_area] with information required to initialise an ef_vi. + * In the following, an unused param should be set to NULL. Note the case + * marked (*) of [iobuf_mmap] for falcon/driver; for the normal driver this + * must be NULL. + * + * \param data_area [in,out] required, must ref at least VI_MAPPING_SIZE + * bytes + * \param io_mmap [in] ef1, required + * falcon, required + * \param iobuf_mmap [in] ef1, unused + * falcon, required + */ +void ef_vi_init_mapping_vi(void* data_area, struct ef_vi_nic_type nic_type, + unsigned rxq_capacity, unsigned txq_capacity, + int instance, void* io_mmap, + void* iobuf_mmap_rx, void* iobuf_mmap_tx, + enum ef_vi_flags vi_flags) +{ + struct vi_mappings* vm = (struct vi_mappings*) data_area; + int rx_desc_bytes, rxq_bytes; + + ef_assert(rxq_capacity > 0 || txq_capacity > 0); + ef_assert(vm); + ef_assert(io_mmap); + ef_assert(iobuf_mmap_rx || iobuf_mmap_tx); + + vm->signature = VI_MAPPING_SIGNATURE; + vm->vi_instance = instance; + vm->nic_type = nic_type; + + rx_desc_bytes = (vi_flags & EF_VI_RX_PHYS_ADDR) ? 8 : 4; + rxq_bytes = rxq_capacity * rx_desc_bytes; + rxq_bytes = (rxq_bytes + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + + if( iobuf_mmap_rx == iobuf_mmap_tx ) + iobuf_mmap_tx = (char*) iobuf_mmap_rx + rxq_bytes; + + vm->rx_queue_capacity = rxq_capacity; + vm->rx_dma_falcon = iobuf_mmap_rx; + vm->rx_bell = (char*) io_mmap + (RX_DESC_UPD_REG_KER_OFST & 4095); + vm->tx_queue_capacity = txq_capacity; + vm->tx_dma_falcon = iobuf_mmap_tx; + vm->tx_bell = (char*) io_mmap + (TX_DESC_UPD_REG_KER_OFST & 4095); +} diff -r dd748ded9ba8 drivers/xen/sfc_netutil/Makefile --- /dev/null +++ b/drivers/xen/sfc_netutil/Makefile @@ -0,0 +1,10 @@ +EXTRA_CFLAGS += -Werror + +ifdef GGOV +EXTRA_CFLAGS += -fprofile-arcs -ftest-coverage -DEFX_GCOV +endif + +obj-$(CONFIG_XEN_NETDEV_ACCEL_SFC_UTIL) := sfc_netutil.o + +sfc_netutil-objs := accel_cuckoo_hash.o accel_msg_iface.o accel_util.o + diff -r dd748ded9ba8 drivers/xen/sfc_netutil/accel_cuckoo_hash.c --- /dev/null +++ b/drivers/xen/sfc_netutil/accel_cuckoo_hash.c @@ -0,0 +1,651 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
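Taken together with ef_vi_init() above, a frontend brings a VI up roughly as follows; a minimal sketch in which nic_type, instance, evq_bytes, the mapped doorbell/descriptor/event-queue addresses and the queue capacities are assumed to have arrived from the backend's message exchange (error handling trimmed):

    char data_area[VI_MAPPING_SIZE];
    ef_vi vi;
    ef_eventq_state evq_state;
    ef_vi_state *state;

    state = kmalloc(ef_vi_calc_state_bytes(512, 512), GFP_KERNEL);

    ef_vi_init_mapping_evq(data_area, nic_type, instance, evq_bytes,
                           evq_base, timer_reg);
    ef_vi_init_mapping_vi(data_area, nic_type, 512 /* rxq */,
                          512 /* txq */, instance, io_mmap,
                          iobuf_rx, iobuf_tx,
                          EF_VI_RX_PHYS_ADDR | EF_VI_TX_PHYS_ADDR);

    ef_vi_init(&vi, data_area, state, &evq_state,
               EF_VI_RX_PHYS_ADDR | EF_VI_TX_PHYS_ADDR);
    ef_vi_state_init(&vi);
    ef_eventq_state_init(&vi);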
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include /* needed for linux/random.h */ +#include + +#include "accel_cuckoo_hash.h" +#include "accel_util.h" + +static inline int cuckoo_hash_key_compare(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key1, + cuckoo_hash_key *key2) +{ + return !memcmp(key1, key2, hashtab->key_length); +} + + +static inline void cuckoo_hash_key_set(cuckoo_hash_key *key1, + cuckoo_hash_key *key2) +{ + *key1 = *key2; +} + + +/* + * Sets hash function parameters. Chooses "a" to be odd, 0 < a < 2^w + * where w is the length of the key + */ +static void set_hash_parameters(cuckoo_hash_table *hashtab) +{ + again: + hashtab->a0 = hashtab->a1 = 0; + + /* Make sure random */ + get_random_bytes(&hashtab->a0, hashtab->key_length); + get_random_bytes(&hashtab->a1, hashtab->key_length); + + /* Make sure odd */ + hashtab->a0 |= 1; + hashtab->a1 |= 1; + + /* Being different is good */ + if (hashtab->a0 != hashtab->a1) + return; + + goto again; +} + +int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits, + unsigned key_length) +{ + char *table_mem; + unsigned length = 1 << length_bits; + + BUG_ON(length_bits >= sizeof(unsigned) * 8); + BUG_ON(key_length > sizeof(cuckoo_hash_key)); + + table_mem = kmalloc(sizeof(cuckoo_hash_entry) * 2 * length, GFP_KERNEL); + + if (table_mem == NULL) + return -ENOMEM; + + hashtab->length = length; + hashtab->length_bits = length_bits; + hashtab->key_length = key_length; + hashtab->entries = 0; + + hashtab->table0 = (cuckoo_hash_entry *)table_mem; + hashtab->table1 = (cuckoo_hash_entry *) + (table_mem + length * sizeof(cuckoo_hash_entry)); + + set_hash_parameters(hashtab); + + /* Zero the table */ + memset(hashtab->table0, 0, length * 2 * sizeof(cuckoo_hash_entry)); + + return 0; +} +EXPORT_SYMBOL_GPL(cuckoo_hash_init); + +void cuckoo_hash_destroy(cuckoo_hash_table *hashtab) +{ + if (hashtab->table0 != NULL) + kfree(hashtab->table0); +} + +EXPORT_SYMBOL_GPL(cuckoo_hash_destroy); + +/* + * This computes sizeof(cuckoo_hash) bits of hash, not all will be + * necessarily used, but the hash function throws away any that + * aren't + */ +static inline void cuckoo_compute_hash_helper(cuckoo_hash_table *hashtab, + cuckoo_hash_key *a, + cuckoo_hash_key *x, + cuckoo_hash *result) +{ + u64 multiply_result = 0, a_temp, x_temp; + u32 carry = 0; + u32 *a_words; + u32 *x_words; + int i; + + /* + * As the mod and div operations in the function effectively + * reduce and shift the bits of the product down to just the + * third word, we need only compute that and return it as a + * result. + * + * Do enough long multiplication to get the word we need + */ + + /* This assumes things about the sizes of the key and hash */ + BUG_ON(hashtab->key_length % sizeof(u32) != 0); + BUG_ON(sizeof(cuckoo_hash) != sizeof(u32)); + + a_words = (u32 *)a; + x_words = (u32 *)x; + + for (i = 0; i < hashtab->key_length / sizeof(u32); i++) { + a_temp = a_words[i]; + x_temp = x_words[i]; + + multiply_result = (a_temp * x_temp) + carry; + carry = (multiply_result >> 32) & 0xffffffff; + } + + *result = multiply_result & 0xffffffff; +} + + +/* + * Want to implement (ax mod 2^w) div 2^(w-q) for odd a, 0 < a < 2^w; + * w is the length of the key, q is the length of the hash, I think. 
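For a single 32-bit key word this collapses to the familiar multiply-shift hash; a minimal sketch, with q standing for the table's length_bits:

    /* h(x) = ((a * x) mod 2^32) >> (32 - q), a odd, 0 < q <= 32 */
    static inline u32 example_multiply_shift_hash(u32 a, u32 x, unsigned q)
    {
            return (u32)(a * x) >> (32 - q);
    }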
+ * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf + */ +static cuckoo_hash cuckoo_compute_hash(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, + cuckoo_hash_key *a) +{ + unsigned q = hashtab->length_bits; + unsigned shift = 32 - q; + unsigned mask = ((1 << q) - 1) << shift; + cuckoo_hash hash; + + cuckoo_compute_hash_helper(hashtab, a, key, &hash); + + /* + * Take the top few bits to get the right length for this + * hash table + */ + hash = (hash & mask) >> shift; + + BUG_ON(hash >= hashtab->length); + + return hash; +} + + +static int cuckoo_hash_lookup0(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, + cuckoo_hash_value *value) +{ + cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0); + + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) + && cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key), + key)) { + *value = hashtab->table0[hash].value; + return 1; + } + + return 0; +} + +static int cuckoo_hash_lookup1(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, + cuckoo_hash_value *value) +{ + cuckoo_hash hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1); + + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) + && cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key), + key)) { + *value = hashtab->table1[hash].value; + return 1; + } + + return 0; +} + + +int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, + cuckoo_hash_value *value) +{ + return cuckoo_hash_lookup0(hashtab, key, value) + || cuckoo_hash_lookup1(hashtab, key, value); +} +EXPORT_SYMBOL_GPL(cuckoo_hash_lookup); + + +/* Transfer any active entries from "old_table" into hashtab */ +static int cuckoo_hash_transfer_entries(cuckoo_hash_table *hashtab, + cuckoo_hash_entry *old_table, + unsigned capacity) +{ + int i, rc; + cuckoo_hash_entry *entry; + + hashtab->entries = 0; + + for (i = 0; i < capacity; i++) { + entry = &old_table[i]; + if (entry->state == CUCKOO_HASH_STATE_OCCUPIED) { + rc = cuckoo_hash_add(hashtab, &(entry->key), + entry->value, 0); + if (rc != 0) { + return rc; + } + } + } + + return 0; +} + + +int cuckoo_hash_rehash(cuckoo_hash_table *hashtab) +{ + cuckoo_hash_entry *new_table; + cuckoo_hash_table old_hashtab; + int resize = 0, rc, rehash_count; + + /* + * Store old tables so we can access the existing values and + * copy across + */ + memcpy(&old_hashtab, hashtab, sizeof(cuckoo_hash_table)); + + /* resize if hashtable is more than half full */ + if (old_hashtab.entries > old_hashtab.length && + old_hashtab.length_bits < 32) + resize = 1; + + resize: + if (resize) { + new_table = kmalloc(sizeof(cuckoo_hash_entry) * 4 * hashtab->length, + GFP_ATOMIC); + if (new_table == NULL) { + rc = -ENOMEM; + goto err; + } + + hashtab->length = 2 * hashtab->length; + hashtab->length_bits++; + } else { + new_table = kmalloc(sizeof(cuckoo_hash_entry) * 2 * hashtab->length, + GFP_ATOMIC); + if (new_table == NULL) { + rc = -ENOMEM; + goto err; + } + } + + /* + * Point hashtab to new memory region so we can try to + * construct new table + */ + hashtab->table0 = new_table; + hashtab->table1 = (cuckoo_hash_entry *) + ((char *)new_table + hashtab->length * sizeof(cuckoo_hash_entry)); + + rehash_count = 0; + + again: + /* Zero the new tables */ + memset(new_table, 0, hashtab->length * 2 * sizeof(cuckoo_hash_entry)); + + /* Choose new parameters for the hash functions */ + set_hash_parameters(hashtab); + + /* + * Multiply old_table_length by 2 as the length refers to each + * table, and there are two of them. 
This assumes that they + * are arranged sequentially in memory, so assert it + */ + BUG_ON(((char *)old_hashtab.table1) != + ((char *)old_hashtab.table0 + old_hashtab.length + * sizeof(cuckoo_hash_entry))); + rc = cuckoo_hash_transfer_entries(hashtab, old_hashtab.table0, + old_hashtab.length * 2); + if (rc < 0) { + /* Problem */ + if (rc == -ENOSPC) { + ++rehash_count; + if (rehash_count < CUCKOO_HASH_MAX_LOOP) { + /* + * Wanted to rehash, but rather than + * recurse we can just do it here + */ + goto again; + } else { + /* + * Didn't manage to rehash, so let's + * go up a size (if we haven't already + * and there's space) + */ + if (!resize && hashtab->length_bits < 32) { + resize = 1; + kfree(new_table); + goto resize; + } + else + goto err; + } + } + else + goto err; + } + + /* Success, I think. Free up the old table */ + kfree(old_hashtab.table0); + + /* We should have put all the entries from old table in the new one */ + BUG_ON(hashtab->entries != old_hashtab.entries); + + return 0; + err: + EPRINTK("%s: Rehash failed, giving up\n", __FUNCTION__); + /* Some other error, give up, at least restore table to how it was */ + memcpy(hashtab, &old_hashtab, sizeof(cuckoo_hash_table)); + if (new_table) + kfree(new_table); + return rc; +} +EXPORT_SYMBOL_GPL(cuckoo_hash_rehash); + + +static int +cuckoo_hash_insert_or_displace(cuckoo_hash_entry *table, unsigned hash, + cuckoo_hash_key *key, + cuckoo_hash_value value, + cuckoo_hash_key *displaced_key, + cuckoo_hash_value *displaced_value) +{ + if (table[hash].state == CUCKOO_HASH_STATE_VACANT) { + cuckoo_hash_key_set(&(table[hash].key), key); + table[hash].value = value; + table[hash].state = CUCKOO_HASH_STATE_OCCUPIED; + + return 1; + } else { + cuckoo_hash_key_set(displaced_key, &(table[hash].key)); + *displaced_value = table[hash].value; + cuckoo_hash_key_set(&(table[hash].key), key); + table[hash].value = value; + + return 0; + } +} + + +int cuckoo_hash_add(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, + cuckoo_hash_value value, int can_rehash) +{ + cuckoo_hash hash0, hash1; + int i, rc; + cuckoo_hash_key key1, key2; + + cuckoo_hash_key_set(&key1, key); + + again: + i = 0; + do { + hash0 = cuckoo_compute_hash(hashtab, &key1, &hashtab->a0); + if (cuckoo_hash_insert_or_displace(hashtab->table0, hash0, + &key1, value, &key2, + &value)) { + /* Success */ + hashtab->entries++; + return 0; + } + + hash1 = cuckoo_compute_hash(hashtab, &key2, &hashtab->a1); + if (cuckoo_hash_insert_or_displace(hashtab->table1, hash1, + &key2, value, &key1, + &value)) { + /* Success */ + hashtab->entries++; + return 0; + } + } while (++i < CUCKOO_HASH_MAX_LOOP); + + if (can_rehash) { + if ((rc = cuckoo_hash_rehash(hashtab)) < 0) { + /* + * Give up - this will drop whichever + * key/value pair we have currently displaced + * on the floor + */ + return rc; + } + goto again; + } + + EPRINTK("%s: failed hash add\n", __FUNCTION__); + /* + * Couldn't do it - bad as we've now removed some random thing + * from the table, and will just drop it on the floor. 
Better + * would be to somehow revert the table to the state it was in + * at the start + */ + return -ENOSPC; +} +EXPORT_SYMBOL_GPL(cuckoo_hash_add); + + +int cuckoo_hash_add_check(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, cuckoo_hash_value value, + int can_rehash) +{ + int stored_value; + + if (cuckoo_hash_lookup(hashtab, key, &stored_value)) + return -EBUSY; + + return cuckoo_hash_add(hashtab, key, value, can_rehash); +} +EXPORT_SYMBOL_GPL(cuckoo_hash_add_check); + + +int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key) +{ + cuckoo_hash hash; + + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0); + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && + cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key), + key)) { + hashtab->table0[hash].state = CUCKOO_HASH_STATE_VACANT; + hashtab->entries--; + return 0; + } + + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1); + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && + cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key), + key)) { + hashtab->table1[hash].state = CUCKOO_HASH_STATE_VACANT; + hashtab->entries--; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(cuckoo_hash_remove); + + +int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, + cuckoo_hash_value value) +{ + cuckoo_hash hash; + + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a0); + if ((hashtab->table0[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && + cuckoo_hash_key_compare(hashtab, &(hashtab->table0[hash].key), + key)) { + hashtab->table0[hash].value = value; + return 0; + } + + hash = cuckoo_compute_hash(hashtab, key, &hashtab->a1); + if ((hashtab->table1[hash].state == CUCKOO_HASH_STATE_OCCUPIED) && + cuckoo_hash_key_compare(hashtab, &(hashtab->table1[hash].key), + key)) { + hashtab->table1[hash].value = value; + return 0; + } + + return -EINVAL; +} +EXPORT_SYMBOL_GPL(cuckoo_hash_update); + + +void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab) +{ + hashtab->iterate_index = 0; +} +EXPORT_SYMBOL_GPL(cuckoo_hash_iterate_reset); + + +int cuckoo_hash_iterate(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, cuckoo_hash_value *value) +{ + unsigned index; + + while (hashtab->iterate_index < hashtab->length) { + index = hashtab->iterate_index; + ++hashtab->iterate_index; + if (hashtab->table0[index].state == CUCKOO_HASH_STATE_OCCUPIED) { + *key = hashtab->table0[index].key; + *value = hashtab->table0[index].value; + return 0; + } + } + + while (hashtab->iterate_index >= hashtab->length && + hashtab->iterate_index < hashtab->length * 2) { + index = hashtab->iterate_index - hashtab->length; + ++hashtab->iterate_index; + if (hashtab->table1[index].state == CUCKOO_HASH_STATE_OCCUPIED) { + *key = hashtab->table1[index].key; + *value = hashtab->table1[index].value; + return 0; + } + } + + return -ENOSPC; +} +EXPORT_SYMBOL_GPL(cuckoo_hash_iterate); + + +#if 0 +void cuckoo_hash_valid(cuckoo_hash_table *hashtab) +{ + int i, entry_count = 0; + + for (i=0; i < hashtab->length; i++) { + EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT && + hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED); + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) + entry_count++; + EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT && + hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED); + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) + entry_count++; + } + + if (entry_count != hashtab->entries) { + EPRINTK("%s: bad count\n", 
__FUNCTION__); + cuckoo_hash_dump(hashtab); + return; + } + + for (i=0; i< hashtab->length; i++) { + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) + if (i != cuckoo_compute_hash(hashtab, + &hashtab->table0[i].key, + &hashtab->a0)) { + EPRINTK("%s: Bad key table 0 index %d\n", + __FUNCTION__, i); + cuckoo_hash_dump(hashtab); + return; + } + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) + if (i != cuckoo_compute_hash(hashtab, + &hashtab->table1[i].key, + &hashtab->a1)) { + EPRINTK("%s: Bad key table 1 index %d\n", + __FUNCTION__, i); + cuckoo_hash_dump(hashtab); + return; + } + } + +} +EXPORT_SYMBOL_GPL(cuckoo_hash_valid); + + +void cuckoo_hash_dump(cuckoo_hash_table *hashtab) +{ + int i, entry_count; + + entry_count = 0; + for (i=0; i < hashtab->length; i++) { + EPRINTK_ON(hashtab->table0[i].state != CUCKOO_HASH_STATE_VACANT && + hashtab->table0[i].state != CUCKOO_HASH_STATE_OCCUPIED); + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) + entry_count++; + EPRINTK_ON(hashtab->table1[i].state != CUCKOO_HASH_STATE_VACANT && + hashtab->table1[i].state != CUCKOO_HASH_STATE_OCCUPIED); + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) + entry_count++; + } + + EPRINTK("======================\n"); + EPRINTK("Cuckoo hash table dump\n"); + EPRINTK("======================\n"); + EPRINTK("length: %d; length_bits: %d; key_length: %d\n", hashtab->length, + hashtab->length_bits, hashtab->key_length); + EPRINTK("Recorded entries: %d\n", hashtab->entries); + EPRINTK("Counted entries: %d\n", entry_count); + EPRINTK("a0: %llx; a1: %llx\n", hashtab->a0, hashtab->a1); + EPRINTK("-----------------------------------------\n"); + EPRINTK("Index Occupied Key Value Index0 Index1\n"); + EPRINTK("-----------------------------------------\n"); + for (i=0; i< hashtab->length; i++) { + if (hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED) + EPRINTK("%d %d %llx %d %d %d\n", i, + hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED, + hashtab->table0[i].key, hashtab->table0[i].value, + cuckoo_compute_hash(hashtab, &hashtab->table0[i].key, + &hashtab->a0), + cuckoo_compute_hash(hashtab, &hashtab->table0[i].key, + &hashtab->a1)); + else + EPRINTK("%d %d - - - -\n", i, + hashtab->table0[i].state == CUCKOO_HASH_STATE_OCCUPIED); + + } + EPRINTK("-----------------------------------------\n"); + EPRINTK("Index Occupied Key Value Index0 Index1\n"); + EPRINTK("-----------------------------------------\n"); + for (i=0; i< hashtab->length; i++) { + if (hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED) + EPRINTK("%d %d %llx %d %d %d\n", i, + hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED, + hashtab->table1[i].key, hashtab->table1[i].value, + cuckoo_compute_hash(hashtab, &hashtab->table1[i].key, + &hashtab->a0), + cuckoo_compute_hash(hashtab, &hashtab->table1[i].key, + &hashtab->a1)); + else + EPRINTK("%d %d - - - -\n", i, + hashtab->table1[i].state == CUCKOO_HASH_STATE_OCCUPIED); + } + EPRINTK("======================\n"); +} +EXPORT_SYMBOL_GPL(cuckoo_hash_dump); +#endif diff -r dd748ded9ba8 drivers/xen/sfc_netutil/accel_cuckoo_hash.h --- /dev/null +++ b/drivers/xen/sfc_netutil/accel_cuckoo_hash.h @@ -0,0 +1,227 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can 
redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +/* + * A cuckoo hash table consists of two sub tables. Each entry can + * hash to a position in each table. If, on entry, its position is + * found to be occupied, the existing element is moved to it's other + * location. This recurses until success or a loop is found. If a + * loop is found the table is rehashed. + * + * See http://www.it-c.dk/people/pagh/papers/cuckoo-jour.pdf + */ + +#ifndef NET_ACCEL_CUCKOO_HASH_H +#define NET_ACCEL_CUCKOO_HASH_H + +/*! Type used for hash table keys of ip pairs */ +typedef struct { + u32 local_ip; + //u32 remote_ip; + u16 local_port; + //u16 remote_port; + /* Technically only 1 bit, but use 16 to make key a round + number size */ + u16 proto; +} cuckoo_hash_ip_key; + +/*! Type used for hash table keys of mac addresses */ +typedef u64 cuckoo_hash_mac_key; + +/*! This type is designed to be large enough to hold all supported key + * sizes to avoid having to malloc storage for them. + */ +typedef u64 cuckoo_hash_key; + +/*! Type used for the values stored in the hash table */ +typedef int cuckoo_hash_value; + +/*! Type used for the hash used to index the table */ +typedef u32 cuckoo_hash; + +/*! How long to spend displacing values when adding before giving up + * and rehashing */ +#define CUCKOO_HASH_MAX_LOOP (hashtab->length) + +/*! State of hash table entry */ +typedef enum { + CUCKOO_HASH_STATE_VACANT = 0, + CUCKOO_HASH_STATE_OCCUPIED +} cuckoo_hash_state; + +/*! An entry in the hash table */ +typedef struct { + cuckoo_hash_state state; + cuckoo_hash_key key; + cuckoo_hash_value value; +} cuckoo_hash_entry; + +/*! A cuckoo hash table */ +typedef struct { + /*! The length of each table (NB. there are two tables of this + * length) */ + unsigned length; + /*! The length of each table in bits */ + unsigned length_bits; + /*! The length of the key in bytes */ + unsigned key_length; + /*! The number of entries currently stored in the table */ + unsigned entries; + /*! Index into table used by cuckoo_hash_iterate */ + unsigned iterate_index; + + /* parameter of hash functions */ + /*! The "a" parameter of the first hash function */ + cuckoo_hash_key a0; + /*! The "a" parameter of the second hash function */ + cuckoo_hash_key a1; + + /*! The first table */ + cuckoo_hash_entry *table0; + /*! The second table */ + cuckoo_hash_entry *table1; +} cuckoo_hash_table; + +/*! Initialise the cuckoo has table + * + * \param hashtab A pointer to an unitialised hash table structure + * \param length_bits The number of elements in each table equals + * 2**length_bits + * \param key_length The length of the key in bytes + * + * \return 0 on success, -ENOMEM if it couldn't allocate the tables + */ +extern +int cuckoo_hash_init(cuckoo_hash_table *hashtab, unsigned length_bits, + unsigned key_length); + + +/*! 
Destroy a hash table + * + * \param hashtab A hash table that has previously been passed to a + * successful call of cuckoo_hash_init() + */ +extern +void cuckoo_hash_destroy(cuckoo_hash_table *hashtab); + + +/*! Lookup an entry in the hash table + * + * \param hashtab The hash table in which to look. + * \param key Pointer to a mac address to use as the key + * \param value On exit set to the value stored if key was present + * + * \return 0 if not present in the table, non-zero if it is (and value + * is set accordingly) + */ +extern +int cuckoo_hash_lookup(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, + cuckoo_hash_value *value); + +/*! Add an entry to the hash table. Key must not be a duplicate of + * anything already in the table. If this is a risk, see + * cuckoo_hash_add_check + * + * \param hashtab The hash table to add the entry to + * \param key Pointer to a mac address to use as a key + * \param value The value to store + * \param can_rehash Flag to allow the add function to rehash the + * table if necessary + * + * \return 0 on success, non-zero on failure. -ENOSPC means it just + * couldn't find anywhere to put it - this is bad and probably means + * an entry has been dropped on the floor (but the entry you just + * tried to add may now be included) + */ +extern +int cuckoo_hash_add(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, + cuckoo_hash_value value, + int can_rehash); + +/*! Same as cuckoo_hash_add but first checks to ensure entry is not + * already there + * \return -EBUSY if already there + */ + +extern +int cuckoo_hash_add_check(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, + cuckoo_hash_value value, + int can_rehash); +/*! Remove an entry from the table + * + * \param hashtab The hash table to remove the entry from + * \param key The key that was used to previously add the entry + * + * \return 0 on success, -EINVAL if the entry couldn't be found + */ +extern +int cuckoo_hash_remove(cuckoo_hash_table *hashtab, cuckoo_hash_key *key); + + +/*! Helper for those using mac addresses to convert to a key for the + * hash table + */ +static inline cuckoo_hash_mac_key cuckoo_mac_to_key(const u8 *mac) +{ + return (cuckoo_hash_mac_key)(mac[0]) + | (cuckoo_hash_mac_key)(mac[1]) << 8 + | (cuckoo_hash_mac_key)(mac[2]) << 16 + | (cuckoo_hash_mac_key)(mac[3]) << 24 + | (cuckoo_hash_mac_key)(mac[4]) << 32 + | (cuckoo_hash_mac_key)(mac[5]) << 40; +} + + +/*! Update an entry already in the hash table to take a new value + * + * \param hashtab The hash table to add the entry to + * \param key Pointer to a mac address to use as a key + * \param value The value to store + * + * \return 0 on success, non-zero on failure. + */ +int cuckoo_hash_update(cuckoo_hash_table *hashtab, cuckoo_hash_key *key, + cuckoo_hash_value value); + + +/*! Go through the hash table and return all used entries (one per call) + * + * \param hashtab The hash table to iterate over + * \param key Pointer to a key to take the returned key + * \param value Pointer to a value to take the returned value + * + * \return 0 on success (key, value set), non-zero on failure. 
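Putting the interface together, one intended use (per the mac-key helper above) is mapping a MAC address to a small integer handle; a minimal sketch in which the table size, the value 42 and the mac[] array are arbitrary stand-ins, and error handling is trimmed:

    cuckoo_hash_table tab;
    cuckoo_hash_mac_key mac_key;
    cuckoo_hash_value val;

    if (cuckoo_hash_init(&tab, 4 /* 2^4 slots per sub-table */,
                         sizeof(cuckoo_hash_mac_key)) == 0) {
            mac_key = cuckoo_mac_to_key(mac);
            if (cuckoo_hash_add_check(&tab, (cuckoo_hash_key *)&mac_key,
                                      42, 1 /* may rehash */) == 0 &&
                cuckoo_hash_lookup(&tab, (cuckoo_hash_key *)&mac_key, &val))
                    EPRINTK("value %d\n", val);  /* prints 42 */
            cuckoo_hash_remove(&tab, (cuckoo_hash_key *)&mac_key);
            cuckoo_hash_destroy(&tab);
    }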
+ */ +int cuckoo_hash_iterate(cuckoo_hash_table *hashtab, + cuckoo_hash_key *key, cuckoo_hash_value *value); +void cuckoo_hash_iterate_reset(cuckoo_hash_table *hashtab); + +/* debug, not compiled by default */ +void cuckoo_hash_valid(cuckoo_hash_table *hashtab); +void cuckoo_hash_dump(cuckoo_hash_table *hashtab); + +#endif /* NET_ACCEL_CUCKOO_HASH_H */ diff -r dd748ded9ba8 drivers/xen/sfc_netutil/accel_msg_iface.c --- /dev/null +++ b/drivers/xen/sfc_netutil/accel_msg_iface.c @@ -0,0 +1,301 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include + +#include "accel_util.h" +#include "accel_msg_iface.h" + +#define NET_ACCEL_MSG_Q_SIZE (1024) +#define NET_ACCEL_MSG_Q_MASK (NET_ACCEL_MSG_Q_SIZE - 1) + +#ifdef NDEBUG +#define NET_ACCEL_CHECK_MAGIC(_p, _errval) +#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id) +#else +#define NET_ACCEL_CHECK_MAGIC(_p, _errval) \ + if (_p->magic != NET_ACCEL_MSG_MAGIC) { \ + printk(KERN_ERR "%s: passed invalid shared page %p!\n", \ + __FUNCTION__, _p); \ + return _errval; \ + } +#define NET_ACCEL_SHOW_QUEUE(_t, _q, _id) \ + printk(_t ": queue %d write %x read %x base %x limit %x\n", \ + _id, _q->write, _q->read, _q->base, _q->limit); +#endif + +/* + * We've been passed at least 2 pages. 1 control page and 1 or more + * data pages. 
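A minimal sketch of how a shared region might be carved up with the helpers below; the names mem, msg_mem0 and msg_mem1, the use of NET_ACCEL_MSG_Q_SIZE, and the pairing of the two FIFOs with queue0/queue1 are assumptions for illustration, not taken from this patch:

    struct net_accel_shared_page *sp = (struct net_accel_shared_page *)mem;
    sh_msg_fifo2 q0_fifo, q1_fifo;

    /* The control page carries the magic, queue indices and aflags. */
    net_accel_msg_init_page(mem, PAGE_SIZE, 1 /* net_dev currently up */);

    /* Each FIFO pairs a shared index block with a message array. */
    net_accel_msg_init_queue(&q0_fifo, &sp->queue0,
                             (struct net_accel_msg *)msg_mem0,
                             NET_ACCEL_MSG_Q_SIZE);
    net_accel_msg_init_queue(&q1_fifo, &sp->queue1,
                             (struct net_accel_msg *)msg_mem1,
                             NET_ACCEL_MSG_Q_SIZE);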
+ */ +int net_accel_msg_init_page(void *mem, int len, int up) +{ + struct net_accel_shared_page *shared_page = + (struct net_accel_shared_page*)mem; + + if ((unsigned long)shared_page & NET_ACCEL_MSG_Q_MASK) + return -EINVAL; + + shared_page->magic = NET_ACCEL_MSG_MAGIC; + + shared_page->aflags = 0; + + shared_page->net_dev_up = up; + + return 0; +} +EXPORT_SYMBOL_GPL(net_accel_msg_init_page); + + +void net_accel_msg_init_queue(sh_msg_fifo2 *queue, + struct net_accel_msg_queue *indices, + struct net_accel_msg *base, int size) +{ + queue->fifo = base; + spin_lock_init(&queue->lock); + sh_fifo2_init(queue, size-1, &indices->read, &indices->write); +} +EXPORT_SYMBOL_GPL(net_accel_msg_init_queue); + + +static inline int _net_accel_msg_send(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + struct net_accel_msg *msg, + int is_reply) +{ + int rc = 0; + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); + rmb(); + if (is_reply) { + EPRINTK_ON(sh_fifo2_is_full(queue)); + sh_fifo2_put(queue, *msg); + } else { + if (sh_fifo2_not_half_full(queue)) { + sh_fifo2_put(queue, *msg); + } else { + rc = -ENOSPC; + } + } + wmb(); + return rc; +} + +/* Notify after a batch of messages have been sent */ +void net_accel_msg_notify(int irq) +{ + notify_remote_via_irq(irq); +} +EXPORT_SYMBOL_GPL(net_accel_msg_notify); + +/* + * Send a message on the specified FIFO. Returns 0 on success, -errno + * on failure. The message in msg is copied to the current slot of the + * FIFO. + */ +int net_accel_msg_send(struct net_accel_shared_page *sp, sh_msg_fifo2 *q, + struct net_accel_msg *msg) +{ + unsigned long flags; + int rc; + net_accel_msg_lock_queue(q, &flags); + rc = _net_accel_msg_send(sp, q, msg, 0); + net_accel_msg_unlock_queue(q, &flags); + return rc; +} +EXPORT_SYMBOL_GPL(net_accel_msg_send); + + +/* As net_accel_msg_send but also posts a notification to the far end. */ +int net_accel_msg_send_notify(struct net_accel_shared_page *sp, int irq, + sh_msg_fifo2 *q, struct net_accel_msg *msg) +{ + unsigned long flags; + int rc; + net_accel_msg_lock_queue(q, &flags); + rc = _net_accel_msg_send(sp, q, msg, 0); + net_accel_msg_unlock_queue(q, &flags); + if (rc >= 0) + notify_remote_via_irq(irq); + return rc; +} +EXPORT_SYMBOL_GPL(net_accel_msg_send_notify); + + +int net_accel_msg_reply(struct net_accel_shared_page *sp, sh_msg_fifo2 *q, + struct net_accel_msg *msg) +{ + unsigned long flags; + int rc; + net_accel_msg_lock_queue(q, &flags); + rc = _net_accel_msg_send(sp, q, msg, 1); + net_accel_msg_unlock_queue(q, &flags); + return rc; +} +EXPORT_SYMBOL_GPL(net_accel_msg_reply); + + +/* As net_accel_msg_send but also posts a notification to the far end. */ +int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, int irq, + sh_msg_fifo2 *q, struct net_accel_msg *msg) +{ + unsigned long flags; + int rc; + net_accel_msg_lock_queue(q, &flags); + rc = _net_accel_msg_send(sp, q, msg, 1); + net_accel_msg_unlock_queue(q, &flags); + if (rc >= 0) + notify_remote_via_irq(irq); + return rc; +} +EXPORT_SYMBOL_GPL(net_accel_msg_reply_notify); + + +/* + * Look at a received message, if any, so a decision can be made about + * whether to read it now or not. 
Cookie is a bit of debug which is + * set here and checked when passed to net_accel_msg_recv_next() + */ +int net_accel_msg_peek(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + struct net_accel_msg *msg, int *cookie) +{ + unsigned long flags; + int rc = 0; + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); + net_accel_msg_lock_queue(queue, &flags); + rmb(); + if (sh_fifo2_is_empty(queue)) { + rc = -ENOENT; + } else { + *msg = sh_fifo2_peek(queue); + *cookie = *(queue->fifo_rd_i); + } + net_accel_msg_unlock_queue(queue, &flags); + return rc; +} +EXPORT_SYMBOL_GPL(net_accel_msg_peek); + + +/* + * Move the queue onto the next element, used after finished with a + * peeked msg + */ +int net_accel_msg_recv_next(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, int cookie) +{ + unsigned long flags; + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); + net_accel_msg_lock_queue(queue, &flags); + rmb(); + /* Mustn't be empty */ + BUG_ON(sh_fifo2_is_empty(queue)); + /* + * Check cookie matches, i.e. we're advancing over the same message + * as was got using peek + */ + BUG_ON(cookie != *(queue->fifo_rd_i)); + sh_fifo2_rd_next(queue); + wmb(); + net_accel_msg_unlock_queue(queue, &flags); + return 0; +} +EXPORT_SYMBOL_GPL(net_accel_msg_recv_next); + + +/* + * Receive a message on the specified FIFO. Returns 0 on success, + * -errno on failure. + */ +int net_accel_msg_recv(struct net_accel_shared_page *sp, sh_msg_fifo2 *queue, + struct net_accel_msg *msg) +{ + unsigned long flags; + int rc = 0; + NET_ACCEL_CHECK_MAGIC(sp, -EINVAL); + net_accel_msg_lock_queue(queue, &flags); + rmb(); + if (sh_fifo2_is_empty(queue)) { + rc = -ENOENT; + } else { + sh_fifo2_get(queue, msg); + } + wmb(); + net_accel_msg_unlock_queue(queue, &flags); + return rc; +} +EXPORT_SYMBOL_GPL(net_accel_msg_recv); + + +/* + * Start sending a message without copying. returns a pointer to a message + * that will be filled out in place. The queue is locked until the message + * is sent. + */ +struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, unsigned long *flags) +{ + struct net_accel_msg *msg; + NET_ACCEL_CHECK_MAGIC(sp, NULL); + net_accel_msg_lock_queue(queue, flags); + rmb(); + if (sh_fifo2_not_half_full(queue)) { + msg = sh_fifo2_pokep(queue); + } else { + net_accel_msg_unlock_queue(queue, flags); + msg = NULL; + } + return msg; +} +EXPORT_SYMBOL_GPL(net_accel_msg_start_send); + + +static inline void _msg_complete(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + unsigned long *flags) +{ + sh_fifo2_wr_next(queue); + net_accel_msg_unlock_queue(queue, flags); +} + +/* + * Complete the sending of a message started with net_accel_msg_start_send. The + * message is implicit since the queue was locked by _start + */ +void net_accel_msg_complete_send(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + unsigned long *flags) +{ + _msg_complete(sp, queue, flags); +} +EXPORT_SYMBOL_GPL(net_accel_msg_complete_send); + +/* As net_accel_msg_complete_send but does the notify. 
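The peek/recv_next pair and the start/complete pair are intended to bracket in-place access to a FIFO slot; a minimal sketch of both patterns, assuming sp, queue and irq are already set up:

    struct net_accel_msg msg;
    struct net_accel_msg *slot;
    unsigned long flags;
    int cookie;

    /* Consumer: inspect the head of the FIFO, then advance past it. */
    while (net_accel_msg_peek(sp, queue, &msg, &cookie) == 0) {
            /* ... decide what to do with msg ... */
            net_accel_msg_recv_next(sp, queue, cookie);
    }

    /* Producer: fill a slot in place, release it, then kick the peer. */
    slot = net_accel_msg_start_send(sp, queue, &flags);
    if (slot != NULL) {
            /* ... fill *slot here; the queue stays locked ... */
            net_accel_msg_complete_send(sp, queue, &flags);
            net_accel_msg_notify(irq);
    }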
*/ +void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + unsigned long *flags, int irq) +{ + _msg_complete(sp, queue, flags); + notify_remote_via_irq(irq); +} +EXPORT_SYMBOL_GPL(net_accel_msg_complete_send_notify); diff -r dd748ded9ba8 drivers/xen/sfc_netutil/accel_msg_iface.h --- /dev/null +++ b/drivers/xen/sfc_netutil/accel_msg_iface.h @@ -0,0 +1,414 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NET_ACCEL_MSG_IFACE_H +#define NET_ACCEL_MSG_IFACE_H + +#include +#include +#include +#include +#include +#include + +#include "accel_shared_fifo.h" + +#define NET_ACCEL_MSG_MAGIC (0x85465479) + +/*! We talk version 0.010 of the interdomain protocol */ +#define NET_ACCEL_MSG_VERSION (0x00001000) + +/*! Shared memory portion of inter-domain FIFO */ +struct net_accel_msg_queue { + u32 read; + u32 write; +}; + + +/* + * The aflags in the following structure is used as follows: + * + * - each bit is set when one of the corresponding variables is + * changed by either end. + * + * - the end that has made the change then forwards an IRQ to the + * other + * + * - the IRQ handler deals with these bits either on the fast path, or + * for less common changes, by jumping onto the slow path. + * + * - once it has seen a change, it clears the relevant bit. + * + * aflags is accessed atomically using clear_bit, test_bit, + * test_and_set_bit etc + */ + +/* + * The following used to signify to the other domain when the queue + * they want to use is full, and when it is no longer full. 
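As a concrete (hypothetical) illustration of this flag handshake, using the flag bits defined just below: the end that notices a state change sets the relevant bit and raises the IRQ, and the handler on the other side clears the bit before acting on it. The example_* functions and the need_notfull_reply parameter are assumptions for illustration; the kernel bitops and notify_remote_via_irq() are assumed available as elsewhere in the patch.

#include "accel_msg_iface.h"

/* Illustrative: the domU has found its queue to dom0 full, so it raises
 * the corresponding flag and kicks dom0. */
static void example_domU_signal_queue_full(struct net_accel_shared_page *sp,
                                           int msg_irq)
{
        set_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
                (unsigned long *)&sp->aflags);
        notify_remote_via_irq(msg_irq);
}

/* Dom0 side, called from its message IRQ handler: clear the bit first,
 * then act on it (here, by remembering to raise QUEUEUNOTFULL once the
 * queue has been drained). */
static void example_dom0_handle_aflags(struct net_accel_shared_page *sp,
                                       int *need_notfull_reply)
{
        if (test_and_clear_bit(NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B,
                               (unsigned long *)&sp->aflags))
                *need_notfull_reply = 1;
}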
Could be + * compressed to use fewer bits but done this way for simplicity and + * clarity + */ + +/* "dom0->domU queue" is full */ +#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL 0x1 +#define NET_ACCEL_MSG_AFLAGS_QUEUE0FULL_B 0 +/* "dom0->domU queue" is not full */ +#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL 0x2 +#define NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL_B 1 +/* "domU->dom0 queue" is full */ +#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL 0x4 +#define NET_ACCEL_MSG_AFLAGS_QUEUEUFULL_B 2 +/* "domU->dom0 queue" is not full */ +#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL 0x8 +#define NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL_B 3 +/* dom0 -> domU net_dev up/down events */ +#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN 0x10 +#define NET_ACCEL_MSG_AFLAGS_NETUPDOWN_B 4 + +/* + * Masks used to test if there are any messages for domU and dom0 + * respectively + */ +#define NET_ACCEL_MSG_AFLAGS_TO_DOMU_MASK \ + (NET_ACCEL_MSG_AFLAGS_QUEUE0FULL | \ + NET_ACCEL_MSG_AFLAGS_QUEUEUNOTFULL | \ + NET_ACCEL_MSG_AFLAGS_NETUPDOWN) +#define NET_ACCEL_MSG_AFLAGS_TO_DOM0_MASK \ + (NET_ACCEL_MSG_AFLAGS_QUEUE0NOTFULL | \ + NET_ACCEL_MSG_AFLAGS_QUEUEUFULL) + +/*! The shared data structure used for inter-VM communication. */ +struct net_accel_shared_page { + /*! Sanity check */ + u32 magic; + /*! Used by host/Dom0 */ + struct net_accel_msg_queue queue0; + /*! Used by guest/DomU */ + struct net_accel_msg_queue queue1; + /*! Atomic flags, used to communicate simple state changes */ + u32 aflags; + /*! State of net_dev used for acceleration */ + u32 net_dev_up; +}; + + +enum net_accel_hw_type { + /*! Not a virtualisable NIC: use slow path. */ + NET_ACCEL_MSG_HWTYPE_NONE = 0, + /*! NIC is Falcon-based */ + NET_ACCEL_MSG_HWTYPE_FALCON_A = 1, + NET_ACCEL_MSG_HWTYPE_FALCON_B = 2, +}; + +/*! The maximum number of pages used by an event queue. */ +#define EF_HW_FALCON_EVQ_PAGES 8 + +struct net_accel_hw_falcon_b { + /* VI */ + /*! Grant for Tx DMA Q */ + u32 txdmaq_gnt; + /*! Grant for Rx DMA Q */ + u32 rxdmaq_gnt; + /*! Machine frame number for Tx/Rx doorbell page */ + u32 doorbell_mfn; + /*! Grant for Tx/Rx doorbell page */ + u32 doorbell_gnt; + + /* Event Q */ + /*! Grants for the pages of the EVQ */ + u32 evq_mem_gnts[EF_HW_FALCON_EVQ_PAGES]; + u32 evq_offs; + /*! log2(pages in event Q) */ + u32 evq_order; + /*! Capacity in events */ + u32 evq_capacity; + /*! Eventq pointer register physical address */ + u32 evq_rptr; + /*! Interface instance */ + u32 instance; + /*! Capacity of RX queue */ + u32 rx_capacity; + /*! Capacity of TX queue */ + u32 tx_capacity; + + /* NIC */ + s32 nic_arch; + s32 nic_revision; + u8 nic_variant; +}; + +struct net_accel_hw_falcon_a { + struct net_accel_hw_falcon_b common; + u32 evq_rptr_gnt; +}; + + +/*! Description of the hardware that the DomU is being given. */ +struct net_accel_msg_hw { + u32 type; /*!< Hardware type */ + union { + struct net_accel_hw_falcon_a falcon_a; + struct net_accel_hw_falcon_b falcon_b; + } resources; +}; + +/*! Start-of-day handshake message. Dom0 fills in its version and + * sends, DomU checks, inserts its version and replies + */ +struct net_accel_msg_hello { + /*! Sender's version (set by each side in turn) */ + u32 version; + /*! max pages allocated/allowed for buffers */ + u32 max_pages; +}; + +/*! Maximum number of page requests that can fit in a message. */ +#define NET_ACCEL_MSG_MAX_PAGE_REQ (8) + +/*! Request for NIC buffers. 
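A hypothetical DomU-side fragment building such a request is sketched below: the frontend fills in the pages and grant references it has already granted to dom0, and dom0 returns the NIC buffer address in buf in its reply. The function name example_request_buffer_mapping and the sp/to_dom0/msg_irq parameters are assumptions for illustration only.

#include "accel_msg_iface.h"

/* Illustrative DomU-side buffer-map request. */
static int example_request_buffer_mapping(struct net_accel_shared_page *sp,
                                          sh_msg_fifo2 *to_dom0, int msg_irq,
                                          u32 *grant_refs, u32 npages,
                                          u32 reqid)
{
        struct net_accel_msg msg;
        u32 i;

        if (npages > NET_ACCEL_MSG_MAX_PAGE_REQ)
                return -EINVAL;

        net_accel_msg_init(&msg, NET_ACCEL_MSG_MAPBUF);
        msg.u.mapbufs.reqid = reqid;
        msg.u.mapbufs.pages = npages;
        for (i = 0; i < npages; i++)
                msg.u.mapbufs.grants[i] = grant_refs[i];

        /* dom0 fills in msg.u.mapbufs.buf in the corresponding reply. */
        return net_accel_msg_send_notify(sp, msg_irq, to_dom0, &msg);
}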
DomU fills out pages and grants (and
+ * optionally reqid); dom0 fills out buf and sends reply
+ */
+struct net_accel_msg_map_buffers {
+        u32 reqid;     /*!< Optional request ID */
+        u32 pages;     /*!< Number of pages to map */
+        u32 grants[NET_ACCEL_MSG_MAX_PAGE_REQ];  /*!< Grant ids to map */
+        u32 buf;       /*!< NIC buffer address of pages obtained */
+};
+
+/*! Notification of a change to local mac address, used to filter
+  locally destined packets off the fast path */
+struct net_accel_msg_localmac {
+        u32 flags;     /*!< Should this be added or removed? */
+        u8 mac[ETH_ALEN]; /*!< The mac address to filter onto slow path */
+};
+
+struct net_accel_msg_fastpath {
+        u32 flags;     /*!< Should this be added or removed? */
+        u8 mac[ETH_ALEN];/*!< The mac address to filter onto fast path */
+        u16 port;      /*!< The port of the connection */
+        u32 ip;        /*!< The IP address of the connection */
+        u8 proto;      /*!< The protocol of connection (TCP/UDP) */
+};
+
+/*! Values for struct ef_msg_localmac/fastpath.flags */
+#define NET_ACCEL_MSG_ADD    0x1
+#define NET_ACCEL_MSG_REMOVE 0x2
+
+/*! Overall message structure */
+struct net_accel_msg {
+        /*! ID specifying type of message */
+        u32 id;
+        union {
+                /*! handshake */
+                struct net_accel_msg_hello hello;
+                /*! hardware description */
+                struct net_accel_msg_hw hw;
+                /*! buffer map request */
+                struct net_accel_msg_map_buffers mapbufs;
+                /*! mac address of a local interface */
+                struct net_accel_msg_localmac localmac;
+                /*! address of a new fastpath connection */
+                struct net_accel_msg_fastpath fastpath;
+                /*! make the message a fixed size */
+                u8 pad[128 - sizeof(u32)];
+        } u;
+};
+
+
+#define NET_ACCEL_MSG_HW_TO_MSG(_u) container_of(_u, struct net_accel_msg, u.hw)
+
+/*! Inter-domain message FIFO */
+typedef struct {
+        struct net_accel_msg *fifo;
+        u32 fifo_mask;
+        u32 *fifo_rd_i;
+        u32 *fifo_wr_i;
+        spinlock_t lock;
+        u32 is_locked; /* Debug flag */
+} sh_msg_fifo2;
+
+
+#define NET_ACCEL_MSG_OFFSET_MASK PAGE_MASK
+
+/* Modifiers */
+#define NET_ACCEL_MSG_REPLY    (0x80000000)
+#define NET_ACCEL_MSG_ERROR    (0x40000000)
+
+/* Dom0 -> DomU and reply. Handshake/version check. */
+#define NET_ACCEL_MSG_HELLO    (0x00000001)
+/* Dom0 -> DomU : hardware setup (VI info.) */
+#define NET_ACCEL_MSG_SETHW    (0x00000002)
+/*
+ * Dom0 -> DomU. Notification of a local mac to add/remove from slow
+ * path filter
+ */
+#define NET_ACCEL_MSG_LOCALMAC (0x00000003)
+/*
+ * DomU -> Dom0 and reply. Request for buffer table entries for
+ * preallocated pages.
+ */
+#define NET_ACCEL_MSG_MAPBUF   (0x00000004)
+/*
+ * Dom0 -> DomU. Notification of a local mac to add/remove from fast
+ * path filter
+ */
+#define NET_ACCEL_MSG_FASTPATH (0x00000005)
+
+/*! Initialise a message and set the type
+ * \param msg : the message
+ * \param code : the message type
+ */
+static inline void net_accel_msg_init(struct net_accel_msg *msg, int code) {
+        msg->id = (u32)code;
+}
+
+/*! initialise a shared page structure
+ * \param shared_page : mapped memory in which the structure resides
+ * \param len : size of the message FIFO area that follows
+ * \param up : initial up/down state of netdev
+ * \return 0 or an error code
+ */
+extern int net_accel_msg_init_page(void *shared_page, int len, int up);
+
+/*!
initialise a message queue + * \param queue : the message FIFO to initialise + * \param indices : the read and write indices in shared memory + * \param base : the start of the memory area for the FIFO + * \param size : the size of the FIFO in bytes + */ +extern void net_accel_msg_init_queue(sh_msg_fifo2 *queue, + struct net_accel_msg_queue *indices, + struct net_accel_msg *base, int size); + +/* Notify after a batch of messages have been sent */ +extern void net_accel_msg_notify(int irq); + +/*! Send a message on the specified FIFO. The message is copied to the + * current slot of the FIFO. + * \param sp : pointer to shared page + * \param q : pointer to message FIFO to use + * \param msg : pointer to message + * \return 0 on success, -errno on + */ +extern int net_accel_msg_send(struct net_accel_shared_page *sp, + sh_msg_fifo2 *q, + struct net_accel_msg *msg); +extern int net_accel_msg_reply(struct net_accel_shared_page *sp, + sh_msg_fifo2 *q, + struct net_accel_msg *msg); + +/*! As net_accel_msg_send but also posts a notification to the far end. */ +extern int net_accel_msg_send_notify(struct net_accel_shared_page *sp, + int irq, sh_msg_fifo2 *q, + struct net_accel_msg *msg); +/*! As net_accel_msg_send but also posts a notification to the far end. */ +extern int net_accel_msg_reply_notify(struct net_accel_shared_page *sp, + int irq, sh_msg_fifo2 *q, + struct net_accel_msg *msg); + +/*! Receive a message on the specified FIFO. Returns 0 on success, + * -errno on failure. + */ +extern int net_accel_msg_recv(struct net_accel_shared_page *sp, + sh_msg_fifo2 *q, + struct net_accel_msg *msg); + +/*! Look at a received message, if any, so a decision can be made + * about whether to read it now or not. Cookie is a bit of debug + * which is set here and checked when passed to + * net_accel_msg_recv_next() + */ +extern int net_accel_msg_peek(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + struct net_accel_msg *msg, int *cookie); +/*! Move the queue onto the next element, used after finished with a + * peeked msg + */ +extern int net_accel_msg_recv_next(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, int cookie); + +/*! Start sending a message without copying. returns a pointer to a + * message that will be filled out in place. The queue is locked + * until the message is sent. + */ +extern +struct net_accel_msg *net_accel_msg_start_send(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + unsigned long *flags); + + +/*! Complete the sending of a message started with + * net_accel_msg_start_send. The message is implicit since the queue + * was locked by _start + */ +extern void net_accel_msg_complete_send(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + unsigned long *flags); + +/*! As net_accel_msg_complete_send but does the notify. */ +extern void net_accel_msg_complete_send_notify(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, + unsigned long *flags, int irq); + +/*! Lock the queue so that multiple "_locked" functions can be called + * without the queue being modified by others + */ +static inline +void net_accel_msg_lock_queue(sh_msg_fifo2 *queue, unsigned long *flags) +{ + spin_lock_irqsave(&queue->lock, (*flags)); + rmb(); + BUG_ON(queue->is_locked); + queue->is_locked = 1; +} + +/*! Unlock the queue */ +static inline +void net_accel_msg_unlock_queue(sh_msg_fifo2 *queue, unsigned long *flags) +{ + BUG_ON(!queue->is_locked); + queue->is_locked = 0; + wmb(); + spin_unlock_irqrestore(&queue->lock, (*flags)); +} + +/*! 
Give up without sending a message that was started with + * net_accel_msg_start_send() + */ +static inline +void net_accel_msg_abort_send(struct net_accel_shared_page *sp, + sh_msg_fifo2 *queue, unsigned long *flags) +{ + net_accel_msg_unlock_queue(queue, flags); +} + +/*! Test the queue to ensure there is sufficient space */ +static inline +int net_accel_msg_check_space(sh_msg_fifo2 *queue, unsigned space) +{ + return sh_fifo2_space(queue) >= space; +} + +#endif /* NET_ACCEL_MSG_IFACE_H */ diff -r dd748ded9ba8 drivers/xen/sfc_netutil/accel_shared_fifo.h --- /dev/null +++ b/drivers/xen/sfc_netutil/accel_shared_fifo.h @@ -0,0 +1,127 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NET_ACCEL_SHARED_FIFO_H +#define NET_ACCEL_SHARED_FIFO_H + +/* + * This is based on fifo.h, but handles sharing between address spaces + * that don't trust each other, by splitting out the read and write + * indices. This costs at least one pointer indirection more than the + * vanilla version per access. 
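The crux of this design is that both indices are free-running counters in shared memory and all arithmetic is done modulo a power-of-two buffer size. The stand-alone sketch below (plain user-space C, not part of the patch) reproduces the arithmetic performed by the sh_fifo2_num() and sh_fifo2_space() macros that follow, including the case where the counters have wrapped around 2^32; the values chosen are arbitrary examples.

#include <stdio.h>

int main(void)
{
        unsigned mask = 7;           /* capacity 7, buffer size 8 (power of two) */
        unsigned rd = 0xfffffffeu;   /* free-running indices are allowed to wrap */
        unsigned wr = 0x00000003u;   /* writer is 5 elements ahead of the reader */

        unsigned num   = (wr - rd) & mask;        /* sh_fifo2_num()   -> 5 */
        unsigned space = (rd - wr - 1u) & mask;   /* sh_fifo2_space() -> 2 */

        printf("num=%u space=%u\n", num, space);  /* prints "num=5 space=2" */
        return 0;
}

Note that num + space is always the capacity (one less than the buffer size), which is why the capacity must be a power of two minus one.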
+ */
+
+typedef struct {
+        char*    fifo;
+        unsigned fifo_mask;
+        unsigned *fifo_rd_i;
+        unsigned *fifo_wr_i;
+} sh_byte_fifo2;
+
+#define SH_FIFO2_M(f, x)     ((x) & ((f)->fifo_mask))
+
+static inline unsigned log2_ge(unsigned long n, unsigned min_order) {
+        unsigned order = min_order;
+        while((1ul << order) < n) ++order;
+        return order;
+}
+
+static inline unsigned long pow2(unsigned order) {
+        return (1ul << order);
+}
+
+#define is_pow2(x)  (pow2(log2_ge((x), 0)) == (x))
+
+#define sh_fifo2_valid(f)  ((f) && (f)->fifo && (f)->fifo_mask > 0 && \
+                            is_pow2((f)->fifo_mask+1u))
+
+#define sh_fifo2_init(f, cap, _rptr, _wptr)             \
+        do {                                            \
+                BUG_ON(!is_pow2((cap) + 1));            \
+                (f)->fifo_rd_i = _rptr;                 \
+                (f)->fifo_wr_i = _wptr;                 \
+                *(f)->fifo_rd_i = *(f)->fifo_wr_i = 0u; \
+                (f)->fifo_mask = (cap);                 \
+        } while(0)
+
+#define sh_fifo2_num(f)      SH_FIFO2_M((f),*(f)->fifo_wr_i - *(f)->fifo_rd_i)
+#define sh_fifo2_space(f)    SH_FIFO2_M((f),*(f)->fifo_rd_i - *(f)->fifo_wr_i-1u)
+#define sh_fifo2_is_empty(f)  (sh_fifo2_num(f)==0)
+#define sh_fifo2_not_empty(f) (sh_fifo2_num(f)!=0)
+#define sh_fifo2_is_full(f)   (sh_fifo2_space(f)==0u)
+#define sh_fifo2_not_full(f)  (sh_fifo2_space(f)!=0u)
+#define sh_fifo2_buf_size(f)  ((f)->fifo_mask + 1u)
+#define sh_fifo2_capacity(f)  ((f)->fifo_mask)
+#define sh_fifo2_end(f)       ((f)->fifo + sh_fifo2_buf_size(f))
+#define sh_fifo2_not_half_full(f) (sh_fifo2_space(f) > (sh_fifo2_capacity(f) >> 1))
+
+#define sh_fifo2_peek(f)      ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i)])
+#define sh_fifo2_peekp(f)     ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_rd_i))
+#define sh_fifo2_poke(f)      ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i)])
+#define sh_fifo2_pokep(f)     ((f)->fifo + SH_FIFO2_M((f), *(f)->fifo_wr_i))
+#define sh_fifo2_peek_i(f,i)  ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_rd_i+(i))])
+#define sh_fifo2_poke_i(f,i)  ((f)->fifo[SH_FIFO2_M((f), *(f)->fifo_wr_i+(i))])
+
+#define sh_fifo2_rd_next(f)                                     \
+        do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + 1u;} while(0)
+#define sh_fifo2_wr_next(f)                                     \
+        do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + 1u;} while(0)
+#define sh_fifo2_rd_adv(f, n)                                   \
+        do {*(f)->fifo_rd_i = *(f)->fifo_rd_i + (n);} while(0)
+#define sh_fifo2_wr_adv(f, n)                                   \
+        do {*(f)->fifo_wr_i = *(f)->fifo_wr_i + (n);} while(0)
+
+#define sh_fifo2_put(f, v)                                              \
+        do {sh_fifo2_poke(f) = (v); wmb(); sh_fifo2_wr_next(f);} while(0)
+
+#define sh_fifo2_get(f, pv)                                             \
+        do {*(pv) = sh_fifo2_peek(f); mb(); sh_fifo2_rd_next(f);} while(0)
+
+static inline unsigned sh_fifo2_contig_num(sh_byte_fifo2 *f)
+{
+        unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i);
+        unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i);
+
+        return (fifo_wr_i >= fifo_rd_i)
+                ? fifo_wr_i - fifo_rd_i
+                : f->fifo_mask + 1u - fifo_rd_i; /* use the masked index */
+}
+
+static inline unsigned sh_fifo2_contig_space(sh_byte_fifo2 *f)
+{
+        unsigned fifo_wr_i = SH_FIFO2_M(f, *f->fifo_wr_i);
+        unsigned fifo_rd_i = SH_FIFO2_M(f, *f->fifo_rd_i);
+
+        return (fifo_rd_i > fifo_wr_i)
+                ? fifo_rd_i - fifo_wr_i - 1
+                : (f->fifo_mask + 1u - fifo_wr_i
+                   /*
+                    * The last byte can't be used if the read pointer
+                    * is at zero.
+ */ + - (fifo_rd_i==0)); +} + + +#endif /* NET_ACCEL_SHARED_FIFO_H */ diff -r dd748ded9ba8 drivers/xen/sfc_netutil/accel_util.c --- /dev/null +++ b/drivers/xen/sfc_netutil/accel_util.c @@ -0,0 +1,333 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "accel_util.h" + +#ifdef EFX_GCOV +#include "gcov.h" + +static int __init net_accel_init(void) +{ + gcov_provider_init(THIS_MODULE); + return 0; +} +module_init(net_accel_init); + +static void __exit net_accel_exit(void) +{ + gcov_provider_fini(THIS_MODULE); +} +module_exit(net_accel_exit); +#endif + +/* Shutdown remote domain that is misbehaving */ +int net_accel_shutdown_remote(int domain) +{ + struct sched_remote_shutdown sched_shutdown = { + .domain_id = domain, + .reason = SHUTDOWN_crash + }; + + EPRINTK("Crashing domain %d\n", domain); + + return HYPERVISOR_sched_op(SCHEDOP_remote_shutdown, &sched_shutdown); +} +EXPORT_SYMBOL(net_accel_shutdown_remote); + + +/* Based on xenbus_backend_client.c:xenbus_map_ring() */ +static int net_accel_map_grant(struct xenbus_device *dev, int gnt_ref, + grant_handle_t *handle, void *vaddr, + u64 *dev_bus_addr, unsigned flags) +{ + struct gnttab_map_grant_ref op; + + gnttab_set_map_op(&op, (unsigned long)vaddr, flags, + gnt_ref, dev->otherend_id); + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)); + + if (op.status != GNTST_okay) { + xenbus_dev_error + (dev, op.status, + "failed mapping in shared page %d from domain %d\n", + gnt_ref, dev->otherend_id); + } else { + *handle = op.handle; + if (dev_bus_addr) + *dev_bus_addr = op.dev_bus_addr; + } + + return op.status; +} + + +/* Based on xenbus_backend_client.c:xenbus_unmap_ring() */ +static int net_accel_unmap_grant(struct xenbus_device *dev, + grant_handle_t handle, + void *vaddr, u64 dev_bus_addr, + unsigned flags) +{ + struct gnttab_unmap_grant_ref op; + + gnttab_set_unmap_op(&op, (unsigned long)vaddr, flags, handle); + + if (dev_bus_addr) + op.dev_bus_addr = dev_bus_addr; + + BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)); + + if (op.status != GNTST_okay) + xenbus_dev_error(dev, op.status, + "failed unmapping page at handle %d error %d\n", + handle, op.status); + + return op.status; +} + + +int net_accel_map_device_page(struct xenbus_device *dev, + int gnt_ref, grant_handle_t *handle, + u64 *dev_bus_addr) +{ + return net_accel_map_grant(dev, gnt_ref, handle, 0, dev_bus_addr, + GNTMAP_device_map); +} 
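To show how the helper just defined is intended to be used, here is a hypothetical fragment that maps a page granted by the other domain so the NIC can DMA to it, and unmaps it again on teardown. The example_* function names are assumptions; the grant reference is assumed to have arrived in a MAPBUF-style message.

#include "accel_util.h"

/* Illustrative: map a single granted page for device (DMA) access. */
static int example_map_for_dma(struct xenbus_device *dev, int gnt_ref,
                               grant_handle_t *handle, u64 *bus_addr)
{
        int rc = net_accel_map_device_page(dev, gnt_ref, handle, bus_addr);
        if (rc != GNTST_okay)
                return -EIO;   /* rc is a grant-table status, not an errno */
        /* *bus_addr can now be programmed into the NIC's buffer table. */
        return 0;
}

static void example_unmap_for_dma(struct xenbus_device *dev,
                                  grant_handle_t handle, u64 bus_addr)
{
        net_accel_unmap_device_page(dev, handle, bus_addr);
}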
+EXPORT_SYMBOL_GPL(net_accel_map_device_page); + + +int net_accel_unmap_device_page(struct xenbus_device *dev, + grant_handle_t handle, u64 dev_bus_addr) +{ + return net_accel_unmap_grant(dev, handle, 0, dev_bus_addr, + GNTMAP_device_map); +} +EXPORT_SYMBOL_GPL(net_accel_unmap_device_page); + + +struct net_accel_valloc_grant_mapping { + struct vm_struct *vm; + int pages; + grant_handle_t grant_handles[0]; +}; + +/* Map a series of grants into a contiguous virtual area */ +static void *net_accel_map_grants_valloc(struct xenbus_device *dev, + unsigned *grants, int npages, + unsigned flags, void **priv) +{ + struct net_accel_valloc_grant_mapping *map; + struct vm_struct *vm; + void *addr; + int i, j, rc; + + vm = alloc_vm_area(PAGE_SIZE * npages); + if (vm == NULL) { + EPRINTK("No memory from alloc_vm_area.\n"); + return NULL; + } + /* + * Get a structure in which we will record all the info needed + * to undo the mapping. + */ + map = kzalloc(sizeof(struct net_accel_valloc_grant_mapping) + + npages * sizeof(grant_handle_t), GFP_KERNEL); + if (map == NULL) { + EPRINTK("No memory for net_accel_valloc_grant_mapping\n"); + free_vm_area(vm); + return NULL; + } + map->vm = vm; + map->pages = npages; + + /* Do the actual mapping */ + addr = vm->addr; + for (i = 0; i < npages; i++) { + rc = net_accel_map_grant(dev, grants[i], map->grant_handles + i, + addr, NULL, flags); + if (rc != 0) + goto undo; + addr = (void*)((unsigned long)addr + PAGE_SIZE); + } + + if (priv) + *priv = (void *)map; + else + kfree(map); + + return vm->addr; + + undo: + EPRINTK("Aborting contig map due to single map failure %d (%d of %d)\n", + rc, i+1, npages); + for (j = 0; j < i; j++) { + addr = (void*)((unsigned long)vm->addr + (j * PAGE_SIZE)); + net_accel_unmap_grant(dev, map->grant_handles[j], addr, 0, + flags); + } + free_vm_area(vm); + kfree(map); + return NULL; +} + +/* Undo the result of the mapping */ +static void net_accel_unmap_grants_vfree(struct xenbus_device *dev, + unsigned flags, void *priv) +{ + struct net_accel_valloc_grant_mapping *map = + (struct net_accel_valloc_grant_mapping *)priv; + + void *addr = map->vm->addr; + int npages = map->pages; + int i; + + for (i = 0; i < npages; i++) { + net_accel_unmap_grant(dev, map->grant_handles[i], addr, 0, + flags); + addr = (void*)((unsigned long)addr + PAGE_SIZE); + } + free_vm_area(map->vm); + kfree(map); +} + + +void *net_accel_map_grants_contig(struct xenbus_device *dev, + unsigned *grants, int npages, + void **priv) +{ + return net_accel_map_grants_valloc(dev, grants, npages, + GNTMAP_host_map, priv); +} +EXPORT_SYMBOL(net_accel_map_grants_contig); + + +void net_accel_unmap_grants_contig(struct xenbus_device *dev, + void *priv) +{ + net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv); +} +EXPORT_SYMBOL(net_accel_unmap_grants_contig); + + +void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref, + void **priv) +{ + return net_accel_map_grants_valloc(dev, &gnt_ref, 1, + GNTMAP_host_map, priv); +} +EXPORT_SYMBOL(net_accel_map_iomem_page); + + +void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv) +{ + net_accel_unmap_grants_vfree(dev, GNTMAP_host_map, priv); +} +EXPORT_SYMBOL(net_accel_unmap_iomem_page); + + +int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn, + int is_iomem) +{ + int err = gnttab_grant_foreign_access(dev->otherend_id, mfn, + is_iomem ? 
GTF_PCD : 0); + if (err < 0) + xenbus_dev_error(dev, err, "failed granting access to page\n"); + return err; +} +EXPORT_SYMBOL_GPL(net_accel_grant_page); + + +int net_accel_ungrant_page(grant_ref_t gntref) +{ + if (unlikely(gnttab_query_foreign_access(gntref) != 0)) { + EPRINTK("%s: remote domain still using grant %d\n", __FUNCTION__, + gntref); + return -EBUSY; + } + + gnttab_end_foreign_access(gntref, 0); + return 0; +} +EXPORT_SYMBOL_GPL(net_accel_ungrant_page); + + +int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[]) +{ + char *s, *e, *macstr; + int i; + + macstr = s = xenbus_read(XBT_NIL, dev->nodename, "mac", NULL); + if (IS_ERR(macstr)) + return PTR_ERR(macstr); + + for (i = 0; i < ETH_ALEN; i++) { + mac[i] = simple_strtoul(s, &e, 16); + if ((s == e) || (*e != ((i == ETH_ALEN-1) ? '\0' : ':'))) { + kfree(macstr); + return -ENOENT; + } + s = e+1; + } + + kfree(macstr); + return 0; +} +EXPORT_SYMBOL_GPL(net_accel_xen_net_read_mac); + + +void net_accel_update_state(struct xenbus_device *dev, int state) +{ + struct xenbus_transaction tr; + int err; + + DPRINTK("%s: setting accelstate to %s\n", __FUNCTION__, + xenbus_strstate(state)); + + if (xenbus_exists(XBT_NIL, dev->nodename, "")) { + VPRINTK("%s: nodename %s\n", __FUNCTION__, dev->nodename); + again: + err = xenbus_transaction_start(&tr); + if (err == 0) + err = xenbus_printf(tr, dev->nodename, "accelstate", + "%d", state); + if (err != 0) { + xenbus_transaction_end(tr, 1); + } else { + err = xenbus_transaction_end(tr, 0); + if (err == -EAGAIN) + goto again; + } + } +} +EXPORT_SYMBOL_GPL(net_accel_update_state); + +MODULE_LICENSE("GPL"); diff -r dd748ded9ba8 drivers/xen/sfc_netutil/accel_util.h --- /dev/null +++ b/drivers/xen/sfc_netutil/accel_util.h @@ -0,0 +1,127 @@ +/**************************************************************************** + * Solarflare driver for Xen network acceleration + * + * Copyright 2006-2008: Solarflare Communications Inc, + * 9501 Jeronimo Road, Suite 250, + * Irvine, CA 92618, USA + * + * Maintained by Solarflare Communications + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation, incorporated herein by reference. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + **************************************************************************** + */ + +#ifndef NETBACK_ACCEL_UTIL_H +#define NETBACK_ACCEL_UTIL_H + +#ifdef DPRINTK +#undef DPRINTK +#endif + +#define FILE_LEAF strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__ + +#if 1 +#define VPRINTK(_f, _a...) +#else +#define VPRINTK(_f, _a...) \ + printk("(file=%s, line=%d) " _f, \ + FILE_LEAF , __LINE__ , ## _a ) +#endif + +#if 1 +#define DPRINTK(_f, _a...) +#else +#define DPRINTK(_f, _a...) \ + printk("(file=%s, line=%d) " _f, \ + FILE_LEAF , __LINE__ , ## _a ) +#endif + +#define EPRINTK(_f, _a...) 
\
+        printk("(file=%s, line=%d) " _f, \
+               FILE_LEAF , __LINE__ , ## _a )
+
+#define EPRINTK_ON(exp)                                                 \
+        do {                                                            \
+                if (exp)                                                \
+                        EPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
+        } while(0)
+
+#define DPRINTK_ON(exp)                                                 \
+        do {                                                            \
+                if (exp)                                                \
+                        DPRINTK("%s at %s:%d\n", #exp, __FILE__, __LINE__); \
+        } while(0)
+
+#define MAC_FMT "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x"
+#define MAC_ARG(_mac) (_mac)[0], (_mac)[1], (_mac)[2], (_mac)[3], (_mac)[4], (_mac)[5]
+
+#include
+
+/*! Map a set of pages from another domain
+ * \param dev The xenbus device context
+ * \param grants The grant references of the pages to be mapped
+ * \param npages The number of pages to map
+ * \param priv The private data returned by the mapping function
+ */
+extern
+void *net_accel_map_grants_contig(struct xenbus_device *dev,
+                                  unsigned *grants, int npages,
+                                  void **priv);
+
+/*! Unmap a set of pages mapped using net_accel_map_grants_contig.
+ * \param dev The xenbus device context
+ * \param priv The private data returned by the mapping function
+ */
+extern
+void net_accel_unmap_grants_contig(struct xenbus_device *dev, void *priv);
+
+/*! Read the MAC address of a device from xenstore */
+extern
+int net_accel_xen_net_read_mac(struct xenbus_device *dev, u8 mac[]);
+
+/*! Update the accelstate field for a device in xenstore */
+extern
+void net_accel_update_state(struct xenbus_device *dev, int state);
+
+/* These four map/unmap functions are based on
+ * xenbus_backend_client.c:xenbus_map_ring(). However, they are not
+ * used for ring buffers, instead just to map pages between domains,
+ * or to map a page so that it is accessible by a device
+ */
+extern
+int net_accel_map_device_page(struct xenbus_device *dev,
+                              int gnt_ref, grant_handle_t *handle,
+                              u64 *dev_bus_addr);
+extern
+int net_accel_unmap_device_page(struct xenbus_device *dev,
+                                grant_handle_t handle, u64 dev_bus_addr);
+extern
+void *net_accel_map_iomem_page(struct xenbus_device *dev, int gnt_ref,
+                               void **priv);
+extern
+void net_accel_unmap_iomem_page(struct xenbus_device *dev, void *priv);
+
+/*! Grant a page to remote domain */
+extern
+int net_accel_grant_page(struct xenbus_device *dev, unsigned long mfn,
+                         int is_iomem);
+/*! Undo a net_accel_grant_page */
+extern
+int net_accel_ungrant_page(grant_ref_t gntref);
+
+
+/*! Shutdown remote domain that is misbehaving */
+extern
+int net_accel_shutdown_remote(int domain);
+
+
+#endif
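As a final illustration of how these utilities fit together, a frontend might use net_accel_map_grants_contig() to map the event-queue pages granted by dom0 in the SETHW message, keeping the returned private handle for teardown. The struct example_evq_map, the example_* function names and the use of evq_order to derive the page count are assumptions for illustration, not part of the patch; the accelstate update is shown purely to demonstrate net_accel_update_state().

#include "accel_util.h"
#include "accel_msg_iface.h"

/* Illustrative container for the mapped event queue. */
struct example_evq_map {
        void *evq_base;   /* contiguous mapping of the EVQ pages */
        void *priv;       /* handle used to undo the mapping     */
};

static int example_map_evq(struct xenbus_device *dev,
                           struct net_accel_hw_falcon_b *hw,
                           struct example_evq_map *map)
{
        int npages = 1 << hw->evq_order;   /* evq_order is log2(pages) */

        map->evq_base = net_accel_map_grants_contig(dev, hw->evq_mem_gnts,
                                                    npages, &map->priv);
        if (map->evq_base == NULL)
                return -ENOMEM;

        /* Record progress in xenstore for the benefit of the backend. */
        net_accel_update_state(dev, XenbusStateConnected);
        return 0;
}

static void example_unmap_evq(struct xenbus_device *dev,
                              struct example_evq_map *map)
{
        net_accel_unmap_grants_contig(dev, map->priv);
        map->evq_base = NULL;
}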