[Xen-devel] [PATCH 21/22] NC2 VMQ support.

This only includes the transmit half, because the receiver uses an
unmodified posted buffers mode implementation.

This includes various bits of patches which were originally written by the other people on the signed-off-by list below.

Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
Signed-off-by: Mitch Williams <mitch.a.williams@xxxxxxxxx>
Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>

All bugs are mine, of course.
---
 drivers/xen/Kconfig                        |    5 +
 drivers/xen/netchannel2/Makefile           |    4 +
 drivers/xen/netchannel2/chan.c             |    7 +-
 drivers/xen/netchannel2/netback2.c         |    9 +
 drivers/xen/netchannel2/netchannel2_core.h |   10 +
 drivers/xen/netchannel2/posted_buffer.h    |   50 ++
 drivers/xen/netchannel2/posted_buffers.c   |   20 +-
 drivers/xen/netchannel2/util.c             |    8 +-
 drivers/xen/netchannel2/vmq.c              |  805 ++++++++++++++++++++++++++++
 drivers/xen/netchannel2/vmq.h              |   58 ++
 drivers/xen/netchannel2/vmq_def.h          |   68 +++
 drivers/xen/netchannel2/xmit_packet.c      |    6 +
 12 files changed, 1029 insertions(+), 21 deletions(-)
 create mode 100644 drivers/xen/netchannel2/posted_buffer.h
 create mode 100644 drivers/xen/netchannel2/vmq.c
 create mode 100644 drivers/xen/netchannel2/vmq.h
 create mode 100644 drivers/xen/netchannel2/vmq_def.h

diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index a7e5b5c..a37b0cd 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -234,6 +234,11 @@ config XEN_NETDEV2_FRONTEND
        depends on XEN_NETCHANNEL2
        default y
 
+config XEN_NETDEV2_VMQ
+       bool "Net channel 2 support for multi-queue devices"
+       depends on XEN_NETDEV2_BACKEND && NET_VMQ
+       default y
+
 config XEN_NETDEV2_BYPASSABLE
        bool "Net channel 2 bypassee support"
        depends on XEN_NETDEV2_BACKEND
diff --git a/drivers/xen/netchannel2/Makefile b/drivers/xen/netchannel2/Makefile
index 11a257e..918d8d8 100644
--- a/drivers/xen/netchannel2/Makefile
+++ b/drivers/xen/netchannel2/Makefile
@@ -12,6 +12,10 @@ ifeq ($(CONFIG_XEN_NETDEV2_FRONTEND),y)
 netchannel2-objs += netfront2.o
 endif
 
+ifeq ($(CONFIG_XEN_NETDEV2_VMQ),y)
+netchannel2-objs += vmq.o
+endif
+
 ifeq ($(CONFIG_XEN_NETDEV2_BYPASSABLE),y)
 netchannel2-objs += bypassee.o
 endif
diff --git a/drivers/xen/netchannel2/chan.c b/drivers/xen/netchannel2/chan.c
index 060b49b..8dad6fe 100644
--- a/drivers/xen/netchannel2/chan.c
+++ b/drivers/xen/netchannel2/chan.c
@@ -13,6 +13,7 @@
 
 #include "netchannel2_endpoint.h"
 #include "netchannel2_core.h"
+#include "vmq.h"
 
 static int process_ring(struct napi_struct *napi,
                        int work_avail);
@@ -810,6 +811,8 @@ static int process_ring(struct napi_struct *napi,
        /* Pick up incoming messages. */
        work_done = nc2_poll(ncrp, work_avail, &rx_queue);
 
+       do_vmq_work(nc);
+
        /* Transmit pending packets. */
        if (!skb_queue_empty(&ncrp->pending_tx_queue)) {
                skb = __skb_dequeue(&ncrp->pending_tx_queue);
@@ -828,9 +831,11 @@ static int process_ring(struct napi_struct *napi,
                   This must happen before we flush the rings, since
                   that's when the PACKET messages will be made
                   visible to the other end. */
-               if (ncrp == &nc->rings)
+               if (ncrp == &nc->rings) {
                        flush_hypercall_batcher(&nc->batcher,
                                                nc2_posted_on_gntcopy_fail);
+                       vmq_flush_unmap_hypercall();
+               }
 
                flush_rings(ncrp);
 
diff --git a/drivers/xen/netchannel2/netback2.c b/drivers/xen/netchannel2/netback2.c
index 129ef81..eb2a781 100644
--- a/drivers/xen/netchannel2/netback2.c
+++ b/drivers/xen/netchannel2/netback2.c
@@ -10,8 +10,13 @@
 #include "netchannel2_core.h"
 #include "netchannel2_endpoint.h"
 #include "netchannel2_uspace.h"
+#include "vmq.h"
 
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+#define NR_TX_BUFS (VMQ_MAX_BUFFERS+256)
+#else
 #define NR_TX_BUFS 256
+#endif
 
 static atomic_t next_handle;
 /* A list of all currently-live netback2 interfaces. */
@@ -168,6 +173,8 @@ static int attach_to_frontend(struct netback2 *nd)
                return err;
        }
 
+       nc2_vmq_connect(nc);
+
        /* All done */
        nd->attached = 1;
 
@@ -176,6 +183,8 @@ static int attach_to_frontend(struct netback2 *nd)
 
 static void nb2_shutdown(struct netchannel2 *nc)
 {
+       nc2_vmq_disconnect(nc);
+
        nc2_set_nr_tx_buffers(nc, 0);
 }
 
diff --git a/drivers/xen/netchannel2/netchannel2_core.h b/drivers/xen/netchannel2/netchannel2_core.h
index 1939cbb..8e1657d 100644
--- a/drivers/xen/netchannel2/netchannel2_core.h
+++ b/drivers/xen/netchannel2/netchannel2_core.h
@@ -7,6 +7,8 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
+#include "vmq_def.h"
+
 /* After we send this number of frags, we request the other end to
  * notify us when sending the corresponding finish packet message */
 #define MAX_MAX_COUNT_FRAGS_NO_EVENT 192
@@ -43,6 +45,9 @@ enum transmit_policy {
        transmit_policy_grant = transmit_policy_first,
        transmit_policy_post,
        transmit_policy_map,
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+       transmit_policy_vmq,
+#endif
        transmit_policy_small,
        transmit_policy_last = transmit_policy_small
 };
@@ -437,6 +442,11 @@ struct netchannel2 {
 
        struct hypercall_batcher batcher;
 
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+       /* vmq data for supporting multi-queue devices */
+       nc2_vmq_t vmq;
+#endif
+
 #ifdef CONFIG_XEN_NETDEV2_AUTOMATIC_BYPASS
        struct nc2_auto_bypass auto_bypass;
 #endif
diff --git a/drivers/xen/netchannel2/posted_buffer.h b/drivers/xen/netchannel2/posted_buffer.h
new file mode 100644
index 0000000..e249777
--- /dev/null
+++ b/drivers/xen/netchannel2/posted_buffer.h
@@ -0,0 +1,50 @@
+/* Buffer management related bits, shared between vmq.c and
+ * posted_buffer.c */
+#ifndef NC2_POSTED_BUFFER_H__
+#define NC2_POSTED_BUFFER_H__
+
+/* A buffer which the other end has provided us which we can use to
+   transmit packets to it. */
+struct nc2_tx_buffer {
+       struct list_head list;
+       uint32_t id; /* ID assigned by the remote endpoint. */
+       grant_ref_t gref;
+       uint16_t off_in_page;
+       uint16_t size;
+       grant_handle_t grant_handle;
+};
+
+/* add a buffer to the pending list of buffers to be returned to the other end */
+static inline void return_tx_buffer(struct netchannel2 *nc,
+                                   struct nc2_tx_buffer *buffer)
+{
+       list_add(&buffer->list, &nc->pending_tx_buffer_return);
+}
+
+static inline struct nc2_tx_buffer *_get_tx_buffer(struct netchannel2 *nc)
+{
+       struct nc2_tx_buffer *buffer;
+       struct list_head *entry = nc->avail_tx_buffers.next;
+       list_del(entry);
+       buffer = list_entry(entry, struct nc2_tx_buffer, list);
+       nc->nr_avail_tx_buffers--;
+       return buffer;
+}
+
+/* recycle a posted buffer: return it to the list of available buffers */
+static inline void recycle_tx_buffer(struct netchannel2 *nc,
+                                   struct nc2_tx_buffer *buffer)
+{
+       list_add(&buffer->list, &nc->avail_tx_buffers);
+       nc->nr_avail_tx_buffers++;
+}
+
+/* add a buffer slot to list of unused buffer slots after it has been
+ * returned to other end */
+static inline void free_tx_buffer(struct netchannel2 *nc,
+                                 struct nc2_tx_buffer *buffer)
+{
+       list_add(&buffer->list, &nc->unused_tx_buffer_slots);
+}
+
+#endif /* !NC2_POSTED_BUFFER_H__ */
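
The four inline helpers above are the complete lifecycle of a posted TX buffer: a buffer posted by the other end sits on avail_tx_buffers until _get_tx_buffer() claims it, after which it moves either back onto the available list (recycle_tx_buffer()), onto pending_tx_buffer_return to be handed back (return_tx_buffer()), or onto unused_tx_buffer_slots once the remote end has reclaimed it (free_tx_buffer()). A minimal user-space model of those list movements, with purely illustrative names, looks like this:

#include <stdio.h>

struct buf { int id; struct buf *next; };

/* head insertion/removal, mirroring the list_add()/list_del() pairs above */
static void push(struct buf **list, struct buf *b) { b->next = *list; *list = b; }
static struct buf *pop(struct buf **list) { struct buf *b = *list; *list = b->next; return b; }

int main(void)
{
        struct buf bufs[2] = { { 0, NULL }, { 1, NULL } };
        struct buf *avail = NULL, *pending_return = NULL, *unused_slots = NULL;
        struct buf *b;
        int nr_avail = 0;

        push(&avail, &bufs[0]); nr_avail++;             /* other end posts buffers */
        push(&avail, &bufs[1]); nr_avail++;

        b = pop(&avail); nr_avail--;                    /* _get_tx_buffer() */
        push(&avail, b); nr_avail++;                    /* recycle_tx_buffer() */

        b = pop(&avail); nr_avail--;                    /* _get_tx_buffer() again */
        push(&pending_return, b);                       /* return_tx_buffer() */
        push(&unused_slots, pop(&pending_return));      /* free_tx_buffer() */

        printf("available: %d, freed slot id: %d\n", nr_avail, unused_slots->id);
        return 0;
}
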
diff --git a/drivers/xen/netchannel2/posted_buffers.c b/drivers/xen/netchannel2/posted_buffers.c
index 96de7da..9fb7570 100644
--- a/drivers/xen/netchannel2/posted_buffers.c
+++ b/drivers/xen/netchannel2/posted_buffers.c
@@ -9,6 +9,7 @@
 #include <xen/live_maps.h>
 #include "netchannel2_endpoint.h"
 #include "netchannel2_core.h"
+#include "posted_buffer.h"
 
 #define POSTED_BUFFER_SIZE PAGE_SIZE
 
@@ -350,17 +351,6 @@ void nc2_handle_set_nr_posted_buffers(struct netchannel2 *nc,
 
 /* -------------------------- Transmit ------------------------------- */
 
-/* A buffer which the other end has provided us which we can use to
-   transmit packets to it. */
-struct nc2_tx_buffer {
-       struct list_head list;
-       uint32_t id; /* ID assigned by the remote endpoint. */
-       grant_ref_t gref;
-       uint16_t off_in_page;
-       uint16_t size;
-       grant_handle_t grant_handle;
-};
-
 /* A representation of a packet which is halfway through being
    prepared for transmission. */
 struct post_packet_plan {
@@ -373,14 +363,6 @@ struct post_packet_plan {
        volatile struct netchannel2_fragment *output_frag;
 };
 
-/* add a buffer slot to list of unused buffer slots after it has been
- * returned to other end */
-static void free_tx_buffer(struct netchannel2 *nc,
-                          struct nc2_tx_buffer *buffer)
-{
-       list_add(&buffer->list, &nc->unused_tx_buffer_slots);
-}
-
 /* A grant copy failed while we were transmitting a packet.  That
    indicates that the *receiving* domain gave us a bad RX buffer.
    We're too late to send them an error, so there isn't really
diff --git a/drivers/xen/netchannel2/util.c b/drivers/xen/netchannel2/util.c
index 79d9f09..1d96256 100644
--- a/drivers/xen/netchannel2/util.c
+++ b/drivers/xen/netchannel2/util.c
@@ -34,7 +34,13 @@ int allocate_txp_slot(struct netchannel2_ring_pair *ncrp,
 static void nc2_free_skb(struct netchannel2 *nc,
                         struct sk_buff *skb)
 {
-       dev_kfree_skb(skb);
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+       nc2_vmq_t *vmq = &nc->vmq;
+       if (get_skb_overlay(skb)->policy == transmit_policy_vmq)
+               skb_queue_tail(&vmq->dealloc_queue, skb);
+       else
+#endif
+               dev_kfree_skb(skb);
 }
 
 void release_txp_slot(struct netchannel2_ring_pair *ncrp,
diff --git a/drivers/xen/netchannel2/vmq.c b/drivers/xen/netchannel2/vmq.c
new file mode 100644
index 0000000..e36962b
--- /dev/null
+++ b/drivers/xen/netchannel2/vmq.c
@@ -0,0 +1,805 @@
+/*****************************************************************************
+ * vmq.c
+ *
+ * Support multi-queue network devices.
+ *
+ * Copyright (c) 2008, Kaushik Kumar Ram, Rice University.
+ * Copyright (c) 2008, Jose Renato Santos, Hewlett-Packard Co.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+/* This only implements the transmit half of the method; receive is
+ * handled by posted_buffers.c */
+#include <linux/kernel.h>
+#include <linux/netvmq.h>
+#include <linux/skbuff.h>
+#include <xen/xenbus.h>
+#include <xen/balloon.h>
+#include "netchannel2_core.h"
+
+#include "posted_buffer.h"
+#include "vmq.h"
+
+/* state of device queue when operating in vmq mode */
+#define VMQ_QUEUE_DISABLED  0
+#define VMQ_QUEUE_STARTING  1
+#define VMQ_QUEUE_ENABLED   2
+#define VMQ_QUEUE_CLOSING   3
+
+#define VMQ_MAX_UNMAP_OPS 256
+struct vmq_unmap_grants {
+       unsigned n;
+       gnttab_unmap_grant_ref_t gop[VMQ_MAX_UNMAP_OPS];
+};
+typedef struct vmq_unmap_grants vmq_unmap_grants_t;
+
+static vmq_unmap_grants_t vmq_unmap_grants;
+
+static inline void vmq_flush_unmap_grants(void)
+{
+       if (vmq_unmap_grants.n == 0)
+               return;
+
+       if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                     vmq_unmap_grants.gop,
+                                     vmq_unmap_grants.n))
+               BUG();
+       vmq_unmap_grants.n = 0;
+}
+
+static inline gnttab_unmap_grant_ref_t *vmq_next_unmap_gop(void)
+{
+       if (vmq_unmap_grants.n == VMQ_MAX_UNMAP_OPS)
+               vmq_flush_unmap_grants();
+       return &vmq_unmap_grants.gop[vmq_unmap_grants.n++];
+}
+
+void vmq_flush_unmap_hypercall(void)
+{
+       vmq_flush_unmap_grants();
+}
+
+static inline unsigned long vmq_idx_to_pfn(nc2_vmq_t *vmq, unsigned int idx)
+{
+       return page_to_pfn(vmq->pages[idx]);
+}
+
+static inline unsigned long vmq_idx_to_kaddr(nc2_vmq_t *vmq, unsigned int idx)
+{
+       return (unsigned long)pfn_to_kaddr(vmq_idx_to_pfn(vmq, idx));
+}
+
+/* get vmq idx from page struct */
+static long nc2_vmq_page_index(struct page *page)
+{
+       nc2_vmq_buf_t *vmq_buf;
+       vmq_buf = (nc2_vmq_buf_t *)page->mapping;
+       return vmq_buf - vmq_buf->nc->vmq.buffer;
+}
+
+/* Reads a physical device name from xenstore and
+ * returns a pointer to the associated net_device structure.
+ * Returns NULL on error. */
+static struct net_device *read_pdev(struct xenbus_device *dev)
+{
+       char *pdevstr;
+       struct net_device *pdev = NULL;
+
+       pdevstr = xenbus_read(XBT_NIL, dev->nodename, "pdev", NULL);
+       if (IS_ERR(pdevstr))
+               return NULL;
+
+       if (pdevstr)
+               pdev = dev_get_by_name(&init_net, pdevstr);
+
+       kfree(pdevstr);
+
+       return pdev;
+}
+
+static void nc2_vmq_page_release(struct page *page, unsigned int order)
+{
+       printk(KERN_CRIT "%s: ERROR: Unexpected release of netchannel2 vmq page\n",
+              __func__);
+       BUG();
+}
+
+static inline int nc2_vmq_is_disabled(struct netchannel2 *nc)
+{
+       return nc->vmq.vmq_state == VMQ_QUEUE_DISABLED;
+}
+
+static inline int nc2_vmq_is_starting(struct netchannel2 *nc)
+{
+       return nc->vmq.vmq_state == VMQ_QUEUE_STARTING;
+}
+
+static inline int nc2_vmq_is_enabled(struct netchannel2 *nc)
+{
+       return nc->vmq.vmq_state == VMQ_QUEUE_ENABLED;
+}
+
+static inline int nc2_vmq_is_closing(struct netchannel2 *nc)
+{
+       return nc->vmq.vmq_state == VMQ_QUEUE_CLOSING;
+}
+
+static inline void nc2_vmq_enable(struct netchannel2 *nc)
+{
+       nc2_vmq_t *vmq = &nc->vmq;
+       vmq_get(vmq);
+       vmq_enable_queue(vmq->pdev, vmq->vmq_id);
+       vmq->vmq_state = VMQ_QUEUE_ENABLED;
+}
+
+void nc2_vmq_disconnect(struct netchannel2 *nc)
+{
+       nc2_vmq_t *vmq = &nc->vmq;
+
+       if (nc2_vmq_is_enabled(nc)) {
+               vmq_disable_queue(vmq->pdev, vmq->vmq_id);
+               vmq_free_queue(vmq->pdev, vmq->vmq_id);
+               vmq->vmq_state = VMQ_QUEUE_CLOSING;
+               /* wait until all buffers have been returned by dev driver */
+               wait_event(vmq->waiting_to_free,
+                          atomic_read(&vmq->refcnt) == 0);
+               return;
+       }
+
+       if (nc2_vmq_is_starting(nc)) {
+               vmq_free_queue(vmq->pdev, vmq->vmq_id);
+               vmq->vmq_state = VMQ_QUEUE_CLOSING;
+               return;
+       }
+}
+
+static void nc2_vmq_end_map_buffers(gnttab_map_grant_ref_t *mop, int count,
+                                   struct netchannel2 *nc, u16 *alloc_idx)
+{
+       int i, err;
+       u16 idx;
+       unsigned int prod;
+       nc2_vmq_t *vmq = &nc->vmq;
+
+       prod = vmq->mapped_pages_prod;
+
+       for (i = 0; i < count; i++) {
+               idx = alloc_idx[i];
+
+               /* Check error status */
+               err = mop->status;
+               if (likely(!err)) {
+                       set_phys_to_machine(
+                                           __pa(vmq_idx_to_kaddr(vmq, idx))
+                                           >> PAGE_SHIFT,
+                                           FOREIGN_FRAME(mop->dev_bus_addr
+                                                         >> PAGE_SHIFT));
+                       /* Store the handle */
+                       vmq->buffer[idx].buf->grant_handle = mop->handle;
+
+                       /* Add it to the mapped pages list */
+                       vmq->mapped_pages[VMQ_IDX_MASK(prod++)] = idx;
+                       mop++;
+                       continue;
+               }
+
+               /* Error mapping page: return posted buffer to other end.
+                * TODO: We might need an error field on the return buffer
+                * message */
+               return_tx_buffer(nc, vmq->buffer[idx].buf);
+
+               /* Add the page back to the free list */
+               vmq->unmapped_pages[VMQ_IDX_MASK(vmq->unmapped_pages_prod++)]
+                       = idx;
+
+               mop++;
+       }
+
+       smp_wmb();
+       vmq->mapped_pages_prod = prod;
+
+       return;
+}
+
+/* Map guest buffers and place them in the mapped buffers list. The mapped
+ * pages in this list are used when allocating a skb (vmq_alloc_skb()).
+ */
+static void nc2_vmq_map_buffers(struct netchannel2 *nc)
+{
+       u16 idx;
+       int count = 0;
+       unsigned int cons;
+       int nbufs;
+       int buf_avail;
+       struct nc2_tx_buffer *buf;
+       struct nc2_vmq *vmq = &nc->vmq;
+       int n_mapped = nr_vmq_bufs(nc);
+
+       /*
+        * Putting hundreds of bytes on the stack is considered rude.
+        * Static works because a tasklet can only be on one CPU at any time.
+        */
+       static gnttab_map_grant_ref_t rx_map_ops[VMQ_MAX_BUFFERS];
+       static u16 alloc_idx[VMQ_MAX_BUFFERS];
+
+       /* If there are at least VMQ_MIN_BUFFERS buffers mapped, there is no work to do */
+       if (n_mapped >= VMQ_MIN_BUFFERS)
+               return;
+
+       /* Try to get VMQ_MAX_BUFFERS mapped buffers, if there are
+          sufficient buffers posted by the other end  */
+       nbufs = VMQ_MAX_BUFFERS - n_mapped;
+       buf_avail = nc->nr_avail_tx_buffers;
+       if (nbufs > buf_avail)
+               nbufs = buf_avail;
+
+       /* Xen cannot handle more than 512 grant ops in a single hypercall */
+       if (nbufs > 512)
+               nbufs = 512;
+
+       /* give up if there are no buffers available */
+       if (nbufs <= 0)
+               return;
+
+       /* Note that we *should* have free pages to consume here
+        * and no checks are needed.
+        */
+       cons = vmq->unmapped_pages_cons;
+
+       while (count < nbufs) {
+               idx = vmq->unmapped_pages[VMQ_IDX_MASK(cons++)];
+               buf = vmq->buffer[idx].buf = _get_tx_buffer(nc);
+               /* Setup grant map operation */
+               gnttab_set_map_op(&rx_map_ops[count],
+                                 vmq_idx_to_kaddr(vmq, idx),
+                                 GNTMAP_host_map,
+                                 buf->gref,
+                                 nc->rings.otherend_id);
+               alloc_idx[count] = idx;
+               count++;
+       }
+
+       vmq->unmapped_pages_cons = cons;
+
+       /* Map all the pages */
+       BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
+                                        rx_map_ops, nbufs));
+
+       /* Finalize buffer mapping after checking if the grant operations
+          succeeded */
+       nc2_vmq_end_map_buffers(rx_map_ops, nbufs, nc, alloc_idx);
+
+       vmq->nbufs += nbufs;
+}
+
+static void nc2_vmq_unmap_buf(struct netchannel2 *nc,
+                             unsigned int idx, int recycle)
+{
+       nc2_vmq_t *vmq = &nc->vmq;
+       unsigned long pfn;
+       gnttab_unmap_grant_ref_t *gop;
+       unsigned prod;
+
+       pfn = vmq_idx_to_pfn(vmq, idx);
+       /* Already unmapped? */
+       if (!phys_to_machine_mapping_valid(pfn))
+               return;
+
+       gop = vmq_next_unmap_gop();
+       gnttab_set_unmap_op(gop, vmq_idx_to_kaddr(vmq, idx),
+                           GNTMAP_host_map,
+                           vmq->buffer[idx].buf->grant_handle);
+
+       vmq->nbufs--;
+
+       set_phys_to_machine(__pa(vmq_idx_to_kaddr(vmq, idx)) >>
+                           PAGE_SHIFT,
+                           INVALID_P2M_ENTRY);
+       /* Ready for next use. */
+       gnttab_reset_grant_page(vmq->pages[idx]);
+       /* Add the page back to the unmapped list */
+       prod = vmq->unmapped_pages_prod;
+       vmq->unmapped_pages[VMQ_IDX_MASK(prod++)] = idx;
+       if (recycle)
+               recycle_tx_buffer(nc, vmq->buffer[idx].buf);
+       else
+               free_tx_buffer(nc, vmq->buffer[idx].buf);
+       smp_wmb();
+       vmq->unmapped_pages_prod = prod;
+}
+
+static void nc2_vmq_free_mapped_bufs(struct netchannel2 *nc)
+{
+       nc2_vmq_t *vmq = &nc->vmq;
+       unsigned int idx;
+       unsigned prod, cons;
+
+       /* The queue should be disabled before this function is called */
+       BUG_ON(vmq->vmq_state == VMQ_QUEUE_ENABLED);
+
+       cons = vmq->mapped_pages_cons;
+       prod = vmq->mapped_pages_prod;
+       smp_rmb();
+
+       while (cons != prod) {
+               idx = vmq->mapped_pages[VMQ_IDX_MASK(cons++)];
+               nc2_vmq_unmap_buf(nc, idx, 1);
+       }
+
+       vmq_flush_unmap_grants();
+
+       vmq->mapped_pages_cons = cons;
+
+}
+
+static void nc2_vmq_free_skb(struct sk_buff *skb)
+{
+       struct netchannel2 *nc;
+       nc2_vmq_t *vmq;
+       unsigned int idx;
+       int nr_frags, i;
+       struct skb_shared_info *shinfo = skb_shinfo(skb);
+       skb_frag_t *frags = shinfo->frags;
+
+       nc = netdev_priv(skb->dev);
+       vmq = &nc->vmq;
+
+       nr_frags = shinfo->nr_frags;
+       for (i = 0; i < nr_frags; i++) {
+               idx = nc2_vmq_page_index(frags[i].page);
+               nc2_vmq_unmap_buf(nc, idx, 1);
+       }
+
+       vmq_flush_unmap_grants();
+
+       shinfo->frag_list = NULL;
+       shinfo->nr_frags = 0;
+
+       /* Add the skb back to the free pool */
+       skb_queue_tail(&vmq->free_skb_list, skb);
+}
+
+/* Initialize the free socket buffer list */
+static int vmq_init_free_skb_list(int n, struct sk_buff_head *free_skb_list)
+{
+       int i;
+       struct sk_buff *skb;
+
+       skb_queue_head_init(free_skb_list);
+
+       for (i = 0; i < n; i++) {
+               skb = alloc_skb(VMQ_SKB_SIZE, GFP_ATOMIC);
+               if (!skb) {
+                       printk(KERN_ERR "Netchannel2 vmq: Failed to allocate socket "
+                              "buffer %d (max=%d)\n", i, n);
+                       goto error;
+               }
+               skb_queue_tail(free_skb_list, skb);
+       }
+
+       return 0;
+error:
+       /* Free all the allocated buffers and return an error */
+       while (!skb_queue_empty(free_skb_list))
+               kfree_skb(skb_dequeue(free_skb_list));
+
+       return -ENOMEM;
+}
+
+/* Initialize vmq. Return 1 if vmq is used and 0 otherwise */
+int nc2_vmq_connect(struct netchannel2 *nc)
+{
+       nc2_vmq_t *vmq = &nc->vmq;
+       struct page *page;
+       int q_id;
+       int size;
+       int i;
+
+       vmq->vmq_mode = 0;
+       vmq->pdev = read_pdev(nc->xenbus_device);
+
+       /* cannot use vmq mode if physical device not found */
+       if (!vmq->pdev)
+               return 0;
+
+       /* Allocate a RX queue */
+       q_id = vmq_alloc_queue(vmq->pdev, VMQ_TYPE_RX);
+       if (q_id < 0)
+               /* Allocation failed, cannot use multi-queue */
+               goto free_pdev;
+
+       vmq->vmq_id = q_id;
+
+       /* Set the size of the queue */
+       size = vmq_get_maxsize(vmq->pdev);
+       if (size > VMQ_QUEUE_SIZE)
+               size = VMQ_QUEUE_SIZE;
+       if (vmq_set_size(vmq->pdev, q_id, size) < 0) {
+               /* Failure, free up the queue and return error */
+               printk(KERN_ERR "%s: could not set queue size on net device\n",
+                      __func__);
+               goto free_queue;
+       }
+       vmq->vmq_size = size;
+
+       /* Set the mac address of the queue */
+       if (vmq_set_mac(vmq->pdev, q_id, nc->rings.remote_mac) < 0) {
+               /* Failure, free up the queue and return error */
+               printk(KERN_ERR "%s: could not set MAC address for net device queue\n",
+                      __func__);
+               goto free_queue;
+       }
+
+       vmq->pages = alloc_empty_pages_and_pagevec(VMQ_MAX_BUFFERS);
+       if (vmq->pages == NULL) {
+               printk(KERN_ERR "%s: out of memory\n", __func__);
+               goto free_queue;
+       }
+
+       skb_queue_head_init(&vmq->dealloc_queue);
+       skb_queue_head_init(&vmq->rx_queue);
+
+       if (vmq_init_free_skb_list(VMQ_MAX_BUFFERS,
+                                  &vmq->free_skb_list)) {
+               printk(KERN_ERR "%s: Could not allocate free socket buffers\n",
+                       __func__);
+               goto free_pagevec;
+       }
+
+       for (i = 0; i < VMQ_MAX_BUFFERS; i++) {
+               vmq->buffer[i].nc = nc;
+               page = vmq->pages[i];
+               SetPageForeign(page, nc2_vmq_page_release);
+               page->mapping = (void *)&vmq->buffer[i];
+               vmq->unmapped_pages[i] = i;
+       }
+
+       vmq->unmapped_pages_prod = VMQ_MAX_BUFFERS;
+       vmq->unmapped_pages_cons = 0;
+
+       vmq->mapped_pages_prod = 0;
+       vmq->mapped_pages_cons = 0;
+
+       vmq->nbufs = 0;
+       vmq->vmq_mode = 1;
+
+       /* Store the pointer to netchannel2 device in pdev */
+       BUG_ON((vmq->pdev->vmq == NULL) || (vmq->pdev->vmq->queue == NULL));
+       vmq->pdev->vmq->queue[q_id].guest = (void *)nc->net_device;
+
+       atomic_set(&vmq->refcnt, 0);
+       init_waitqueue_head(&vmq->waiting_to_free);
+
+       printk(KERN_INFO "Netchannel2 using vmq mode for guest %d\n",
+              nc->xenbus_device->otherend_id);
+
+       vmq->vmq_state = VMQ_QUEUE_STARTING;
+
+       return 1;       /* Success */
+
+free_pagevec:
+       free_empty_pages_and_pagevec(vmq->pages, VMQ_MAX_BUFFERS);
+free_queue:
+       vmq_free_queue(vmq->pdev, vmq->vmq_id);
+free_pdev:
+       dev_put(vmq->pdev);
+       vmq->pdev = NULL;
+       return 0;
+}
+
+void nc2_vmq_shutdown(struct netchannel2 *nc)
+{
+       nc2_vmq_t *vmq = &nc->vmq;
+       int i;
+
+       if (!vmq->vmq_mode)
+               return;
+
+       /* All posted bufs should have been returned */
+       BUG_ON(nr_vmq_bufs(nc) != nr_vmq_mapped_bufs(nc));
+
+       /* free the mapped bufs */
+       nc2_vmq_free_mapped_bufs(nc);
+
+       /* Free the vmq pages */
+       if (vmq->pages) {
+               for (i = 0; i < VMQ_MAX_BUFFERS; i++) {
+                       if (PageForeign(vmq->pages[i]))
+                               ClearPageForeign(vmq->pages[i]);
+                       vmq->pages[i]->mapping = NULL;
+               }
+               free_empty_pages_and_pagevec(vmq->pages, VMQ_MAX_BUFFERS);
+               vmq->pages = NULL;
+       }
+
+       while (!skb_queue_empty(&vmq->free_skb_list)) {
+               /* Free the socket buffer pool */
+               kfree_skb(skb_dequeue(&vmq->free_skb_list));
+       }
+       vmq->vmq_state = VMQ_QUEUE_DISABLED;
+       vmq->vmq_mode = 0;
+
+       if (vmq->pdev) {
+               dev_put(vmq->pdev);
+               vmq->pdev = NULL;
+       }
+
+       vmq_put(vmq);
+}
+
+static int prepare_xmit_allocate_vmq(struct netchannel2 *nc,
+                                    struct sk_buff *skb)
+{
+       unsigned msg_size;
+
+       msg_size = get_transmitted_packet_msg_size(skb);
+       if (!nc2_reserve_payload_bytes(&nc->rings.prod_ring, msg_size))
+               return -1;
+       return 0;
+}
+
+void do_vmq_work(struct netchannel2 *nc)
+{
+       nc2_vmq_t *vmq = &nc->vmq;
+       struct sk_buff *skb;
+       unsigned long flags;
+
+       /* if not in vmq mode do nothing */
+       if (!nc2_in_vmq_mode(nc))
+               return;
+
+       /* Map guest buffers for dedicated NIC RX queue if needed */
+       if (nr_vmq_bufs(nc) < VMQ_MIN_BUFFERS) {
+               nc2_vmq_map_buffers(nc);
+               /* We delay enabling the queue until we have enough
+                  posted buffers. Check if it is time to enable it */
+               if (nc2_vmq_is_starting(nc) &&
+                   (nr_vmq_bufs(nc) >= VMQ_MIN_BUFFERS)) {
+                       nc2_vmq_enable(nc);
+               }
+       }
+
+       /* free vmq skb's returned by the physical device driver */
+       while (!skb_queue_empty(&nc->vmq.dealloc_queue))
+               nc2_vmq_free_skb(skb_dequeue(&nc->vmq.dealloc_queue));
+
+       /* complete vmq closing after all packets have been returned by
+        * the physical device driver */
+       if (nc2_vmq_is_closing(nc) &&
+           (nr_vmq_bufs(nc) == nr_vmq_mapped_bufs(nc))) {
+               nc->vmq.vmq_state = VMQ_QUEUE_DISABLED;
+               nc2_vmq_shutdown(nc);
+       }
+
+       spin_lock_irqsave(&vmq->rx_queue.lock, flags);
+       while (!skb_queue_empty(&vmq->rx_queue)) {
+               skb = __skb_dequeue(&nc->vmq.rx_queue);
+               if (prepare_xmit_allocate_vmq(nc, skb) < 0) {
+                       __skb_queue_head(&vmq->rx_queue, skb);
+                       spin_unlock_irqrestore(&vmq->rx_queue.lock, flags);
+                       return;
+               }
+               __skb_queue_tail(&nc->rings.pending_tx_queue, skb);
+       }
+       spin_unlock_irqrestore(&vmq->rx_queue.lock, flags);
+}
+
+/* Return the netchannel2 device corresponding to the given queue in pdev */
+static inline struct net_device *nc2_vmq_queue_to_vif(struct net_device *pdev,
+                                                     int queue_id)
+{
+       net_vmq_t *n_vmq;
+       vmq_queue_t *vmq_q;
+
+       n_vmq = pdev->vmq;
+       BUG_ON(n_vmq == NULL);
+       vmq_q = &n_vmq->queue[queue_id];
+       BUG_ON(vmq_q == NULL);
+
+       return (struct net_device *)vmq_q->guest;
+}
+
+/* Handle incoming vmq packet */
+int vmq_netif_rx(struct sk_buff *skb, int queue_id)
+{
+       struct skb_cb_overlay *skb_co = get_skb_overlay(skb);
+       struct net_device *dev;
+       struct netchannel2 *nc;
+       nc2_vmq_t *vmq;
+
+       memset(skb_co, 0, sizeof(*skb_co));
+
+       skb_co->nr_fragments = skb_shinfo(skb)->nr_frags;
+       skb_co->type = NC2_PACKET_TYPE_pre_posted;
+       skb_co->policy = transmit_policy_vmq;
+
+       /* get the netchannel2 interface corresponding to this queue */
+       dev = nc2_vmq_queue_to_vif(skb->dev, queue_id);
+       nc = netdev_priv(dev);
+       vmq = &nc->vmq;
+
+       /* replace source dev with destination dev */
+       skb->dev = dev;
+       /* add skb to rx_queue */
+       skb_queue_tail(&vmq->rx_queue, skb);
+
+       /* Trigger thread execution to process new packets */
+       nc2_kick(&nc->rings);
+
+       return 0;
+}
+EXPORT_SYMBOL(vmq_netif_rx);
+
+/* Allocate a socket buffer from the free list, get a guest posted
+ * buffer, attach it to the skb, and return it.
+ */
+struct sk_buff *vmq_alloc_skb(struct net_device *netdevice, int queue_id,
+                             unsigned int length)
+{
+       struct sk_buff *skb;
+       struct netchannel2 *nc;
+       nc2_vmq_t *vmq;
+       unsigned int idx;
+       int nr_bufs, i;
+       unsigned int cons;
+       unsigned int prod;
+
+       /* get the netchannel2 interface corresponding to this queue */
+       nc = netdev_priv(nc2_vmq_queue_to_vif(netdevice, queue_id));
+
+       vmq = &nc->vmq;
+
+       /* Get a free buffer from the pool */
+       if (skb_queue_empty(&vmq->free_skb_list)) {
+               /* No buffers to allocate */
+               return NULL;
+       }
+
+       skb = skb_dequeue(&vmq->free_skb_list);
+       BUG_ON(skb == NULL);
+
+       nr_bufs = VMQ_NUM_BUFFERS(length);
+
+       cons = vmq->mapped_pages_cons;
+       prod = vmq->mapped_pages_prod;
+       smp_rmb();
+
+       if (nr_bufs > (prod - cons))
+               /* Not enough mapped buffers in the pool */
+               goto kick_nc2;
+
+       if (nr_bufs > MAX_SKB_FRAGS)
+               goto error;
+
+       for (i = 0; i < nr_bufs; i++) {
+               idx = vmq->mapped_pages[VMQ_IDX_MASK(cons)];
+               /* FIX ME: This can be simplified */
+               skb_shinfo(skb)->frags[i].page =
+                       virt_to_page(vmq_idx_to_kaddr(vmq, idx));
+               skb_shinfo(skb)->frags[i].page_offset = 0;
+               skb_shinfo(skb)->frags[i].size = PAGE_SIZE;
+               skb_shinfo(skb)->nr_frags++;
+               skb->dev = netdevice;
+               cons++;
+       }
+
+       vmq->mapped_pages_cons = cons;
+
+       /* if the number of buffers gets low, run the tasklet to map more buffers */
+       if (nr_vmq_bufs(nc) < VMQ_MIN_BUFFERS)
+               nc2_kick(&nc->rings);
+
+       return skb;
+
+kick_nc2:
+       /* kick netchannel2 interface to get any recently posted buffers */
+       nc2_kick(&nc->rings);
+error:
+       /* Add the skb back to the free pool */
+       skb_queue_tail(&vmq->free_skb_list, skb);
+       return NULL;
+}
+EXPORT_SYMBOL(vmq_alloc_skb);
+
+/* Detach the guest pages and free the socket buffer */
+void vmq_free_skb(struct sk_buff *skb, int queue_id)
+{
+       struct net_device *dev;
+       struct netchannel2 *nc;
+       nc2_vmq_t *vmq;
+
+       /* get the netchannel2 interface corresponding to this queue */
+       dev = nc2_vmq_queue_to_vif(skb->dev, queue_id);
+
+       nc = netdev_priv(dev);
+       vmq = &nc->vmq;
+
+       /* Add skb to the dealloc queue */
+       skb->dev = dev;
+       skb_queue_tail(&vmq->dealloc_queue, skb);
+
+       /* kick netchannel2 interface  */
+       nc2_kick(&nc->rings);
+
+}
+EXPORT_SYMBOL(vmq_free_skb);
+
+int nc2_is_vmq_packet(struct netchannel2 *nc, struct sk_buff *skb)
+{
+       int nr_frags;
+       long idx;
+       nc2_vmq_t *vmq = &nc->vmq;
+
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       if (vmq->vmq_mode && nr_frags &&
+           PageForeign(skb_shinfo(skb)->frags[0].page)) {
+               idx = nc2_vmq_page_index(skb_shinfo(skb)->frags[0].page);
+               if ((idx >= 0) && (idx < VMQ_MAX_BUFFERS))
+                       return 1;
+       }
+
+       return 0;
+}
+
+/* Prepare to transmit a vmq packet */
+void xmit_vmq(struct netchannel2 *nc, struct sk_buff *skb,
+             volatile void *msg_buf)
+{
+       volatile struct netchannel2_msg_packet *msg = msg_buf;
+       volatile struct netchannel2_fragment *out_frag;
+       nc2_vmq_t *vmq = &nc->vmq;
+       skb_frag_t *frag;
+       struct nc2_tx_buffer *txbuf;
+       int nr_frags;
+       unsigned int idx;
+       unsigned x;
+
+       nr_frags = skb_shinfo(skb)->nr_frags;
+       for (x = 0; x < nr_frags; x++) {
+               frag = &skb_shinfo(skb)->frags[x];
+               out_frag = &msg->frags[x];
+
+               idx = nc2_vmq_page_index(frag->page);
+               txbuf = vmq->buffer[idx].buf;
+               out_frag->pre_post.id = txbuf->id;
+               out_frag->off  = frag->page_offset;
+               out_frag->size = frag->size;
+               /* TODO: need to batch unmap grants */
+               nc2_vmq_unmap_buf(nc, idx, 0);
+       }
+
+       /* Avoid unmapping frag grants when the skb is freed later
+        * by nc2_vmq_free_skb() */
+       skb_shinfo(skb)->nr_frags = 0;
+}
+
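
One pattern in vmq.c worth calling out: vmq_next_unmap_gop() and vmq_flush_unmap_grants() form a simple accumulate-and-flush batcher, so grant unmaps issued on the transmit path collapse into at most one GNTTABOP_unmap_grant_ref hypercall per VMQ_MAX_UNMAP_OPS operations, with process_ring() forcing a final flush through vmq_flush_unmap_hypercall(). A user-space sketch of the same pattern, where submit_batch() is a hypothetical stand-in for the hypercall:

#include <stdio.h>

#define MAX_OPS 4                       /* stands in for VMQ_MAX_UNMAP_OPS */

struct op { int handle; };
static struct op ops[MAX_OPS];
static unsigned int n_ops;

/* hypothetical stand-in for HYPERVISOR_grant_table_op() */
static void submit_batch(struct op *batch, unsigned int n)
{
        printf("submitting %u ops\n", n);
}

/* like vmq_flush_unmap_grants(): a no-op when the batch is empty */
static void flush(void)
{
        if (n_ops == 0)
                return;
        submit_batch(ops, n_ops);
        n_ops = 0;
}

/* like vmq_next_unmap_gop(): flush first if the batch is full */
static struct op *next_op(void)
{
        if (n_ops == MAX_OPS)
                flush();
        return &ops[n_ops++];
}

int main(void)
{
        int i;

        for (i = 0; i < 10; i++)
                next_op()->handle = i;
        flush();        /* like vmq_flush_unmap_hypercall(): batches of 4, 4, 2 */
        return 0;
}
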
diff --git a/drivers/xen/netchannel2/vmq.h b/drivers/xen/netchannel2/vmq.h
new file mode 100644
index 0000000..fa1cc8a
--- /dev/null
+++ b/drivers/xen/netchannel2/vmq.h
@@ -0,0 +1,58 @@
+#ifndef VMQ_H__
+#define VMQ_H__
+
+#include "netchannel2_core.h"
+
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+
+int nc2_vmq_connect(struct netchannel2 *nc);
+void nc2_vmq_disconnect(struct netchannel2 *nc);
+void do_vmq_work(struct netchannel2 *nc);
+int nc2_is_vmq_packet(struct netchannel2 *nc, struct sk_buff *skb);
+void xmit_vmq(struct netchannel2 *nc, struct sk_buff *skb,
+             volatile void *msg);
+void vmq_flush_unmap_hypercall(void);
+
+#define vmq_get(_b) atomic_inc(&(_b)->refcnt)
+
+#define vmq_put(_b)                                            \
+       do {                                                    \
+               if (atomic_dec_and_test(&(_b)->refcnt)) {       \
+                       wake_up(&(_b)->waiting_to_free);        \
+               }                                               \
+       } while (0)
+
+static inline int nr_vmq_mapped_bufs(struct netchannel2 *nc)
+{
+       return nc->vmq.mapped_pages_prod -
+               nc->vmq.mapped_pages_cons;
+}
+
+static inline int nr_vmq_bufs(struct netchannel2 *nc)
+{
+       return nc->vmq.nbufs;
+}
+
+static inline int nc2_in_vmq_mode(struct netchannel2 *nc)
+{
+       return nc->vmq.vmq_mode;
+}
+
+#else
+static inline int nc2_vmq_connect(struct netchannel2 *nc)
+{
+       return 0;
+}
+static inline void nc2_vmq_disconnect(struct netchannel2 *nc)
+{
+}
+static inline void do_vmq_work(struct netchannel2 *nc)
+{
+}
+static inline void vmq_flush_unmap_hypercall(void)
+{
+}
+#endif /* CONFIG_XEN_NETDEV2_VMQ */
+
+#endif /* !VMQ_H__ */
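
The vmq_get()/vmq_put() pair above, together with the wait_event() call in nc2_vmq_disconnect(), is the usual backend teardown idiom: every outstanding user bumps an atomic count, disconnect sleeps on waiting_to_free until the count reaches zero, and the final vmq_put() issues the wake-up. A rough user-space analogue using pthreads (the names and the worker thread are illustrative only):

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

static int refcnt;      /* plays the role of vmq->refcnt */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t waiting_to_free = PTHREAD_COND_INITIALIZER;

static void get_ref(void)       /* vmq_get() */
{
        pthread_mutex_lock(&lock);
        refcnt++;
        pthread_mutex_unlock(&lock);
}

static void put_ref(void)       /* vmq_put(): the last ref wakes the waiter */
{
        pthread_mutex_lock(&lock);
        if (--refcnt == 0)
                pthread_cond_signal(&waiting_to_free);
        pthread_mutex_unlock(&lock);
}

static void *driver(void *arg)  /* device driver still holding buffers */
{
        sleep(1);
        put_ref();              /* eventually returns the last one */
        return NULL;
}

int main(void)
{
        pthread_t t;

        get_ref();
        pthread_create(&t, NULL, driver, NULL);

        /* wait_event(waiting_to_free, atomic_read(&refcnt) == 0) */
        pthread_mutex_lock(&lock);
        while (refcnt != 0)
                pthread_cond_wait(&waiting_to_free, &lock);
        pthread_mutex_unlock(&lock);

        pthread_join(t, NULL);
        printf("all references dropped; safe to tear down\n");
        return 0;
}
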
diff --git a/drivers/xen/netchannel2/vmq_def.h b/drivers/xen/netchannel2/vmq_def.h
new file mode 100644
index 0000000..60f1ccb
--- /dev/null
+++ b/drivers/xen/netchannel2/vmq_def.h
@@ -0,0 +1,68 @@
+#ifndef VMQ_DEF_H__
+#define VMQ_DEF_H__
+
+
+/* size of HW queue in VMQ device */
+#define VMQ_QUEUE_SIZE 1024
+
+/* Minimum number of buffers needed for VMQ.
+ * This is the low-water mark that triggers mapping more guest buffers.
+ * Should be larger than the queue size to allow for in-flight packets.
+ */
+#define VMQ_MIN_BUFFERS 1920
+
+/* Maximum number of posted buffers which are reserved for VMQ.
+ * Should be less than MAX_POSTED_BUFFERS. For now, the difference can be used
+ * for intra-node guest-to-guest traffic. When we map guest buffers we try to
+ * have VMQ_MAX_BUFFERS mapped. The difference (VMQ_MAX_BUFFERS-VMQ_MIN_BUFFERS)
+ * helps batch multiple grant map operations.
+ * VMQ_QUEUE_SIZE < VMQ_MIN_BUFFERS < VMQ_MAX_BUFFERS < MAX_POSTED_BUFFERS.
+ * VMQ_MAX_BUFFERS must be a power of 2.
+ */
+#define VMQ_MAX_BUFFERS 2048
+
+/* skb size is zero since packet data uses fragments */
+#define VMQ_SKB_SIZE 0
+
+#define VMQ_NUM_BUFFERS(len) (((len) + PAGE_SIZE - 1) / PAGE_SIZE)
+
+#define VMQ_IDX_MASK(_i) ((_i) & (VMQ_MAX_BUFFERS - 1))
+
+typedef struct nc2_vmq_buf {
+       struct nc2_tx_buffer *buf;
+       struct netchannel2   *nc;
+} nc2_vmq_buf_t;
+
+typedef struct nc2_vmq {
+       struct net_device *pdev;        /* Pointer to physical device */
+       int vmq_mode;                   /* indicate if vif is in vmq mode   */
+       struct page **pages;            /* pages for mapping guest RX bufs  */
+       struct sk_buff_head free_skb_list;     /* Free socket buffer pool   */
+       struct sk_buff_head dealloc_queue;     /* list of skbs to be freed  */
+       struct sk_buff_head rx_queue;          /* list of received packets  */
+
+       /* guest mapped buffers */
+       nc2_vmq_buf_t buffer[VMQ_MAX_BUFFERS];
+
+       /* Ring with free pages available for mapping guest RX buffers */
+       u16 unmapped_pages[VMQ_MAX_BUFFERS];
+       unsigned int unmapped_pages_prod;
+       unsigned int unmapped_pages_cons;
+
+       /* Ring of mapped RX pages available for vmq device */
+       u16 mapped_pages[VMQ_MAX_BUFFERS];
+       unsigned int mapped_pages_prod;
+       unsigned int mapped_pages_cons;
+
+       unsigned int nbufs;           /* number of vmq buffers: posted to   */
+                                     /* HW queue or available to be posted */
+       int vmq_id;                   /* Queue id    */
+       int vmq_size;                 /* Queue size  */
+       int vmq_state;                /* queue state */
+
+       atomic_t         refcnt;
+       wait_queue_head_t waiting_to_free;
+
+} nc2_vmq_t;
+
+#endif /* !VMQ_DEF_H__ */
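
The unmapped_pages and mapped_pages arrays above are free-running rings: the prod and cons counters are unsigned ints that are never reset, occupancy is always prod - cons (which remains correct across 32-bit wraparound in unsigned arithmetic), and VMQ_IDX_MASK() reduces a counter to an array slot, which is exactly why VMQ_MAX_BUFFERS must be a power of 2. A small stand-alone demonstration of the convention, with the ring size shrunk for brevity:

#include <assert.h>
#include <stdio.h>

#define RING_SIZE 8     /* a power of two, like VMQ_MAX_BUFFERS */
#define IDX_MASK(i) ((i) & (RING_SIZE - 1))

static unsigned short ring[RING_SIZE];
static unsigned int prod, cons;

int main(void)
{
        unsigned int i;

        /* start near the 32-bit limit to show the counters wrap safely */
        prod = cons = 0xfffffffcu;

        for (i = 0; i < 6; i++)         /* produce 6 entries across the wrap */
                ring[IDX_MASK(prod++)] = (unsigned short)i;

        assert(prod - cons == 6);       /* occupancy, even though prod wrapped past 0 */

        while (cons != prod)            /* consume in FIFO order: prints 0..5 */
                printf("%d\n", ring[IDX_MASK(cons++)]);
        return 0;
}
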
diff --git a/drivers/xen/netchannel2/xmit_packet.c b/drivers/xen/netchannel2/xmit_packet.c
index 1a879aa..09827fc 100644
--- a/drivers/xen/netchannel2/xmit_packet.c
+++ b/drivers/xen/netchannel2/xmit_packet.c
@@ -3,6 +3,7 @@
 #include <linux/kernel.h>
 #include <linux/version.h>
 #include "netchannel2_core.h"
+#include "vmq.h"
 
 /* You don't normally want to transmit in posted buffers mode, because
    grant mode is usually faster, but it's sometimes useful for testing
@@ -189,6 +190,11 @@ int nc2_really_start_xmit(struct netchannel2_ring_pair *ncrp,
        set_offload_flags(skb, msg);
 
        switch (skb_co->policy) {
+#ifdef CONFIG_XEN_NETDEV2_VMQ
+       case transmit_policy_vmq:
+               xmit_vmq(nc, skb, msg);
+               break;
+#endif
        case transmit_policy_small:
                /* Nothing to do */
                break;
-- 
1.6.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel
