[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Xen-devel] [PATCH 19/22] Add the basic VMQ APIs. Nobody uses or implements them at the moment, but that will change shortly.



This includes various bits of patches which were originally written by the other people signing off below.

Signed-off-by: Jose Renato Santos <jsantos@xxxxxxxxxx>
Signed-off-by: Mitch Williams <mitch.a.williams@xxxxxxxxx>
Signed-off-by: Steven Smith <steven.smith@xxxxxxxxxx>

All bugs are mine, of course.
---
 include/linux/netdevice.h |    5 +
 include/linux/netvmq.h    |  399 +++++++++++++++++++++++++++++++++++++++++++++
 net/Kconfig               |    6 +
 3 files changed, 410 insertions(+), 0 deletions(-)
 create mode 100644 include/linux/netvmq.h

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 2b7b804..f439800 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -753,6 +753,11 @@ struct net_device
 #define GSO_MAX_SIZE           65536
        unsigned int            gso_max_size;
 
+#ifdef CONFIG_NET_VMQ
+       /* multi-queue for virtualization */
+       struct net_vmq          *vmq;
+#endif
+
 #ifdef CONFIG_DCBNL
        /* Data Center Bridging netlink ops */
        struct dcbnl_rtnl_ops *dcbnl_ops;
diff --git a/include/linux/netvmq.h b/include/linux/netvmq.h
new file mode 100644
index 0000000..108807b
--- /dev/null
+++ b/include/linux/netvmq.h
@@ -0,0 +1,399 @@
+/******************************************************************************
+ * netvmq.h
+ *
+ * Interface between the I/O virtualization layer and multi-queue devices to
+ * enable direct data placement in guest memory
+ *
+ * Copyright (c) 2008, Jose Renato Santos, Hewlett-Packard Co.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version 2
+ * as published by the Free Software Foundation; or, when distributed
+ * separately from the Linux kernel or incorporated into other
+ * software packages, subject to the following license:
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this source file (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy, modify,
+ * merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * This file defines the vmq API for Linux network device drivers
+ * to enable the use of multi-queue NICs for virtualization.
+ * The goal is to enable network device drivers to dedicate
+ * each RX queue to a specific guest. This means network
+ * drivers should be able to allocate physical memory from
+ * the set of memory pages assigned to a specific guest.
+ *
+ * The interface between network device drivers and the virtualization
+ * layer has two components:
+ *   1) A set of functions implemented by the virtualization layer that
+ *      can be called from new multi-queue network device drivers
+ *   2) A set of new functions implemented by the device drivers to support
+ *    multi-queue
+ */
+
+#ifndef _NETVMQ_H
+#define _NETVMQ_H
+
+#ifdef CONFIG_NET_VMQ
+
+#include <linux/netdevice.h>
+
+/*
+ * Status flags for the flags field of the vmq_queue struct.
+ * For each flag, _VMQ_queue_<name> is the bit number and
+ * VMQ_queue_<name> is the matching bit mask.
+ */
+/* allocated/free queue */
+#define _VMQ_queue_allocated   (0)
+#define VMQ_queue_allocated    (1U<<_VMQ_queue_allocated)
+
+/* queue type, RX/TX (presumably set for RX, clear for TX - TODO confirm) */
+#define _VMQ_queue_rx          (1)
+#define VMQ_queue_rx           (1U<<_VMQ_queue_rx)
+
+/* enabled/disabled queue */
+#define _VMQ_queue_enabled     (2)
+#define VMQ_queue_enabled      (1U<<_VMQ_queue_enabled)
+
+/*
+ * Queue type used to allocate or check number of available queues.
+ * RX and TX are separate bits; VMQ_TYPE_TX_RX is both of them ORed together.
+ */
+#define VMQ_TYPE_RX            (1)
+#define VMQ_TYPE_TX            (2)
+#define VMQ_TYPE_TX_RX         (VMQ_TYPE_RX | VMQ_TYPE_TX)
+
+
+struct vmq_queue {
+       /*
+        * Per-queue state; net_vmq.queue points to a vector of these.
+        * flags holds the queue flags - a combination of the
+        * VMQ_queue_* bit masks
+        */
+       unsigned int            flags;
+       /* pointer to opaque struct with guest information     */
+       /* format is specific to the virtualization layer used */
+       void                    *guest;
+       /* pointer to opaque struct in device driver */
+       void                    *devqueue;
+};
+typedef struct vmq_queue vmq_queue_t;
+
+struct net_vmq {
+       /*
+        * Pointers to device driver specific functions for multi-queue.
+        * Callers are expected to go through the vmq_*() inline wrappers
+        * in this file (vmq_avail_queues(), vmq_alloc_queue(),
+        * vmq_free_queue(), vmq_get_maxsize(), vmq_get_size(),
+        * vmq_set_size(), vmq_set_mac(), vmq_set_vlan(),
+        * vmq_enable_queue() and vmq_disable_queue()), whose comments
+        * describe the arguments and return value of each operation.
+        * The wrappers call these pointers without checking them for
+        * NULL, so every one of them has to be set.
+        */
+
+       int (*avail_queues)(struct net_device *netdev,
+                           unsigned int queue_type);
+       int (*alloc_queue)(struct net_device *netdev,
+                          unsigned int queue_type);
+       int (*free_queue)(struct net_device *netdev, int queue);
+       int (*get_maxsize)(struct net_device *netdev);
+       int (*get_size)(struct net_device *netdev, int queue);
+       int (*set_size)(struct net_device *netdev, int queue, int size);
+       int (*set_mac)(struct net_device *netdev, int queue, u8 *mac_addr);
+       int (*set_vlan)(struct net_device *netdev, int queue, int vlan_id);
+       int (*enable)(struct net_device *netdev, int queue);
+       int (*disable)(struct net_device *netdev, int queue);
+
+       /* maximum number of vm queues that device can allocate */
+       int                     nvmq;
+
+       /* Variable size vector with queues info, indexed by queue id */
+       /* nvmq defines the vector size; alloc_vmq() allocates the vector */
+       vmq_queue_t             *queue;
+};
+typedef struct net_vmq net_vmq_t;
+
+/**
+ *     alloc_vmq - Allocate net_vmq struct used for multi-queue devices
+ *     @max_queues: Maximum number of queues that can be allocated
+ *                  for virtualization
+ *
+ *     Allocates a zeroed net_vmq together with a zeroed vector of
+ *     @max_queues vmq_queue entries and records @max_queues in nvmq,
+ *     which defines the vector size. The function pointers are left
+ *     NULL for the caller to fill in.
+ *
+ *     Returns the new struct, or NULL if either allocation fails.
+ *     Release it with free_vmq().
+ */
+static inline net_vmq_t *alloc_vmq(int max_queues)
+{
+       net_vmq_t *vmq;
+
+       vmq = kzalloc(sizeof(*vmq), GFP_KERNEL);
+       if (!vmq)
+               return NULL;
+
+       /*
+        * kcalloc() checks the count * size multiplication, so an
+        * out-of-range max_queues fails cleanly instead of wrapping
+        * around to an undersized vector.
+        */
+       vmq->queue = kcalloc(max_queues, sizeof(*vmq->queue), GFP_KERNEL);
+       if (!vmq->queue) {
+               kfree(vmq);
+               return NULL;
+       }
+       vmq->nvmq = max_queues;
+
+       return vmq;
+}
+
+/**
+ *     free_vmq - Free net_vmq struct
+ *     @vmq: pointer to net_vmq struct; may be NULL
+ *
+ *     Frees the queue vector and then the struct itself. Like kfree(),
+ *     a NULL @vmq is a no-op, so error-unwind paths can call this
+ *     without checking whether alloc_vmq() succeeded.
+ */
+static inline void free_vmq(net_vmq_t *vmq)
+{
+       if (!vmq)
+               return;
+
+       kfree(vmq->queue);
+       kfree(vmq);
+}
+
+/*================================================================*
+ * 1) Functions provided by the virtualization layer to support   *
+ * multi-queue devices.                                           *
+ * Device drivers that support multi-queue should use these new   *
+ * functions instead of the ones they replace                     *
+ *================================================================*/
+
+
+/* vmq_alloc_skb : This function should be used instead of the usual
+ * netdev_alloc_skb() in order to post RX buffers to a RX queue
+ * dedicated to a guest. Queues not dedicated to a guest should
+ * use the regular netdev_alloc_skb() function
+ *
+ * It will return buffers from memory belonging to a given guest.
+ * The device driver should not try to change the data alignment
+ * or change the skb data pointer in any way.
+ * The function should already return an skb with the right alignment
+ *
+ * The device driver should be prepared to handle a NULL return value
+ * indicating no memory for that guest is currently available. In this case
+ * the device driver should only postpone the buffer allocation
+ * (probably until the next buffer is used by the device) and continue
+ * operating with the previously posted buffers
+ *
+ *   netdev: network device allocating the skb
+ *   queue: Queue id of a queue dedicated to a guest.
+ *          Individual queues are identified by an integer in
+ *          the range [0, MAX-1]. Negative values are used to indicate error.
+ *          The maximum number of queues (MAX) is determined by the device
+ *
+ *   length: size to allocate
+ */
+struct sk_buff *vmq_alloc_skb(struct net_device *netdev, int queue,
+                             unsigned int length);
+
+
+/* vmq_free_skb : Free an skb allocated with vmq_alloc_skb()
+ *
+ *   skb: socket buffer to be freed
+ *   queue: Queue id of a queue dedicated to a guest
+ *          We could add a queue id field in sk_buff struct and avoid
+ *          passing it as a parameter in vmq_free_skb() and vmq_netif_rx()
+ */
+void vmq_free_skb(struct sk_buff *skb, int queue);
+
+/* vmq_alloc_page : Allocate full pages from guest memory.
+ * This can only be used when the device MTU is larger than a page
+ * and multiple pages are needed to receive a packet.
+ *
+ * Similarly to vmq_alloc_skb(),
+ * the device driver should be prepared to handle a NULL return value
+ * indicating no memory for that guest is currently available. In this case
+ * the device driver should only postpone the buffer allocation
+ * (probably until the next buffer is used by the device) and continue
+ * operating with the previously posted buffers
+ *
+ *   netdev: network device allocating the page
+ *   queue: Queue id of a queue dedicated to a guest.
+ *          Individual queues are identified by an integer in
+ *          the range [0, MAX-1]. Negative values are used to indicate error.
+ *          The maximum number of queues (MAX) is determined by the device
+ */
+struct page *vmq_alloc_page(struct net_device *netdev, int queue);
+
+/* vmq_free_page : Free a guest page allocated with vmq_alloc_page()
+ *
+ *   page: page to be freed
+ *   queue: Queue id of a queue dedicated to a guest
+ *          (presumably the same id that was passed to vmq_alloc_page()
+ *          for this page - TODO confirm)
+ */
+void vmq_free_page(struct page *page, int queue);
+
+/*
+ * vmq_netif_rx: This function is a replacement for the generic netif_rx()
+ * and allows packets received on a particular queue to be forwarded directly
+ * to a particular guest, bypassing the regular network stack (bridge in Xen).
+ * In Xen this function will be implemented by the Xen netback driver.
+ * The use of this function by the driver is optional and may be configured
+ * using a kernel CONFIG option (CONFIG option to be defined)
+ *
+ *   skb: Received socket buffer
+ *   queue: Queue id of a queue dedicated to a guest
+ *   RETURN VALUE:
+ *     not specified in this header; presumably follows the netif_rx()
+ *     convention - TODO confirm against the implementation
+ */
+int vmq_netif_rx(struct sk_buff *skb, int queue);
+
+/*==============================================================*
+ * 2) New device driver functions for multi-queue devices       *
+ *==============================================================*/
+
+/* vmq_avail_queues: Query how many more queues can still be allocated
+ *     Already allocated queues and queues used for receive side scaling
+ *     are not counted. The driver should report 0 whenever
+ *     vmq_alloc_queue() would fail
+ *
+ *   netdev: network device
+ *   queue_type: Queue type, (VMQ_TYPE_*)
+ *   RETURN VALUE:
+ *     number of available queues;
+ *     -EINVAL if the device has no net_vmq struct attached
+ */
+static inline int vmq_avail_queues(struct net_device *netdev,
+                                  unsigned int queue_type)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->avail_queues(netdev, queue_type) : -EINVAL;
+}
+
+/* vmq_alloc_queue: Ask the driver to allocate a queue
+ *
+ *   netdev: network device
+ *   queue_type: Queue type, (VMQ_TYPE_*)
+ *   RETURN VALUE:
+ *     queue id of the allocated queue (an integer that is strictly
+ *       smaller than the maximum number of queues);
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_alloc_queue(struct net_device *netdev,
+                                 unsigned int queue_type)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->alloc_queue(netdev, queue_type) : -EINVAL;
+}
+
+/* vmq_free_queue: Release a queue obtained from vmq_alloc_queue()
+ *
+ *   netdev: network device
+ *   queue: id of queue to be freed
+ *   RETURN VALUE:
+ *     returns 0 on success;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_free_queue(struct net_device *netdev, int queue)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->free_queue(netdev, queue) : -EINVAL;
+}
+
+/* vmq_get_maxsize: Query the largest size that can be set for a queue
+ * (max number of HW descriptors)
+ *
+ *   netdev: network device
+ *   RETURN VALUE:
+ *     max size of a queue;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_get_maxsize(struct net_device *netdev)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->get_maxsize(netdev) : -EINVAL;
+}
+
+/* vmq_get_size: Query the current size of a queue
+ * (number of HW descriptors)
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   RETURN VALUE:
+ *     size of queue;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_get_size(struct net_device *netdev, int queue)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->get_size(netdev, queue) : -EINVAL;
+}
+
+/* vmq_set_size: Change the size of a queue (number of HW descriptors)
+ *   The driver can return an error if size exceeds the maximum hw
+ *   capability; vmq_get_maxsize() reports the maximum size that can
+ *   be set
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   size: Queue size (number of HW descriptors)
+ *   RETURN VALUE:
+ *     returns 0 on success;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_set_size(struct net_device *netdev, int queue, int size)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->set_size(netdev, queue, size) : -EINVAL;
+}
+
+/* vmq_set_mac: Program the MAC address filter of a queue
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   mac_addr: pointer to a 6 byte array with the MAC address;
+ *             MAC address FF:FF:FF:FF:FF:FF is used to reset the filter
+ *   RETURN VALUE:
+ *     returns 0 on success;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_set_mac(struct net_device *netdev, int queue,
+                             u8 *mac_addr)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->set_mac(netdev, queue, mac_addr) : -EINVAL;
+}
+
+/* vmq_set_vlan: Program the VLAN filter of a queue
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   vlan_id: VLAN id;
+ *            the invalid VLAN id -1 is used to reset the VLAN filter
+ *   RETURN VALUE:
+ *     returns 0 on success;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_set_vlan(struct net_device *netdev, int queue,
+                              int vlan_id)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->set_vlan(netdev, queue, vlan_id) : -EINVAL;
+}
+
+/* vmq_enable_queue: Turn a queue on
+ *     For receive queues this will trigger allocating and posting buffers
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   RETURN VALUE:
+ *     returns 0 on success;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_enable_queue(struct net_device *netdev, int queue)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->enable(netdev, queue) : -EINVAL;
+}
+
+/* vmq_disable_queue: Turn a queue off
+ *     This will flush all buffers in the queue and will free the respective
+ *     skb's or fragment pages
+ *
+ *   netdev: network device
+ *   queue: queue id
+ *   RETURN VALUE:
+ *     returns 0 on success;
+ *     a negative value indicates error (-EINVAL if the device has no
+ *       net_vmq struct attached)
+ */
+static inline int vmq_disable_queue(struct net_device *netdev, int queue)
+{
+       struct net_vmq *ops = netdev->vmq;
+
+       return ops ? ops->disable(netdev, queue) : -EINVAL;
+}
+
+#endif /* CONFIG_NET_VMQ */
+
+#endif /* _NETVMQ_H */
diff --git a/net/Kconfig b/net/Kconfig
index 0732cb3..7837a9e 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -37,6 +37,12 @@ source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 
+config NET_VMQ
+       bool "Virtual-machine multi-queue support"
+       default n
+       help
+         Add support for the VMQ features of certain modern network cards.
+
 config INET
        bool "TCP/IP networking"
        ---help---
-- 
1.6.3.1


_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxx
http://lists.xensource.com/xen-devel


 


Rackspace

Lists.xenproject.org is hosted with RackSpace, monitoring our
servers 24x7x365 and backed by RackSpace's Fanatical Support®.